Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp | 98
-rw-r--r--  lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 60
-rw-r--r--  lib/Transforms/Scalar/BDCE.cpp | 76
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 7
-rw-r--r--  lib/Transforms/Scalar/ConstantHoisting.cpp | 302
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp | 9
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 175
-rw-r--r--  lib/Transforms/Scalar/DCE.cpp | 70
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 840
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp | 140
-rw-r--r--  lib/Transforms/Scalar/Float2Int.cpp | 90
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 1285
-rw-r--r--  lib/Transforms/Scalar/GVNHoist.cpp | 825
-rw-r--r--  lib/Transforms/Scalar/GuardWidening.cpp | 691
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 313
-rw-r--r--  lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 267
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 273
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 773
-rw-r--r--  lib/Transforms/Scalar/LoadCombine.cpp | 73
-rw-r--r--  lib/Transforms/Scalar/LoopDataPrefetch.cpp | 304
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp | 181
-rw-r--r--  lib/Transforms/Scalar/LoopDistribute.cpp | 398
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 428
-rw-r--r--  lib/Transforms/Scalar/LoopInstSimplify.cpp | 167
-rw-r--r--  lib/Transforms/Scalar/LoopInterchange.cpp | 75
-rw-r--r--  lib/Transforms/Scalar/LoopLoadElimination.cpp | 75
-rw-r--r--  lib/Transforms/Scalar/LoopRerollPass.cpp | 407
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 229
-rw-r--r--  lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 114
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 98
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp | 992
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 125
-rw-r--r--  lib/Transforms/Scalar/LoopVersioningLICM.cpp | 571
-rw-r--r--  lib/Transforms/Scalar/LowerAtomic.cpp | 96
-rw-r--r--  lib/Transforms/Scalar/LowerExpectIntrinsic.cpp | 27
-rw-r--r--  lib/Transforms/Scalar/LowerGuardIntrinsic.cpp | 123
-rw-r--r--  lib/Transforms/Scalar/Makefile | 15
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 301
-rw-r--r--  lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 292
-rw-r--r--  lib/Transforms/Scalar/NaryReassociate.cpp | 49
-rw-r--r--  lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 179
-rw-r--r--  lib/Transforms/Scalar/PlaceSafepoints.cpp | 471
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 370
-rw-r--r--  lib/Transforms/Scalar/Reg2Mem.cpp | 26
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 1035
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 601
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 123
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp | 68
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 2630
-rw-r--r--  lib/Transforms/Scalar/Scalarizer.cpp | 30
-rw-r--r--  lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 24
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 40
-rw-r--r--  lib/Transforms/Scalar/Sink.cpp | 250
-rw-r--r--  lib/Transforms/Scalar/SpeculativeExecution.cpp | 52
-rw-r--r--  lib/Transforms/Scalar/StraightLineStrengthReduce.cpp | 79
-rw-r--r--  lib/Transforms/Scalar/StructurizeCFG.cpp | 120
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 301
57 files changed, 9247 insertions, 8586 deletions
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 590a52da6b192..0eed0240c7416 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -22,10 +22,12 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -33,22 +35,70 @@ using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed");
+static void collectLiveScopes(const DILocalScope &LS,
+ SmallPtrSetImpl<const Metadata *> &AliveScopes) {
+ if (!AliveScopes.insert(&LS).second)
+ return;
+
+ if (isa<DISubprogram>(LS))
+ return;
+
+ // Tail-recurse through the scope chain.
+ collectLiveScopes(cast<DILocalScope>(*LS.getScope()), AliveScopes);
+}
+
+static void collectLiveScopes(const DILocation &DL,
+ SmallPtrSetImpl<const Metadata *> &AliveScopes) {
+ // Even though DILocations are not scopes, shove them into AliveScopes so we
+ // don't revisit them.
+ if (!AliveScopes.insert(&DL).second)
+ return;
+
+ // Collect live scopes from the scope chain.
+ collectLiveScopes(*DL.getScope(), AliveScopes);
+
+ // Tail-recurse through the inlined-at chain.
+ if (const DILocation *IA = DL.getInlinedAt())
+ collectLiveScopes(*IA, AliveScopes);
+}
+
+// Check if this instruction is a runtime call for value profiling and
+// if it's instrumenting a constant.
+static bool isInstrumentsConstant(Instruction &I) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ if (Function *Callee = CI->getCalledFunction())
+ if (Callee->getName().equals(getInstrProfValueProfFuncName()))
+ if (isa<Constant>(CI->getArgOperand(0)))
+ return true;
+ return false;
+}
+
static bool aggressiveDCE(Function& F) {
- SmallPtrSet<Instruction*, 128> Alive;
+ SmallPtrSet<Instruction*, 32> Alive;
SmallVector<Instruction*, 128> Worklist;
// Collect the set of "root" instructions that are known live.
for (Instruction &I : instructions(F)) {
- if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) || I.isEHPad() ||
- I.mayHaveSideEffects()) {
+ if (isa<TerminatorInst>(I) || I.isEHPad() || I.mayHaveSideEffects()) {
+ // Skip any value profile instrumentation calls if they are
+ // instrumenting constants.
+ if (isInstrumentsConstant(I))
+ continue;
Alive.insert(&I);
Worklist.push_back(&I);
}
}
- // Propagate liveness backwards to operands.
+ // Propagate liveness backwards to operands. Keep track of live debug info
+ // scopes.
+ SmallPtrSet<const Metadata *, 32> AliveScopes;
while (!Worklist.empty()) {
Instruction *Curr = Worklist.pop_back_val();
+
+ // Collect the live debug info scopes attached to this instruction.
+ if (const DILocation *DL = Curr->getDebugLoc())
+ collectLiveScopes(*DL, AliveScopes);
+
for (Use &OI : Curr->operands()) {
if (Instruction *Inst = dyn_cast<Instruction>(OI))
if (Alive.insert(Inst).second)
@@ -61,10 +111,30 @@ static bool aggressiveDCE(Function& F) {
// value of the function, and may therefore be deleted safely.
// NOTE: We reuse the Worklist vector here for memory efficiency.
for (Instruction &I : instructions(F)) {
- if (!Alive.count(&I)) {
- Worklist.push_back(&I);
- I.dropAllReferences();
+ // Check if the instruction is alive.
+ if (Alive.count(&I))
+ continue;
+
+ if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I)) {
+ // Check if the scope of this variable location is alive.
+ if (AliveScopes.count(DII->getDebugLoc()->getScope()))
+ continue;
+
+ // Fallthrough and drop the intrinsic.
+ DEBUG({
+ // If intrinsic is pointing at a live SSA value, there may be an
+ // earlier optimization bug: if we know the location of the variable,
+ // why isn't the scope of the location alive?
+ if (Value *V = DII->getVariableLocation())
+ if (Instruction *II = dyn_cast<Instruction>(V))
+ if (Alive.count(II))
+ dbgs() << "Dropping debug info for " << *DII << "\n";
+ });
}
+
+ // Prepare to delete.
+ Worklist.push_back(&I);
+ I.dropAllReferences();
}
for (Instruction *&I : Worklist) {
@@ -75,10 +145,14 @@ static bool aggressiveDCE(Function& F) {
return !Worklist.empty();
}
-PreservedAnalyses ADCEPass::run(Function &F) {
- if (aggressiveDCE(F))
- return PreservedAnalyses::none();
- return PreservedAnalyses::all();
+PreservedAnalyses ADCEPass::run(Function &F, FunctionAnalysisManager &) {
+ if (!aggressiveDCE(F))
+ return PreservedAnalyses::all();
+
+ // FIXME: This should also 'preserve the CFG'.
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
}
namespace {
@@ -89,7 +163,7 @@ struct ADCELegacyPass : public FunctionPass {
}
bool runOnFunction(Function& F) override {
- if (skipOptnoneFunction(F))
+ if (skipFunction(F))
return false;
return aggressiveDCE(F);
}
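
The ADCE hunks above implement a mark-and-sweep scheme: terminators, EH pads, and side-effecting instructions are seeded as roots, liveness is propagated backwards through operands with a worklist, and whatever remains unmarked is dropped. Below is a minimal standalone sketch of that worklist pattern; the Node type and its Root flag are hypothetical stand-ins, not LLVM IR.

// Standalone sketch of the mark-and-sweep liveness scheme used above.
// "Node" and its fields are illustrative stand-ins for llvm::Instruction.
#include <cstdio>
#include <unordered_set>
#include <vector>

struct Node {
  const char *Name;
  bool Root;                     // stands in for terminator / side effects
  std::vector<Node *> Operands;  // stands in for instruction operands
};

static std::unordered_set<const Node *> markLive(const std::vector<Node *> &All) {
  std::unordered_set<const Node *> Alive;
  std::vector<Node *> Worklist;
  // Collect the roots that are known live.
  for (Node *N : All)
    if (N->Root && Alive.insert(N).second)
      Worklist.push_back(N);
  // Propagate liveness backwards to operands.
  while (!Worklist.empty()) {
    Node *Curr = Worklist.back();
    Worklist.pop_back();
    for (Node *Op : Curr->Operands)
      if (Alive.insert(Op).second)
        Worklist.push_back(Op);
  }
  return Alive;
}

int main() {
  Node C{"c", false, {}}, B{"b", false, {&C}}, A{"a", true, {&B}}, D{"d", false, {}};
  std::vector<Node *> All = {&A, &B, &C, &D};
  auto Alive = markLive(All);
  for (const Node *N : All)
    if (!Alive.count(N))
      std::printf("%s is dead\n", N->Name);  // only "d" is unreachable from a root
  return 0;
}
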
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 4b721d38adba7..7f8b8ce91e79a 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -18,6 +18,7 @@
#define AA_NAME "alignment-from-assumptions"
#define DEBUG_TYPE AA_NAME
+#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -25,13 +26,11 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
@@ -67,18 +66,7 @@ struct AlignmentFromAssumptions : public FunctionPass {
AU.addPreserved<ScalarEvolutionWrapperPass>();
}
- // For memory transfers, we need a common alignment for both the source and
- // destination. If we have a new alignment for only one operand of a transfer
- // instruction, save it in these maps. If we reach the other operand through
- // another assumption later, then we may change the alignment at that point.
- DenseMap<MemTransferInst *, unsigned> NewDestAlignments, NewSrcAlignments;
-
- ScalarEvolution *SE;
- DominatorTree *DT;
-
- bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV,
- const SCEV *&OffSCEV);
- bool processAssumption(CallInst *I);
+ AlignmentFromAssumptionsPass Impl;
};
}
@@ -209,9 +197,10 @@ static unsigned getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
return 0;
}
-bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
- Value *&AAPtr, const SCEV *&AlignSCEV,
- const SCEV *&OffSCEV) {
+bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
+ Value *&AAPtr,
+ const SCEV *&AlignSCEV,
+ const SCEV *&OffSCEV) {
// An alignment assume must be a statement about the least-significant
// bits of the pointer being zero, possibly with some offset.
ICmpInst *ICI = dyn_cast<ICmpInst>(I->getArgOperand(0));
@@ -302,7 +291,7 @@ bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
return true;
}
-bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
+bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
Value *AAPtr;
const SCEV *AlignSCEV, *OffSCEV;
if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV))
@@ -411,14 +400,26 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
}
bool AlignmentFromAssumptions::runOnFunction(Function &F) {
- bool Changed = false;
+ if (skipFunction(F))
+ return false;
+
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ return Impl.runImpl(F, AC, SE, DT);
+}
+
+bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
+ ScalarEvolution *SE_,
+ DominatorTree *DT_) {
+ SE = SE_;
+ DT = DT_;
NewDestAlignments.clear();
NewSrcAlignments.clear();
+ bool Changed = false;
for (auto &AssumeVH : AC.assumptions())
if (AssumeVH)
Changed |= processAssumption(cast<CallInst>(AssumeVH));
@@ -426,3 +427,20 @@ bool AlignmentFromAssumptions::runOnFunction(Function &F) {
return Changed;
}
+PreservedAnalyses
+AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
+
+ AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
+ ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ bool Changed = runImpl(F, AC, &SE, &DT);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<AAManager>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<GlobalsAA>();
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
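
The alignment-from-assumptions hunks above are mostly a mechanical split into a reusable runImpl plus a new-pass-manager entry point; the underlying idea, per the extractAlignmentInfo comment, is that an assume stating the low bits of a pointer are zero (possibly at an offset) lets later accesses claim a larger alignment. The following is a toy arithmetic sketch of that derivation, assuming a power-of-two base alignment and a constant byte offset; the real pass reasons about general offsets through ScalarEvolution, and the function name here is illustrative only.

// Toy model: if Ptr is known Align-aligned (Align a power of two) and an
// access touches Ptr + Off for a compile-time constant Off, the access is
// guaranteed to be aligned to the largest power of two dividing both Align
// and Off.
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t guaranteedAlignment(uint64_t Align, uint64_t Off) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power of two expected");
  if (Off == 0)
    return Align;
  uint64_t LowBit = Off & (~Off + 1); // largest power of two dividing Off
  return LowBit < Align ? LowBit : Align;
}

int main() {
  std::printf("%llu\n", (unsigned long long)guaranteedAlignment(32, 0));  // 32
  std::printf("%llu\n", (unsigned long long)guaranteedAlignment(32, 8));  // 8
  std::printf("%llu\n", (unsigned long long)guaranteedAlignment(32, 48)); // 16
  return 0;
}
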
diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp
index cb9b8b6fffc84..4f6225f4c7b01 100644
--- a/lib/Transforms/Scalar/BDCE.cpp
+++ b/lib/Transforms/Scalar/BDCE.cpp
@@ -14,11 +14,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/BDCE.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -27,6 +27,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "bdce"
@@ -34,35 +35,7 @@ using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed (unused)");
STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
-namespace {
-struct BDCE : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- BDCE() : FunctionPass(ID) {
- initializeBDCEPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function& F) override;
-
- void getAnalysisUsage(AnalysisUsage& AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<DemandedBits>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-};
-}
-
-char BDCE::ID = 0;
-INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(DemandedBits)
-INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
- false, false)
-
-bool BDCE::runOnFunction(Function& F) {
- if (skipOptnoneFunction(F))
- return false;
- DemandedBits &DB = getAnalysis<DemandedBits>();
-
+static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
SmallVector<Instruction*, 128> Worklist;
bool Changed = false;
for (Instruction &I : instructions(F)) {
@@ -96,7 +69,44 @@ bool BDCE::runOnFunction(Function& F) {
return Changed;
}
-FunctionPass *llvm::createBitTrackingDCEPass() {
- return new BDCE();
+PreservedAnalyses BDCEPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
+ if (!bitTrackingDCE(F, DB))
+ return PreservedAnalyses::all();
+
+ // FIXME: This should also 'preserve the CFG'.
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
}
+namespace {
+struct BDCELegacyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BDCELegacyPass() : FunctionPass(ID) {
+ initializeBDCELegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ auto &DB = getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
+ return bitTrackingDCE(F, DB);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DemandedBitsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+}
+
+char BDCELegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(BDCELegacyPass, "bdce",
+ "Bit-Tracking Dead Code Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
+INITIALIZE_PASS_END(BDCELegacyPass, "bdce",
+ "Bit-Tracking Dead Code Elimination", false, false)
+
+FunctionPass *llvm::createBitTrackingDCEPass() { return new BDCELegacyPass(); }
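
The BDCE rewrite above follows the recurring port-to-new-pass-manager shape: the transform body becomes a static function taking its analyses as plain arguments, and both the new BDCEPass::run and the legacy BDCELegacyPass wrapper just gather inputs and forward to it. Here is a framework-free sketch of that shape; all names (runCore, NewPMPass, LegacyPass, DemandedInfo) are hypothetical stand-ins rather than LLVM interfaces.

// "One implementation, two wrappers": the core transform only sees the data
// it needs; the two pass flavours differ only in how they obtain that data.
#include <cstdio>
#include <string>

struct Function { std::string Name; };
struct DemandedInfo { int Bits; };

// The actual transform lives in one free function.
static bool runCore(Function &F, DemandedInfo &DB) {
  std::printf("running on %s with %d demanded bits\n", F.Name.c_str(), DB.Bits);
  return true; // pretend something changed
}

// New-style pass: analyses are handed in by an analysis manager.
struct NewPMPass {
  bool run(Function &F, DemandedInfo &FromManager) { return runCore(F, FromManager); }
};

// Legacy-style pass: fetches (here: simply owns) its analyses itself.
struct LegacyPass {
  DemandedInfo Cached{8};
  bool runOnFunction(Function &F) { return runCore(F, Cached); }
};

int main() {
  Function F{"example"};
  DemandedInfo DB{16};
  NewPMPass().run(F, DB);
  LegacyPass().runOnFunction(F);
  return 0;
}
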
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index a0ddbd0852063..9f04344b8b0a2 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -10,13 +10,16 @@ add_llvm_library(LLVMScalarOpts
EarlyCSE.cpp
FlattenCFGPass.cpp
Float2Int.cpp
+ GuardWidening.cpp
GVN.cpp
+ GVNHoist.cpp
InductiveRangeCheckElimination.cpp
IndVarSimplify.cpp
JumpThreading.cpp
LICM.cpp
LoadCombine.cpp
LoopDeletion.cpp
+ LoopDataPrefetch.cpp
LoopDistribute.cpp
LoopIdiomRecognize.cpp
LoopInstSimplify.cpp
@@ -24,11 +27,14 @@ add_llvm_library(LLVMScalarOpts
LoopLoadElimination.cpp
LoopRerollPass.cpp
LoopRotation.cpp
+ LoopSimplifyCFG.cpp
LoopStrengthReduce.cpp
LoopUnrollPass.cpp
LoopUnswitch.cpp
+ LoopVersioningLICM.cpp
LowerAtomic.cpp
LowerExpectIntrinsic.cpp
+ LowerGuardIntrinsic.cpp
MemCpyOptimizer.cpp
MergedLoadStoreMotion.cpp
NaryReassociate.cpp
@@ -40,7 +46,6 @@ add_llvm_library(LLVMScalarOpts
SCCP.cpp
SROA.cpp
Scalar.cpp
- ScalarReplAggregates.cpp
Scalarizer.cpp
SeparateConstOffsetFromGEP.cpp
SimplifyCFGPass.cpp
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 84f7f5fff5b59..913e939c2bd40 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -33,20 +33,20 @@
// %0 = load i64* inttoptr (i64 big_constant to i64*)
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/ConstantHoisting.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include <tuple>
using namespace llvm;
+using namespace consthoist;
#define DEBUG_TYPE "consthoist"
@@ -54,75 +54,12 @@ STATISTIC(NumConstantsHoisted, "Number of constants hoisted");
STATISTIC(NumConstantsRebased, "Number of constants rebased");
namespace {
-struct ConstantUser;
-struct RebasedConstantInfo;
-
-typedef SmallVector<ConstantUser, 8> ConstantUseListType;
-typedef SmallVector<RebasedConstantInfo, 4> RebasedConstantListType;
-
-/// \brief Keeps track of the user of a constant and the operand index where the
-/// constant is used.
-struct ConstantUser {
- Instruction *Inst;
- unsigned OpndIdx;
-
- ConstantUser(Instruction *Inst, unsigned Idx) : Inst(Inst), OpndIdx(Idx) { }
-};
-
-/// \brief Keeps track of a constant candidate and its uses.
-struct ConstantCandidate {
- ConstantUseListType Uses;
- ConstantInt *ConstInt;
- unsigned CumulativeCost;
-
- ConstantCandidate(ConstantInt *ConstInt)
- : ConstInt(ConstInt), CumulativeCost(0) { }
-
- /// \brief Add the user to the use list and update the cost.
- void addUser(Instruction *Inst, unsigned Idx, unsigned Cost) {
- CumulativeCost += Cost;
- Uses.push_back(ConstantUser(Inst, Idx));
- }
-};
-
-/// \brief This represents a constant that has been rebased with respect to a
-/// base constant. The difference to the base constant is recorded in Offset.
-struct RebasedConstantInfo {
- ConstantUseListType Uses;
- Constant *Offset;
-
- RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset)
- : Uses(std::move(Uses)), Offset(Offset) { }
-};
-
-/// \brief A base constant and all its rebased constants.
-struct ConstantInfo {
- ConstantInt *BaseConstant;
- RebasedConstantListType RebasedConstants;
-};
-
/// \brief The constant hoisting pass.
-class ConstantHoisting : public FunctionPass {
- typedef DenseMap<ConstantInt *, unsigned> ConstCandMapType;
- typedef std::vector<ConstantCandidate> ConstCandVecType;
-
- const TargetTransformInfo *TTI;
- DominatorTree *DT;
- BasicBlock *Entry;
-
- /// Keeps track of constant candidates found in the function.
- ConstCandVecType ConstCandVec;
-
- /// Keep track of cast instructions we already cloned.
- SmallDenseMap<Instruction *, Instruction *> ClonedCastMap;
-
- /// These are the final constants we decided to hoist.
- SmallVector<ConstantInfo, 8> ConstantVec;
+class ConstantHoistingLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- ConstantHoisting() : FunctionPass(ID), TTI(nullptr), DT(nullptr),
- Entry(nullptr) {
- initializeConstantHoistingPass(*PassRegistry::getPassRegistry());
+ ConstantHoistingLegacyPass() : FunctionPass(ID) {
+ initializeConstantHoistingLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &Fn) override;
@@ -135,67 +72,36 @@ public:
AU.addRequired<TargetTransformInfoWrapperPass>();
}
-private:
- /// \brief Initialize the pass.
- void setup(Function &Fn) {
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
- Entry = &Fn.getEntryBlock();
- }
+ void releaseMemory() override { Impl.releaseMemory(); }
- /// \brief Cleanup.
- void cleanup() {
- ConstantVec.clear();
- ClonedCastMap.clear();
- ConstCandVec.clear();
-
- TTI = nullptr;
- DT = nullptr;
- Entry = nullptr;
- }
-
- Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const;
- Instruction *findConstantInsertionPoint(const ConstantInfo &ConstInfo) const;
- void collectConstantCandidates(ConstCandMapType &ConstCandMap,
- Instruction *Inst, unsigned Idx,
- ConstantInt *ConstInt);
- void collectConstantCandidates(ConstCandMapType &ConstCandMap,
- Instruction *Inst);
- void collectConstantCandidates(Function &Fn);
- void findAndMakeBaseConstant(ConstCandVecType::iterator S,
- ConstCandVecType::iterator E);
- void findBaseConstants();
- void emitBaseConstants(Instruction *Base, Constant *Offset,
- const ConstantUser &ConstUser);
- bool emitBaseConstants();
- void deleteDeadCastInst() const;
- bool optimizeConstants(Function &Fn);
+private:
+ ConstantHoistingPass Impl;
};
}
-char ConstantHoisting::ID = 0;
-INITIALIZE_PASS_BEGIN(ConstantHoisting, "consthoist", "Constant Hoisting",
- false, false)
+char ConstantHoistingLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist",
+ "Constant Hoisting", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(ConstantHoisting, "consthoist", "Constant Hoisting",
- false, false)
+INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
+ "Constant Hoisting", false, false)
FunctionPass *llvm::createConstantHoistingPass() {
- return new ConstantHoisting();
+ return new ConstantHoistingLegacyPass();
}
/// \brief Perform the constant hoisting optimization for the given function.
-bool ConstantHoisting::runOnFunction(Function &Fn) {
- if (skipOptnoneFunction(Fn))
+bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) {
+ if (skipFunction(Fn))
return false;
DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n");
DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
- setup(Fn);
-
- bool MadeChange = optimizeConstants(Fn);
+ bool MadeChange = Impl.runImpl(
+ Fn, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn),
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(), Fn.getEntryBlock());
if (MadeChange) {
DEBUG(dbgs() << "********** Function after Constant Hoisting: "
@@ -204,15 +110,13 @@ bool ConstantHoisting::runOnFunction(Function &Fn) {
}
DEBUG(dbgs() << "********** End Constant Hoisting **********\n");
- cleanup();
-
return MadeChange;
}
/// \brief Find the constant materialization insertion point.
-Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst,
- unsigned Idx) const {
+Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
+ unsigned Idx) const {
// If the operand is a cast instruction, then we have to materialize the
// constant before the cast instruction.
if (Idx != ~0U) {
@@ -237,8 +141,8 @@ Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst,
}
/// \brief Find an insertion point that dominates all uses.
-Instruction *ConstantHoisting::
-findConstantInsertionPoint(const ConstantInfo &ConstInfo) const {
+Instruction *ConstantHoistingPass::findConstantInsertionPoint(
+ const ConstantInfo &ConstInfo) const {
assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
// Collect all basic blocks.
SmallPtrSet<BasicBlock *, 8> BBs;
@@ -272,10 +176,9 @@ findConstantInsertionPoint(const ConstantInfo &ConstInfo) const {
/// The operand at index Idx is not necessarily the constant integer itself. It
/// could also be a cast instruction or a constant expression that uses the
// constant integer.
-void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
- Instruction *Inst,
- unsigned Idx,
- ConstantInt *ConstInt) {
+void ConstantHoistingPass::collectConstantCandidates(
+ ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx,
+ ConstantInt *ConstInt) {
unsigned Cost;
// Ask the target about the cost of materializing the constant for the given
// instruction and operand index.
@@ -309,8 +212,8 @@ void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
/// \brief Scan the instruction for expensive integer constants and record them
/// in the constant candidate vector.
-void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
- Instruction *Inst) {
+void ConstantHoistingPass::collectConstantCandidates(
+ ConstCandMapType &ConstCandMap, Instruction *Inst) {
// Skip all cast instructions. They are visited indirectly later on.
if (Inst->isCast())
return;
@@ -320,6 +223,18 @@ void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
if (isa<InlineAsm>(Call->getCalledValue()))
return;
+ // Switch cases must remain constant, and if the value being tested is
+ // constant the entire thing should disappear.
+ if (isa<SwitchInst>(Inst))
+ return;
+
+ // Static allocas (constant size in the entry block) are handled by
+ // prologue/epilogue insertion so they're free anyway. We definitely don't
+ // want to make them non-constant.
+ auto AI = dyn_cast<AllocaInst>(Inst);
+ if (AI && AI->isStaticAlloca())
+ return;
+
// Scan all operands.
for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
Value *Opnd = Inst->getOperand(Idx);
@@ -363,25 +278,116 @@ void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
/// \brief Collect all integer constants in the function that cannot be folded
/// into an instruction itself.
-void ConstantHoisting::collectConstantCandidates(Function &Fn) {
+void ConstantHoistingPass::collectConstantCandidates(Function &Fn) {
ConstCandMapType ConstCandMap;
for (BasicBlock &BB : Fn)
for (Instruction &Inst : BB)
collectConstantCandidates(ConstCandMap, &Inst);
}
-/// \brief Find the base constant within the given range and rebase all other
-/// constants with respect to the base constant.
-void ConstantHoisting::findAndMakeBaseConstant(ConstCandVecType::iterator S,
- ConstCandVecType::iterator E) {
- auto MaxCostItr = S;
+// This helper function is necessary to deal with values that have different
+// bit widths (APInt Operator- does not like that). If the value cannot be
+// represented in uint64 we return an "empty" APInt. This is then interpreted
+// as the value is not in range.
+static llvm::Optional<APInt> calculateOffsetDiff(APInt V1, APInt V2)
+{
+ llvm::Optional<APInt> Res = None;
+ unsigned BW = V1.getBitWidth() > V2.getBitWidth() ?
+ V1.getBitWidth() : V2.getBitWidth();
+ uint64_t LimVal1 = V1.getLimitedValue();
+ uint64_t LimVal2 = V2.getLimitedValue();
+
+ if (LimVal1 == ~0ULL || LimVal2 == ~0ULL)
+ return Res;
+
+ uint64_t Diff = LimVal1 - LimVal2;
+ return APInt(BW, Diff, true);
+}
+
+// From a list of constants, one needs to be picked as the base, and the other
+// constants will be transformed into an offset from that base constant. The
+// question is which one is the best pick. For example, consider these constants
+// and their number of uses:
+//
+// Constants| 2 | 4 | 12 | 42 |
+// NumUses | 3 | 2 | 8 | 7 |
+//
+// Selecting constant 12 because it has the most uses will generate negative
+// offsets for constants 2 and 4 (i.e. -10 and -8 respectively). If negative
+// offsets lead to less optimal code generation, then there might be better
+// solutions. Suppose immediates in the range of 0..35 are most optimally
+// supported by the architecture, then selecting constant 2 is most optimal
+// because this will generate offsets: 0, 2, 10, 40. Offsets 0, 2 and 10 are in
+// range 0..35, and thus 3 + 2 + 8 = 13 uses are in range. Selecting 12 would
+// have only 8 uses in range, so choosing 2 as a base is more optimal. Thus, in
+// selecting the base constant, the range of the offsets is a very important
+// factor that we also take into account here. This algorithm calculates a
+// total cost for selecting a constant as the base and subtracts the costs of
+// immediates that end up out of range. It has quadratic complexity, so we
+// only call this function when we're optimising for size and there are fewer
+// than 100 constants; otherwise we fall back to the straightforward algorithm,
+// which does not do all the offset calculations.
+unsigned
+ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
+ ConstCandVecType::iterator E,
+ ConstCandVecType::iterator &MaxCostItr) {
unsigned NumUses = 0;
- // Use the constant that has the maximum cost as base constant.
+
+ if(!Entry->getParent()->optForSize() || std::distance(S,E) > 100) {
+ for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+ NumUses += ConstCand->Uses.size();
+ if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
+ MaxCostItr = ConstCand;
+ }
+ return NumUses;
+ }
+
+ DEBUG(dbgs() << "== Maximize constants in range ==\n");
+ int MaxCost = -1;
for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+ auto Value = ConstCand->ConstInt->getValue();
+ Type *Ty = ConstCand->ConstInt->getType();
+ int Cost = 0;
NumUses += ConstCand->Uses.size();
- if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
+ DEBUG(dbgs() << "= Constant: " << ConstCand->ConstInt->getValue() << "\n");
+
+ for (auto User : ConstCand->Uses) {
+ unsigned Opcode = User.Inst->getOpcode();
+ unsigned OpndIdx = User.OpndIdx;
+ Cost += TTI->getIntImmCost(Opcode, OpndIdx, Value, Ty);
+ DEBUG(dbgs() << "Cost: " << Cost << "\n");
+
+ for (auto C2 = S; C2 != E; ++C2) {
+ llvm::Optional<APInt> Diff = calculateOffsetDiff(
+ C2->ConstInt->getValue(),
+ ConstCand->ConstInt->getValue());
+ if (Diff) {
+ const int ImmCosts =
+ TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.getValue(), Ty);
+ Cost -= ImmCosts;
+ DEBUG(dbgs() << "Offset " << Diff.getValue() << " "
+ << "has penalty: " << ImmCosts << "\n"
+ << "Adjusted cost: " << Cost << "\n");
+ }
+ }
+ }
+ DEBUG(dbgs() << "Cumulative cost: " << Cost << "\n");
+ if (Cost > MaxCost) {
+ MaxCost = Cost;
MaxCostItr = ConstCand;
+ DEBUG(dbgs() << "New candidate: " << MaxCostItr->ConstInt->getValue()
+ << "\n");
+ }
}
+ return NumUses;
+}
+
+/// \brief Find the base constant within the given range and rebase all other
+/// constants with respect to the base constant.
+void ConstantHoistingPass::findAndMakeBaseConstant(
+ ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
+ auto MaxCostItr = S;
+ unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr);
// Don't hoist constants that have only one use.
if (NumUses <= 1)
@@ -404,7 +410,7 @@ void ConstantHoisting::findAndMakeBaseConstant(ConstCandVecType::iterator S,
/// \brief Finds and combines constant candidates that can be easily
/// rematerialized with an add from a common base constant.
-void ConstantHoisting::findBaseConstants() {
+void ConstantHoistingPass::findBaseConstants() {
// Sort the constants by value and type. This invalidates the mapping!
std::sort(ConstCandVec.begin(), ConstCandVec.end(),
[](const ConstantCandidate &LHS, const ConstantCandidate &RHS) {
@@ -466,8 +472,9 @@ static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat) {
/// \brief Emit materialization code for all rebased constants and update their
/// users.
-void ConstantHoisting::emitBaseConstants(Instruction *Base, Constant *Offset,
- const ConstantUser &ConstUser) {
+void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
+ Constant *Offset,
+ const ConstantUser &ConstUser) {
Instruction *Mat = Base;
if (Offset) {
Instruction *InsertionPt = findMatInsertPt(ConstUser.Inst,
@@ -538,7 +545,7 @@ void ConstantHoisting::emitBaseConstants(Instruction *Base, Constant *Offset,
/// \brief Hoist and hide the base constant behind a bitcast and emit
/// materialization code for derived constants.
-bool ConstantHoisting::emitBaseConstants() {
+bool ConstantHoistingPass::emitBaseConstants() {
bool MadeChange = false;
for (auto const &ConstInfo : ConstantVec) {
// Hoist and hide the base constant behind a bitcast.
@@ -572,14 +579,18 @@ bool ConstantHoisting::emitBaseConstants() {
/// \brief Check all cast instructions we made a copy of and remove them if they
/// have no more users.
-void ConstantHoisting::deleteDeadCastInst() const {
+void ConstantHoistingPass::deleteDeadCastInst() const {
for (auto const &I : ClonedCastMap)
if (I.first->use_empty())
I.first->eraseFromParent();
}
/// \brief Optimize expensive integer constants in the given function.
-bool ConstantHoisting::optimizeConstants(Function &Fn) {
+bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
+ DominatorTree &DT, BasicBlock &Entry) {
+ this->TTI = &TTI;
+ this->DT = &DT;
+ this->Entry = &Entry;
// Collect all constant candidates.
collectConstantCandidates(Fn);
@@ -604,3 +615,14 @@ bool ConstantHoisting::optimizeConstants(Function &Fn) {
return MadeChange;
}
+
+PreservedAnalyses ConstantHoistingPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ if (!runImpl(F, TTI, DT, F.getEntryBlock()))
+ return PreservedAnalyses::all();
+
+ // FIXME: This should also 'preserve the CFG'.
+ return PreservedAnalyses::none();
+}
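
The maximizeConstantsInRange comment above works through an example: candidate constants 2, 4, 12, 42 with 3, 2, 8, and 7 uses, and an assumed free immediate-offset range of 0..35, under which choosing 2 as the base keeps 3 + 2 + 8 = 13 uses reachable with a free offset. Below is a standalone toy of that counting; the real pass instead sums per-use costs from TargetTransformInfo (getIntImmCost / getIntImmCodeSizeCost), so this only models the "in range vs. out of range" intuition, and the 0..35 window is just the example's assumption.

// Count, for each candidate base, how many uses of all candidates stay
// within an assumed free offset window. Purely illustrative numbers and model.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Candidate {
  int64_t Value;
  unsigned NumUses;
};

static unsigned usesWithFreeOffset(const std::vector<Candidate> &Cands,
                                   int64_t Base, int64_t MaxFreeOffset) {
  unsigned InRange = 0;
  for (const Candidate &C : Cands) {
    int64_t Off = C.Value - Base;
    if (Off >= 0 && Off <= MaxFreeOffset)
      InRange += C.NumUses;
  }
  return InRange;
}

int main() {
  const std::vector<Candidate> Cands = {{2, 3}, {4, 2}, {12, 8}, {42, 7}};
  for (const Candidate &C : Cands)
    std::printf("base %2lld covers %u uses with a free offset\n",
                (long long)C.Value, usesWithFreeOffset(Cands, C.Value, 35));
  // With this model, base 2 covers 13 uses (offsets 0, 2, and 10 are free).
  return 0;
}
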
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index c974ebb9456f8..88172d19fe5a9 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -61,11 +61,14 @@ FunctionPass *llvm::createConstantPropagationPass() {
}
bool ConstantPropagation::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
// Initialize the worklist to all of the instructions ready to process...
std::set<Instruction*> WorkList;
- for(inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
- WorkList.insert(&*i);
- }
+ for (Instruction &I: instructions(&F))
+ WorkList.insert(&I);
+
bool Changed = false;
const DataLayout &DL = F.getParent()->getDataLayout();
TargetLibraryInfo *TLI =
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 686bd40711049..c0fed05333921 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
@@ -35,22 +36,11 @@ STATISTIC(NumMemAccess, "Number of memory access targets propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
STATISTIC(NumReturns, "Number of return values propagated");
STATISTIC(NumDeadCases, "Number of switch cases removed");
+STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
+STATISTIC(NumSRems, "Number of srem converted to urem");
namespace {
class CorrelatedValuePropagation : public FunctionPass {
- LazyValueInfo *LVI;
-
- bool processSelect(SelectInst *SI);
- bool processPHI(PHINode *P);
- bool processMemAccess(Instruction *I);
- bool processCmp(CmpInst *C);
- bool processSwitch(SwitchInst *SI);
- bool processCallSite(CallSite CS);
-
- /// Return a constant value for V usable at At and everything it
- /// dominates. If no such Constant can be found, return nullptr.
- Constant *getConstantAt(Value *V, Instruction *At);
-
public:
static char ID;
CorrelatedValuePropagation(): FunctionPass(ID) {
@@ -60,7 +50,7 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LazyValueInfo>();
+ AU.addRequired<LazyValueInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
};
@@ -69,7 +59,7 @@ namespace {
char CorrelatedValuePropagation::ID = 0;
INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
"Value Propagation", false, false)
-INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation",
"Value Propagation", false, false)
@@ -78,7 +68,7 @@ Pass *llvm::createCorrelatedValuePropagationPass() {
return new CorrelatedValuePropagation();
}
-bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
+static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
if (S->getType()->isVectorTy()) return false;
if (isa<Constant>(S->getOperand(0))) return false;
@@ -101,7 +91,7 @@ bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
return true;
}
-bool CorrelatedValuePropagation::processPHI(PHINode *P) {
+static bool processPHI(PHINode *P, LazyValueInfo *LVI) {
bool Changed = false;
BasicBlock *BB = P->getParent();
@@ -169,7 +159,7 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
return Changed;
}
-bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
+static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
Value *Pointer = nullptr;
if (LoadInst *L = dyn_cast<LoadInst>(I))
Pointer = L->getPointerOperand();
@@ -186,11 +176,11 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
return true;
}
-/// processCmp - See if LazyValueInfo's ability to exploit edge conditions,
-/// or range information is sufficient to prove this comparison. Even for
-/// local conditions, this can sometimes prove conditions instcombine can't by
+/// See if LazyValueInfo's ability to exploit edge conditions or range
+/// information is sufficient to prove this comparison. Even for local
+/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
-bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
+static bool processCmp(CmpInst *C, LazyValueInfo *LVI) {
Value *Op0 = C->getOperand(0);
Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
if (!Op1) return false;
@@ -218,14 +208,14 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
return true;
}
-/// processSwitch - Simplify a switch instruction by removing cases which can
-/// never fire. If the uselessness of a case could be determined locally then
-/// constant propagation would already have figured it out. Instead, walk the
-/// predecessors and statically evaluate cases based on information available
-/// on that edge. Cases that cannot fire no matter what the incoming edge can
-/// safely be removed. If a case fires on every incoming edge then the entire
-/// switch can be removed and replaced with a branch to the case destination.
-bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
+/// Simplify a switch instruction by removing cases which can never fire. If the
+/// uselessness of a case could be determined locally then constant propagation
+/// would already have figured it out. Instead, walk the predecessors and
+/// statically evaluate cases based on information available on that edge. Cases
+/// that cannot fire no matter what the incoming edge can safely be removed. If
+/// a case fires on every incoming edge then the entire switch can be removed
+/// and replaced with a branch to the case destination.
+static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
Value *Cond = SI->getCondition();
BasicBlock *BB = SI->getParent();
@@ -304,16 +294,18 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
return Changed;
}
-/// processCallSite - Infer nonnull attributes for the arguments at the
-/// specified callsite.
-bool CorrelatedValuePropagation::processCallSite(CallSite CS) {
+/// Infer nonnull attributes for the arguments at the specified callsite.
+static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
SmallVector<unsigned, 4> Indices;
unsigned ArgNo = 0;
for (Value *V : CS.args()) {
PointerType *Type = dyn_cast<PointerType>(V->getType());
-
+ // Try to mark pointer typed parameters as non-null. We skip the
+ // relatively expensive analysis for constants which are obviously either
+ // null or non-null to start with.
if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
+ !isa<Constant>(V) &&
LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
ConstantPointerNull::get(Type),
CS.getInstruction()) == LazyValueInfo::False)
@@ -334,7 +326,62 @@ bool CorrelatedValuePropagation::processCallSite(CallSite CS) {
return true;
}
-Constant *CorrelatedValuePropagation::getConstantAt(Value *V, Instruction *At) {
+// Helper function to rewrite srem and sdiv. As a policy choice, we choose not
+// to waste compile time on anything where the operands are local defs. While
+// LVI can sometimes reason about such cases, it's not its primary purpose.
+static bool hasLocalDefs(BinaryOperator *SDI) {
+ for (Value *O : SDI->operands()) {
+ auto *I = dyn_cast<Instruction>(O);
+ if (I && I->getParent() == SDI->getParent())
+ return true;
+ }
+ return false;
+}
+
+static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) {
+ Constant *Zero = ConstantInt::get(SDI->getType(), 0);
+ for (Value *O : SDI->operands()) {
+ auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, O, Zero, SDI);
+ if (Result != LazyValueInfo::True)
+ return false;
+ }
+ return true;
+}
+
+static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
+ if (SDI->getType()->isVectorTy() || hasLocalDefs(SDI) ||
+ !hasPositiveOperands(SDI, LVI))
+ return false;
+
+ ++NumSRems;
+ auto *BO = BinaryOperator::CreateURem(SDI->getOperand(0), SDI->getOperand(1),
+ SDI->getName(), SDI);
+ SDI->replaceAllUsesWith(BO);
+ SDI->eraseFromParent();
+ return true;
+}
+
+/// See if LazyValueInfo's ability to exploit edge conditions or range
+/// information is sufficient to prove that both operands of this SDiv are
+/// non-negative. If this is the case, replace the SDiv with a UDiv. Even for
+/// local conditions, this can sometimes prove conditions instcombine can't by
+/// exploiting range information.
+static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
+ if (SDI->getType()->isVectorTy() || hasLocalDefs(SDI) ||
+ !hasPositiveOperands(SDI, LVI))
+ return false;
+
+ ++NumSDivs;
+ auto *BO = BinaryOperator::CreateUDiv(SDI->getOperand(0), SDI->getOperand(1),
+ SDI->getName(), SDI);
+ BO->setIsExact(SDI->isExact());
+ SDI->replaceAllUsesWith(BO);
+ SDI->eraseFromParent();
+
+ return true;
+}
+
+static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
if (Constant *C = LVI->getConstant(V, At->getParent(), At))
return C;
@@ -357,44 +404,45 @@ Constant *CorrelatedValuePropagation::getConstantAt(Value *V, Instruction *At) {
ConstantInt::getFalse(C->getContext());
}
-bool CorrelatedValuePropagation::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
- LVI = &getAnalysis<LazyValueInfo>();
-
+static bool runImpl(Function &F, LazyValueInfo *LVI) {
bool FnChanged = false;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ for (BasicBlock &BB : F) {
bool BBChanged = false;
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
Instruction *II = &*BI++;
switch (II->getOpcode()) {
case Instruction::Select:
- BBChanged |= processSelect(cast<SelectInst>(II));
+ BBChanged |= processSelect(cast<SelectInst>(II), LVI);
break;
case Instruction::PHI:
- BBChanged |= processPHI(cast<PHINode>(II));
+ BBChanged |= processPHI(cast<PHINode>(II), LVI);
break;
case Instruction::ICmp:
case Instruction::FCmp:
- BBChanged |= processCmp(cast<CmpInst>(II));
+ BBChanged |= processCmp(cast<CmpInst>(II), LVI);
break;
case Instruction::Load:
case Instruction::Store:
- BBChanged |= processMemAccess(II);
+ BBChanged |= processMemAccess(II, LVI);
break;
case Instruction::Call:
case Instruction::Invoke:
- BBChanged |= processCallSite(CallSite(II));
+ BBChanged |= processCallSite(CallSite(II), LVI);
+ break;
+ case Instruction::SRem:
+ BBChanged |= processSRem(cast<BinaryOperator>(II), LVI);
+ break;
+ case Instruction::SDiv:
+ BBChanged |= processSDiv(cast<BinaryOperator>(II), LVI);
break;
}
}
- Instruction *Term = FI->getTerminator();
+ Instruction *Term = BB.getTerminator();
switch (Term->getOpcode()) {
case Instruction::Switch:
- BBChanged |= processSwitch(cast<SwitchInst>(Term));
+ BBChanged |= processSwitch(cast<SwitchInst>(Term), LVI);
break;
case Instruction::Ret: {
auto *RI = cast<ReturnInst>(Term);
@@ -404,7 +452,7 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
auto *RetVal = RI->getReturnValue();
if (!RetVal) break; // handle "ret void"
if (isa<Constant>(RetVal)) break; // nothing to do
- if (auto *C = getConstantAt(RetVal, RI)) {
+ if (auto *C = getConstantAt(RetVal, RI, LVI)) {
++NumReturns;
RI->replaceUsesOfWith(RetVal, C);
BBChanged = true;
@@ -417,3 +465,28 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
return FnChanged;
}
+
+bool CorrelatedValuePropagation::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+ return runImpl(F, LVI);
+}
+
+PreservedAnalyses
+CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
+
+ LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
+ bool Changed = runImpl(F, LVI);
+
+ // FIXME: We need to invalidate LVI to avoid PR28400. Is there a better
+ // solution?
+ AM.invalidate<LazyValueAnalysis>(F);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
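
The new processSDiv and processSRem routines above rewrite signed division and remainder to their unsigned forms once LazyValueInfo proves both operands are >= 0; on non-negative inputs the signed and unsigned operations coincide, which is the whole legality argument. Here is a small self-contained check of that fact (nothing in it touches LVI; the ranges are arbitrary).

// Exhaustively confirm on a small range that sdiv/srem and udiv/urem agree
// when both operands are non-negative (and the divisor is non-zero).
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (int32_t A = 0; A <= 200; ++A)
    for (int32_t B = 1; B <= 200; ++B) {
      uint32_t UA = static_cast<uint32_t>(A), UB = static_cast<uint32_t>(B);
      assert(static_cast<uint32_t>(A / B) == UA / UB);
      assert(static_cast<uint32_t>(A % B) == UA % UB);
    }
  std::printf("signed and unsigned div/rem agree on non-negative operands\n");
  return 0;
}
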
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index b67c3c7742fd7..f73809d9f0454 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -16,13 +16,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -41,7 +42,7 @@ namespace {
initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
}
bool runOnBasicBlock(BasicBlock &BB) override {
- if (skipOptnoneFunction(BB))
+ if (skipBasicBlock(BB))
return false;
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
@@ -71,28 +72,6 @@ Pass *llvm::createDeadInstEliminationPass() {
return new DeadInstElimination();
}
-
-namespace {
- //===--------------------------------------------------------------------===//
- // DeadCodeElimination pass implementation
- //
- struct DCE : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- DCE() : FunctionPass(ID) {
- initializeDCEPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- }
- };
-}
-
-char DCE::ID = 0;
-INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
-
static bool DCEInstruction(Instruction *I,
SmallSetVector<Instruction *, 16> &WorkList,
const TargetLibraryInfo *TLI) {
@@ -121,13 +100,7 @@ static bool DCEInstruction(Instruction *I,
return false;
}
-bool DCE::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
-
+static bool eliminateDeadCode(Function &F, TargetLibraryInfo *TLI) {
bool MadeChange = false;
SmallSetVector<Instruction *, 16> WorkList;
// Iterate over the original function, only adding insts to the worklist
@@ -150,7 +123,38 @@ bool DCE::runOnFunction(Function &F) {
return MadeChange;
}
-FunctionPass *llvm::createDeadCodeEliminationPass() {
- return new DCE();
+PreservedAnalyses DCEPass::run(Function &F, AnalysisManager<Function> &AM) {
+ if (eliminateDeadCode(F, AM.getCachedResult<TargetLibraryAnalysis>(F)))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+namespace {
+struct DCELegacyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DCELegacyPass() : FunctionPass(ID) {
+ initializeDCELegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
+
+ return eliminateDeadCode(F, TLI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+};
}
+char DCELegacyPass::ID = 0;
+INITIALIZE_PASS(DCELegacyPass, "dce", "Dead Code Elimination", false, false)
+
+FunctionPass *llvm::createDeadCodeEliminationPass() {
+ return new DCELegacyPass();
+}
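
The reworked DCE above stays a simple use-count-driven cleanup: DCEInstruction deletes a value once it has no users and no side effects, then pushes its operands back onto the worklist because they may have just lost their last use. The sketch below models that requeue loop over a hypothetical value graph (not LLVM IR); names and fields are illustrative only.

// When a dead value is removed, each operand loses a use and is revisited;
// the chain unwinds without ever rescanning the whole function.
#include <cstdio>
#include <vector>

struct Value {
  const char *Name;
  bool HasSideEffects;
  int NumUses;
  std::vector<Value *> Operands;
  bool Deleted;
};

static bool isTriviallyDead(const Value &V) {
  return !V.Deleted && V.NumUses == 0 && !V.HasSideEffects;
}

static void eliminateDeadValues(std::vector<Value *> Worklist) {
  while (!Worklist.empty()) {
    Value *V = Worklist.back();
    Worklist.pop_back();
    if (!isTriviallyDead(*V))
      continue;
    std::printf("deleting %s\n", V->Name);
    V->Deleted = true;
    for (Value *Op : V->Operands) {
      --Op->NumUses;               // the deleted value no longer uses Op
      if (isTriviallyDead(*Op))
        Worklist.push_back(Op);    // Op may have just become dead
    }
  }
}

int main() {
  Value A{"a", false, 2, {}, false};            // used by b and d
  Value B{"b", false, 1, {&A}, false};          // used by c
  Value C{"c", true, 0, {&B}, false};           // has side effects: kept
  Value D{"d", false, 0, {&A}, false};          // unused and pure: deleted
  eliminateDeadValues({&A, &B, &C, &D});        // prints only "deleting d"
  return 0;
}
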
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 36ad0a5f7b91c..ed58a87ae1a8a 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -15,7 +15,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
@@ -34,9 +35,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
+#include <map>
using namespace llvm;
#define DEBUG_TYPE "dse"
@@ -44,90 +48,35 @@ using namespace llvm;
STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
+STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
-namespace {
- struct DSE : public FunctionPass {
- AliasAnalysis *AA;
- MemoryDependenceAnalysis *MD;
- DominatorTree *DT;
- const TargetLibraryInfo *TLI;
-
- static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) {
- initializeDSEPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipOptnoneFunction(F))
- return false;
-
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- MD = &getAnalysis<MemoryDependenceAnalysis>();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+static cl::opt<bool>
+EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
+ cl::init(true), cl::Hidden,
+ cl::desc("Enable partial-overwrite tracking in DSE"));
- bool Changed = false;
- for (BasicBlock &I : F)
- // Only check non-dead blocks. Dead blocks may have strange pointer
- // cycles that will confuse alias analysis.
- if (DT->isReachableFromEntry(&I))
- Changed |= runOnBasicBlock(I);
-
- AA = nullptr; MD = nullptr; DT = nullptr;
- return Changed;
- }
-
- bool runOnBasicBlock(BasicBlock &BB);
- bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI);
- bool HandleFree(CallInst *F);
- bool handleEndBlock(BasicBlock &BB);
- void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
- SmallSetVector<Value *, 16> &DeadStackObjects,
- const DataLayout &DL);
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<MemoryDependenceAnalysis>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<MemoryDependenceAnalysis>();
- }
- };
-}
-
-char DSE::ID = 0;
-INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
-
-FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
-/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
-/// and zero out all the operands of this instruction. If any of them become
-/// dead, delete them and the computation tree that feeds them.
-///
+/// Delete this instruction. Before we do, go through and zero out all the
+/// operands of this instruction. If any of them become dead, delete them and
+/// the computation tree that feeds them.
/// If ValueSet is non-null, remove any deleted instructions from it as well.
-///
-static void DeleteDeadInstruction(Instruction *I,
- MemoryDependenceAnalysis &MD,
- const TargetLibraryInfo &TLI,
- SmallSetVector<Value*, 16> *ValueSet = nullptr) {
+static void
+deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
+ MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
+ SmallSetVector<Value *, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
--NumFastOther;
+ // Keeping the iterator straight is a pain, so we let this routine tell the
+ // caller what the next instruction is after we're done mucking about.
+ BasicBlock::iterator NewIter = *BBI;
+
// Before we touch this instruction, remove it from memdep!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
@@ -150,15 +99,19 @@ static void DeleteDeadInstruction(Instruction *I,
NowDeadInsts.push_back(OpI);
}
- DeadInst->eraseFromParent();
+
+ if (NewIter == DeadInst->getIterator())
+ NewIter = DeadInst->eraseFromParent();
+ else
+ DeadInst->eraseFromParent();
if (ValueSet) ValueSet->remove(DeadInst);
} while (!NowDeadInsts.empty());
+ *BBI = NewIter;
}
-
-/// hasMemoryWrite - Does this instruction write some memory? This only returns
-/// true for things that we can analyze with other helpers below.
+/// Does this instruction write some memory? This only returns true for things
+/// that we can analyze with other helpers below.
static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {
if (isa<StoreInst>(I))
return true;
@@ -176,30 +129,23 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {
}
if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
- if (TLI.has(LibFunc::strcpy) &&
- F->getName() == TLI.getName(LibFunc::strcpy)) {
+ StringRef FnName = F->getName();
+ if (TLI.has(LibFunc::strcpy) && FnName == TLI.getName(LibFunc::strcpy))
return true;
- }
- if (TLI.has(LibFunc::strncpy) &&
- F->getName() == TLI.getName(LibFunc::strncpy)) {
+ if (TLI.has(LibFunc::strncpy) && FnName == TLI.getName(LibFunc::strncpy))
return true;
- }
- if (TLI.has(LibFunc::strcat) &&
- F->getName() == TLI.getName(LibFunc::strcat)) {
+ if (TLI.has(LibFunc::strcat) && FnName == TLI.getName(LibFunc::strcat))
return true;
- }
- if (TLI.has(LibFunc::strncat) &&
- F->getName() == TLI.getName(LibFunc::strncat)) {
+ if (TLI.has(LibFunc::strncat) && FnName == TLI.getName(LibFunc::strncat))
return true;
- }
}
}
return false;
}
-/// getLocForWrite - Return a Location stored to by the specified instruction.
-/// If isRemovable returns true, this function and getLocForRead completely
-/// describe the memory operations for this instruction.
+/// Return a Location stored to by the specified instruction. If isRemovable
+/// returns true, this function and getLocForRead completely describe the memory
+/// operations for this instruction.
static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return MemoryLocation::get(SI);
@@ -228,8 +174,8 @@ static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
}
}
-/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
-/// instruction if any.
+/// Return the location read by the specified "hasMemoryWrite" instruction if
+/// any.
static MemoryLocation getLocForRead(Instruction *Inst,
const TargetLibraryInfo &TLI) {
assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case");
@@ -241,9 +187,8 @@ static MemoryLocation getLocForRead(Instruction *Inst,
return MemoryLocation();
}
-
-/// isRemovable - If the value of this instruction and the memory it writes to
-/// is unused, may we delete this instruction?
+/// If the value of this instruction and the memory it writes to is unused, may
+/// we delete this instruction?
static bool isRemovable(Instruction *I) {
// Don't remove volatile/atomic stores.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
@@ -275,9 +220,9 @@ static bool isRemovable(Instruction *I) {
}
-/// isShortenable - Returns true if this instruction can be safely shortened in
+/// Returns true if the end of this instruction can be safely shortened in
/// length.
-static bool isShortenable(Instruction *I) {
+static bool isShortenableAtTheEnd(Instruction *I) {
// Don't shorten stores for now
if (isa<StoreInst>(I))
return false;
@@ -288,6 +233,7 @@ static bool isShortenable(Instruction *I) {
case Intrinsic::memset:
case Intrinsic::memcpy:
// Do shorten memory intrinsics.
+ // FIXME: Add memmove if it's also safe to transform.
return true;
}
}
@@ -297,7 +243,16 @@ static bool isShortenable(Instruction *I) {
return false;
}
-/// getStoredPointerOperand - Return the pointer that is being written to.
+/// Returns true if the beginning of this instruction can be safely shortened
+/// in length.
+static bool isShortenableAtTheBeginning(Instruction *I) {
+ // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
+ // easily done by offsetting the source address.
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+ return II && II->getIntrinsicID() == Intrinsic::memset;
+}
+
+/// Return the pointer that is being written to.
static Value *getStoredPointerOperand(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
@@ -327,46 +282,45 @@ static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
}
namespace {
- enum OverwriteResult
- {
- OverwriteComplete,
- OverwriteEnd,
- OverwriteUnknown
- };
+enum OverwriteResult {
+ OverwriteBegin,
+ OverwriteComplete,
+ OverwriteEnd,
+ OverwriteUnknown
+};
}
-/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
-/// completely overwrites a store to the 'Earlier' location.
-/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
-/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
+typedef DenseMap<Instruction *,
+ std::map<int64_t, int64_t>> InstOverlapIntervalsTy;
+
+/// Return 'OverwriteComplete' if a store to the 'Later' location completely
+/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
+/// the 'Earlier' location is completely overwritten by 'Later',
+/// 'OverwriteBegin' if the beginning of the 'Earlier' location is overwritten
+/// by 'Later', or 'OverwriteUnknown' if nothing can be determined.
static OverwriteResult isOverwrite(const MemoryLocation &Later,
const MemoryLocation &Earlier,
const DataLayout &DL,
const TargetLibraryInfo &TLI,
- int64_t &EarlierOff, int64_t &LaterOff) {
+ int64_t &EarlierOff, int64_t &LaterOff,
+ Instruction *DepWrite,
+ InstOverlapIntervalsTy &IOL) {
+ // If we don't know the sizes of either access, then we can't do a comparison.
+ if (Later.Size == MemoryLocation::UnknownSize ||
+ Earlier.Size == MemoryLocation::UnknownSize)
+ return OverwriteUnknown;
+
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
// If the start pointers are the same, we just have to compare sizes to see if
// the later store was larger than the earlier store.
if (P1 == P2) {
- // If we don't know the sizes of either access, then we can't do a
- // comparison.
- if (Later.Size == MemoryLocation::UnknownSize ||
- Earlier.Size == MemoryLocation::UnknownSize)
- return OverwriteUnknown;
-
// Make sure that the Later size is >= the Earlier size.
if (Later.Size >= Earlier.Size)
return OverwriteComplete;
}
- // Otherwise, we have to have size information, and the later store has to be
- // larger than the earlier one.
- if (Later.Size == MemoryLocation::UnknownSize ||
- Earlier.Size == MemoryLocation::UnknownSize)
- return OverwriteUnknown;
-
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval/inalloca argument). If so, then it clearly
// overwrites any other store to the same object.
@@ -416,8 +370,68 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
return OverwriteComplete;
- // The other interesting case is if the later store overwrites the end of
- // the earlier store
+ // We may now overlap, although the overlap is not complete. There might also
+ // be other incomplete overlaps, and together, they might cover the complete
+ // earlier write.
+ // Note: The correctness of this logic depends on the fact that this function
+ // is never called with DepWrite when there are any intervening reads.
+ if (EnablePartialOverwriteTracking &&
+ LaterOff < int64_t(EarlierOff + Earlier.Size) &&
+ int64_t(LaterOff + Later.Size) >= EarlierOff) {
+
+ // Insert our part of the overlap into the map.
+ auto &IM = IOL[DepWrite];
+ DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff << ", " <<
+ int64_t(EarlierOff + Earlier.Size) << ") Later [" <<
+ LaterOff << ", " << int64_t(LaterOff + Later.Size) << ")\n");
+
+ // Make sure that we only insert non-overlapping intervals and combine
+ // adjacent intervals. The intervals are stored in the map with the ending
+ // offset as the key (in the half-open sense) and the starting offset as
+ // the value.
+ int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + Later.Size;
+
+ // Find any intervals ending at, or after, LaterIntStart which start
+ // before LaterIntEnd.
+ auto ILI = IM.lower_bound(LaterIntStart);
+ if (ILI != IM.end() && ILI->second <= LaterIntEnd) {
+ // This existing interval is overlapped with the current store somewhere
+ // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing
+ // intervals and adjusting our start and end.
+ LaterIntStart = std::min(LaterIntStart, ILI->second);
+ LaterIntEnd = std::max(LaterIntEnd, ILI->first);
+ ILI = IM.erase(ILI);
+
+ // Continue erasing and adjusting our end in case other previous
+ // intervals are also overlapped with the current store.
+ //
+ // |--- earlier 1 ---| |--- earlier 2 ---|
+ // |------- later---------|
+ //
+ while (ILI != IM.end() && ILI->second <= LaterIntEnd) {
+ assert(ILI->second > LaterIntStart && "Unexpected interval");
+ LaterIntEnd = std::max(LaterIntEnd, ILI->first);
+ ILI = IM.erase(ILI);
+ }
+ }
+
+ IM[LaterIntEnd] = LaterIntStart;
+
+ ILI = IM.begin();
+ if (ILI->second <= EarlierOff &&
+ ILI->first >= int64_t(EarlierOff + Earlier.Size)) {
+ DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier [" <<
+ EarlierOff << ", " <<
+ int64_t(EarlierOff + Earlier.Size) <<
+ ") Composite Later [" <<
+ ILI->second << ", " << ILI->first << ")\n");
+ ++NumCompletePartials;
+ return OverwriteComplete;
+ }
+ }
+
+ // Another interesting case is if the later store overwrites the end of the
+ // earlier store.
//
// |--earlier--|
// |-- later --|
@@ -429,11 +443,25 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
return OverwriteEnd;
+ // Finally, we also need to check if the later store overwrites the beginning
+ // of the earlier store.
+ //
+ // |--earlier--|
+ // |-- later --|
+ //
+ // In this case we may want to move the destination address and trim the size
+ // of earlier to avoid generating writes to addresses which will definitely
+ // be overwritten later.
+ if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
+ assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)
+ && "Expect to be handled as OverwriteComplete" );
+ return OverwriteBegin;
+ }
// Otherwise, they don't completely overlap.
return OverwriteUnknown;
}
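
The partial-overwrite tracking added above keeps, per earlier write, a map from interval end offset to start offset, merges overlapping or adjacent later writes as they are seen, and declares the earlier store completely overwritten once the first interval spans it. A standalone sketch of just that bookkeeping, assuming the same half-open [start, end) convention (plain C++, illustrative names, not LLVM code):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <map>

// Intervals are half-open [start, end), stored as end -> start, like IOL above.
using IntervalMap = std::map<int64_t, int64_t>;

// Record a later write covering [Start, End), merging it with any overlapping
// or adjacent intervals already recorded against the same earlier write.
static void addLaterWrite(IntervalMap &IM, int64_t Start, int64_t End) {
  auto ILI = IM.lower_bound(Start); // first interval whose end >= Start
  if (ILI != IM.end() && ILI->second <= End) {
    Start = std::min(Start, ILI->second);
    End = std::max(End, ILI->first);
    ILI = IM.erase(ILI);
    while (ILI != IM.end() && ILI->second <= End) {
      End = std::max(End, ILI->first);
      ILI = IM.erase(ILI);
    }
  }
  IM[End] = Start;
}

// True once the recorded later writes jointly cover
// [EarlierOff, EarlierOff + EarlierSize), i.e. the partial overwrites add up
// to a complete one.
static bool coversEarlier(const IntervalMap &IM, int64_t EarlierOff,
                          int64_t EarlierSize) {
  auto ILI = IM.begin();
  return ILI != IM.end() && ILI->second <= EarlierOff &&
         ILI->first >= EarlierOff + EarlierSize;
}

int main() {
  IntervalMap IM;                                // earlier write covers [0, 32)
  addLaterWrite(IM, 0, 16);                      // later store kills the front half
  std::printf("%d\n", coversEarlier(IM, 0, 32)); // 0: not yet complete
  addLaterWrite(IM, 12, 32);                     // overlaps and kills the rest
  std::printf("%d\n", coversEarlier(IM, 0, 32)); // 1: OverwriteComplete
  return 0;
}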
-/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
+/// If 'Inst' might be a self read (i.e. a noop copy of a
/// memory region into an identical pointer) then it doesn't actually make its
/// input dead in the traditional sense. Consider this case:
///
@@ -478,192 +506,13 @@ static bool isPossibleSelfRead(Instruction *Inst,
}
-//===----------------------------------------------------------------------===//
-// DSE Pass
-//===----------------------------------------------------------------------===//
-
-bool DSE::runOnBasicBlock(BasicBlock &BB) {
- const DataLayout &DL = BB.getModule()->getDataLayout();
- bool MadeChange = false;
-
- // Do a top-down walk on the BB.
- for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
- Instruction *Inst = &*BBI++;
-
- // Handle 'free' calls specially.
- if (CallInst *F = isFreeCall(Inst, TLI)) {
- MadeChange |= HandleFree(F);
- continue;
- }
-
- // If we find something that writes memory, get its memory dependence.
- if (!hasMemoryWrite(Inst, *TLI))
- continue;
-
- // If we're storing the same value back to a pointer that we just
- // loaded from, then the store can be removed.
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-
- auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) {
- // DeleteDeadInstruction can delete the current instruction. Save BBI
- // in case we need it.
- WeakVH NextInst(&*BBI);
-
- DeleteDeadInstruction(DeadInst, *MD, *TLI);
-
- if (!NextInst) // Next instruction deleted.
- BBI = BB.begin();
- else if (BBI != BB.begin()) // Revisit this instruction if possible.
- --BBI;
- ++NumRedundantStores;
- MadeChange = true;
- };
-
- if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
- if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- isRemovable(SI) &&
- MemoryIsNotModifiedBetween(DepLoad, SI)) {
-
- DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
- << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
-
- RemoveDeadInstAndUpdateBBI(SI);
- continue;
- }
- }
-
- // Remove null stores into the calloc'ed objects
- Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
-
- if (StoredConstant && StoredConstant->isNullValue() &&
- isRemovable(SI)) {
- Instruction *UnderlyingPointer = dyn_cast<Instruction>(
- GetUnderlyingObject(SI->getPointerOperand(), DL));
-
- if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
- MemoryIsNotModifiedBetween(UnderlyingPointer, SI)) {
- DEBUG(dbgs()
- << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
- << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
-
- RemoveDeadInstAndUpdateBBI(SI);
- continue;
- }
- }
- }
-
- MemDepResult InstDep = MD->getDependency(Inst);
-
- // Ignore any store where we can't find a local dependence.
- // FIXME: cross-block DSE would be fun. :)
- if (!InstDep.isDef() && !InstDep.isClobber())
- continue;
-
- // Figure out what location is being stored to.
- MemoryLocation Loc = getLocForWrite(Inst, *AA);
-
- // If we didn't get a useful location, fail.
- if (!Loc.Ptr)
- continue;
-
- while (InstDep.isDef() || InstDep.isClobber()) {
- // Get the memory clobbered by the instruction we depend on. MemDep will
- // skip any instructions that 'Loc' clearly doesn't interact with. If we
- // end up depending on a may- or must-aliased load, then we can't optimize
- // away the store and we bail out. However, if we depend on on something
- // that overwrites the memory location we *can* potentially optimize it.
- //
- // Find out what memory location the dependent instruction stores.
- Instruction *DepWrite = InstDep.getInst();
- MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
- // If we didn't get a useful location, or if it isn't a size, bail out.
- if (!DepLoc.Ptr)
- break;
-
- // If we find a write that is a) removable (i.e., non-volatile), b) is
- // completely obliterated by the store to 'Loc', and c) which we know that
- // 'Inst' doesn't load from, then we can remove it.
- if (isRemovable(DepWrite) &&
- !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
- int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR =
- isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
- if (OR == OverwriteComplete) {
- DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
- << *DepWrite << "\n KILLER: " << *Inst << '\n');
-
- // Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD, *TLI);
- ++NumFastStores;
- MadeChange = true;
-
- // DeleteDeadInstruction can delete the current instruction in loop
- // cases, reset BBI.
- BBI = Inst->getIterator();
- if (BBI != BB.begin())
- --BBI;
- break;
- } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
- // TODO: base this on the target vector size so that if the earlier
- // store was too small to get vector writes anyway then its likely
- // a good idea to shorten it
- // Power of 2 vector writes are probably always a bad idea to optimize
- // as any store/memset/memcpy is likely using vector instructions so
- // shortening it to not vector size is likely to be slower
- MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
- unsigned DepWriteAlign = DepIntrinsic->getAlignment();
- if (llvm::isPowerOf2_64(InstWriteOffset) ||
- ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
- DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: "
- << *DepWrite << "\n KILLER (offset "
- << InstWriteOffset << ", "
- << DepLoc.Size << ")"
- << *Inst << '\n');
-
- Value* DepWriteLength = DepIntrinsic->getLength();
- Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
- InstWriteOffset -
- DepWriteOffset);
- DepIntrinsic->setLength(TrimmedLength);
- MadeChange = true;
- }
- }
- }
-
- // If this is a may-aliased store that is clobbering the store value, we
- // can keep searching past it for another must-aliased pointer that stores
- // to the same location. For example, in:
- // store -> P
- // store -> Q
- // store -> P
- // we can remove the first store to P even though we don't know if P and Q
- // alias.
- if (DepWrite == &BB.front()) break;
-
- // Can't look past this instruction if it might read 'Loc'.
- if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
- break;
-
- InstDep = MD->getPointerDependencyFrom(Loc, false,
- DepWrite->getIterator(), &BB);
- }
- }
-
- // If this block ends in a return, unwind, or unreachable, all allocas are
- // dead at its end, which means stores to them are also dead.
- if (BB.getTerminator()->getNumSuccessors() == 0)
- MadeChange |= handleEndBlock(BB);
-
- return MadeChange;
-}
-
/// Returns true if the memory which is accessed by the second instruction is not
/// modified between the first and the second instruction.
/// Precondition: Second instruction must be dominated by the first
/// instruction.
-bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI,
- Instruction *SecondI) {
+static bool memoryIsNotModifiedBetween(Instruction *FirstI,
+ Instruction *SecondI,
+ AliasAnalysis *AA) {
SmallVector<BasicBlock *, 16> WorkList;
SmallPtrSet<BasicBlock *, 8> Visited;
BasicBlock::iterator FirstBBI(FirstI);
@@ -718,7 +567,7 @@ bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI,
/// Find all blocks that will unconditionally lead to the block BB and append
/// them to Blocks.
-static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
+static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
BasicBlock *BB, DominatorTree *DT) {
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
BasicBlock *Pred = *I;
@@ -732,9 +581,11 @@ static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
}
}
-/// HandleFree - Handle frees of entire structures whose dependency is a store
+/// Handle frees of entire structures whose dependency is a store
/// to a field of that structure.
-bool DSE::HandleFree(CallInst *F) {
+static bool handleFree(CallInst *F, AliasAnalysis *AA,
+ MemoryDependenceResults *MD, DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
bool MadeChange = false;
MemoryLocation Loc = MemoryLocation(F->getOperand(0));
@@ -761,10 +612,9 @@ bool DSE::HandleFree(CallInst *F) {
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
break;
- auto Next = ++Dependency->getIterator();
-
- // DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency, *MD, *TLI);
+ // DCE instructions only used to calculate that store.
+ BasicBlock::iterator BBI(Dependency);
+ deleteDeadInstruction(Dependency, &BBI, *MD, *TLI);
++NumFastStores;
MadeChange = true;
@@ -773,23 +623,53 @@ bool DSE::HandleFree(CallInst *F) {
// s[0] = 0;
// s[1] = 0; // This has just been deleted.
// free(s);
- Dep = MD->getPointerDependencyFrom(Loc, false, Next, BB);
+ Dep = MD->getPointerDependencyFrom(Loc, false, BBI, BB);
}
if (Dep.isNonLocal())
- FindUnconditionalPreds(Blocks, BB, DT);
+ findUnconditionalPreds(Blocks, BB, DT);
}
return MadeChange;
}
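
For reference, a hypothetical source-level input of the shape handleFree targets, mirroring the s[0]/s[1] comment in the hunk above; both stores are dead because nothing reads the buffer before it is freed:

#include <cstdlib>

static void dropScratchBuffer(char *Buf) {
  Buf[0] = 0; // dead: nothing reads Buf before the free
  Buf[1] = 0; // dead as well
  std::free(Buf);
}

int main() {
  if (char *Buf = static_cast<char *>(std::malloc(16)))
    dropScratchBuffer(Buf);
  return 0;
}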
-/// handleEndBlock - Remove dead stores to stack-allocated locations in the
-/// function end block. Ex:
+/// Check to see if the specified location may alias any of the stack objects in
+/// the DeadStackObjects set. If so, they become live because the location is
+/// being loaded.
+static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
+ SmallSetVector<Value *, 16> &DeadStackObjects,
+ const DataLayout &DL, AliasAnalysis *AA,
+ const TargetLibraryInfo *TLI) {
+ const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);
+
+ // A constant can't be in the dead pointer set.
+ if (isa<Constant>(UnderlyingPointer))
+ return;
+
+ // If the kill pointer can be easily reduced to an alloca, don't bother doing
+ // extraneous AA queries.
+ if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
+ DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
+ return;
+ }
+
+ // Remove objects that could alias LoadedLoc.
+ DeadStackObjects.remove_if([&](Value *I) {
+ // See if the loaded location could alias the stack location.
+ MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI));
+ return !AA->isNoAlias(StackLoc, LoadedLoc);
+ });
+}
+
+/// Remove dead stores to stack-allocated locations in the function end block.
+/// Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
-bool DSE::handleEndBlock(BasicBlock &BB) {
+static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
+ MemoryDependenceResults *MD,
+ const TargetLibraryInfo *TLI) {
bool MadeChange = false;
// Keep track of all of the stack objects that are dead at the end of the
@@ -828,15 +708,14 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Stores to stack values are valid candidates for removal.
bool AllDead = true;
- for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
- E = Pointers.end(); I != E; ++I)
- if (!DeadStackObjects.count(*I)) {
+ for (Value *Pointer : Pointers)
+ if (!DeadStackObjects.count(Pointer)) {
AllDead = false;
break;
}
if (AllDead) {
- Instruction *Dead = &*BBI++;
+ Instruction *Dead = &*BBI;
DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
<< *Dead << "\n Objects: ";
@@ -849,7 +728,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
dbgs() << '\n');
// DCE instructions only used to calculate that store.
- DeleteDeadInstruction(Dead, *MD, *TLI, &DeadStackObjects);
+ deleteDeadInstruction(Dead, &BBI, *MD, *TLI, &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -858,8 +737,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Remove any dead non-memory-mutating instructions.
if (isInstructionTriviallyDead(&*BBI, TLI)) {
- Instruction *Inst = &*BBI++;
- DeleteDeadInstruction(Inst, *MD, *TLI, &DeadStackObjects);
+ deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -873,7 +751,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
if (auto CS = CallSite(&*BBI)) {
- // Remove allocation function calls from the list of dead stack objects;
+ // Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
if (isAllocLikeFn(&*BBI, TLI))
DeadStackObjects.remove(&*BBI);
@@ -900,6 +778,14 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
+ // We can remove the dead stores, irrespective of the fence and its ordering
+ // (release/acquire/seq_cst). Fences only constrain the ordering of
+ // already visible stores; they do not make a store visible to other
+ // threads. So, skipping over a fence does not change a store from being
+ // dead.
+ if (isa<FenceInst>(*BBI))
+ continue;
+
MemoryLocation LoadedLoc;
// If we encounter a use of the pointer, it is no longer considered dead
@@ -922,7 +808,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Remove any allocas from the DeadPointer set that are loaded, as this
// makes any stores above the access live.
- RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL);
+ removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI);
// If all of the allocas were clobbered by the access then we're not going
// to find anything else to process.
@@ -933,29 +819,285 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
return MadeChange;
}
-/// RemoveAccessedObjects - Check to see if the specified location may alias any
-/// of the stack objects in the DeadStackObjects set. If so, they become live
-/// because the location is being loaded.
-void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
- SmallSetVector<Value *, 16> &DeadStackObjects,
- const DataLayout &DL) {
- const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);
+static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
+ AliasAnalysis *AA, MemoryDependenceResults *MD,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ // Must be a store instruction.
+ StoreInst *SI = dyn_cast<StoreInst>(Inst);
+ if (!SI)
+ return false;
- // A constant can't be in the dead pointer set.
- if (isa<Constant>(UnderlyingPointer))
- return;
+ // If we're storing the same value back to a pointer that we just loaded from,
+ // then the store can be removed.
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
+ if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+ isRemovable(SI) && memoryIsNotModifiedBetween(DepLoad, SI, AA)) {
- // If the kill pointer can be easily reduced to an alloca, don't bother doing
- // extraneous AA queries.
- if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
- DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
- return;
+ DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: "
+ << *DepLoad << "\n STORE: " << *SI << '\n');
+
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI);
+ ++NumRedundantStores;
+ return true;
+ }
}
- // Remove objects that could alias LoadedLoc.
- DeadStackObjects.remove_if([&](Value *I) {
- // See if the loaded location could alias the stack location.
- MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI));
- return !AA->isNoAlias(StackLoc, LoadedLoc);
- });
+ // Remove null stores into the calloc'ed objects
+ Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
+ if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) {
+ Instruction *UnderlyingPointer =
+ dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL));
+
+ if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
+ memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) {
+ DEBUG(
+ dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
+ << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
+
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI);
+ ++NumRedundantStores;
+ return true;
+ }
+ }
+ return false;
+}
+
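
A hypothetical source-level illustration of the two patterns eliminateNoopStore handles, assuming no intervening writes between the load/calloc and the store (which is what memoryIsNotModifiedBetween checks):

#include <cstdlib>

static int *touch(int *Slot) {
  int V = *Slot;
  *Slot = V; // stores the value just loaded from Slot: removable
  int *Zeroed = static_cast<int *>(std::calloc(4, sizeof(int)));
  if (Zeroed)
    Zeroed[2] = 0; // stores null into calloc'ed memory: removable
  return Zeroed;
}

int main() {
  int S = 7;
  std::free(touch(&S));
  return 0;
}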
+static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
+ MemoryDependenceResults *MD, DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ const DataLayout &DL = BB.getModule()->getDataLayout();
+ bool MadeChange = false;
+
+ // A map of interval maps representing partially-overwritten value parts.
+ InstOverlapIntervalsTy IOL;
+
+ // Do a top-down walk on the BB.
+ for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
+ // Handle 'free' calls specially.
+ if (CallInst *F = isFreeCall(&*BBI, TLI)) {
+ MadeChange |= handleFree(F, AA, MD, DT, TLI);
+ // Increment BBI after handleFree has potentially deleted instructions.
+ // This ensures we maintain a valid iterator.
+ ++BBI;
+ continue;
+ }
+
+ Instruction *Inst = &*BBI++;
+
+ // Check to see if Inst writes to memory. If not, continue.
+ if (!hasMemoryWrite(Inst, *TLI))
+ continue;
+
+ // eliminateNoopStore will update the iterator, if necessary.
+ if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // If we find something that writes memory, get its memory dependence.
+ MemDepResult InstDep = MD->getDependency(Inst);
+
+ // Ignore any store where we can't find a local dependence.
+ // FIXME: cross-block DSE would be fun. :)
+ if (!InstDep.isDef() && !InstDep.isClobber())
+ continue;
+
+ // Figure out what location is being stored to.
+ MemoryLocation Loc = getLocForWrite(Inst, *AA);
+
+ // If we didn't get a useful location, fail.
+ if (!Loc.Ptr)
+ continue;
+
+ while (InstDep.isDef() || InstDep.isClobber()) {
+ // Get the memory clobbered by the instruction we depend on. MemDep will
+ // skip any instructions that 'Loc' clearly doesn't interact with. If we
+ // end up depending on a may- or must-aliased load, then we can't optimize
+ // away the store and we bail out. However, if we depend on something
+ // that overwrites the memory location we *can* potentially optimize it.
+ //
+ // Find out what memory location the dependent instruction stores.
+ Instruction *DepWrite = InstDep.getInst();
+ MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
+ // If we didn't get a useful location, or if its size is unknown, bail out.
+ if (!DepLoc.Ptr)
+ break;
+
+ // If we find a write that is a) removable (i.e., non-volatile), b) is
+ // completely obliterated by the store to 'Loc', and c) which we know that
+ // 'Inst' doesn't load from, then we can remove it.
+ if (isRemovable(DepWrite) &&
+ !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
+ int64_t InstWriteOffset, DepWriteOffset;
+ OverwriteResult OR =
+ isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset,
+ DepWrite, IOL);
+ if (OR == OverwriteComplete) {
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
+ << *DepWrite << "\n KILLER: " << *Inst << '\n');
+
+ // Delete the store and now-dead instructions that feed it.
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // We erased DepWrite; start over.
+ InstDep = MD->getDependency(Inst);
+ continue;
+ } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
+ ((OR == OverwriteBegin &&
+ isShortenableAtTheBeginning(DepWrite)))) {
+ // TODO: Base this on the target vector size, so that if the earlier
+ // store was too small to get vector writes anyway, it's likely a good
+ // idea to shorten it.
+ // Power-of-2 vector writes are probably always a bad idea to optimize,
+ // as any store/memset/memcpy is likely using vector instructions, so
+ // shortening it to a non-vector size is likely to be slower.
+ MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+ unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+ bool IsOverwriteEnd = (OR == OverwriteEnd);
+ if (!IsOverwriteEnd)
+ InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
+
+ if ((llvm::isPowerOf2_64(InstWriteOffset) &&
+ DepWriteAlign <= InstWriteOffset) ||
+ ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
+
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
+ << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
+ << *DepWrite << "\n KILLER (offset "
+ << InstWriteOffset << ", " << DepLoc.Size << ")"
+ << *Inst << '\n');
+
+ int64_t NewLength =
+ IsOverwriteEnd
+ ? InstWriteOffset - DepWriteOffset
+ : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
+
+ Value *DepWriteLength = DepIntrinsic->getLength();
+ Value *TrimmedLength =
+ ConstantInt::get(DepWriteLength->getType(), NewLength);
+ DepIntrinsic->setLength(TrimmedLength);
+
+ if (!IsOverwriteEnd) {
+ int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
+ Value *Indices[1] = {
+ ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
+ GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
+ DepIntrinsic->getRawDest(), Indices, "", DepWrite);
+ DepIntrinsic->setDest(NewDestGEP);
+ }
+ MadeChange = true;
+ }
+ }
+ }
+
+ // If this is a may-aliased store that is clobbering the store value, we
+ // can keep searching past it for another must-aliased pointer that stores
+ // to the same location. For example, in:
+ // store -> P
+ // store -> Q
+ // store -> P
+ // we can remove the first store to P even though we don't know if P and Q
+ // alias.
+ if (DepWrite == &BB.front()) break;
+
+ // Can't look past this instruction if it might read 'Loc'.
+ if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
+ break;
+
+ InstDep = MD->getPointerDependencyFrom(Loc, false,
+ DepWrite->getIterator(), &BB);
+ }
+ }
+
+ // If this block ends in a return, unwind, or unreachable, all allocas are
+ // dead at its end, which means stores to them are also dead.
+ if (BB.getTerminator()->getNumSuccessors() == 0)
+ MadeChange |= handleEndBlock(BB, AA, MD, TLI);
+
+ return MadeChange;
+}
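
The OverwriteEnd/OverwriteBegin shortening above reduces to a small piece of offset arithmetic: trim the earlier intrinsic's length, and for begin-trimming also advance its destination. A standalone sketch of that arithmetic, with illustrative names and without the alignment guard the real code applies:

#include <cstdint>
#include <cstdio>

// The earlier intrinsic writes [DepOff, DepOff + DepSize); the later store
// writes [InstOff, InstOff + InstSize).
struct Trim {
  int64_t NewLength;   // trimmed length of the earlier write
  int64_t OffsetMoved; // how far its destination moves (0 for end-trimming)
};

static Trim trimEarlierWrite(bool OverwriteEnd, int64_t DepOff, int64_t DepSize,
                             int64_t InstOff, int64_t InstSize) {
  if (OverwriteEnd)
    return {InstOff - DepOff, 0}; // keep only [DepOff, InstOff)
  // OverwriteBegin: keep [InstOff + InstSize, DepOff + DepSize) by moving the
  // destination forward and shrinking the length accordingly.
  int64_t LaterEnd = InstOff + InstSize;
  return {DepSize - (LaterEnd - DepOff), LaterEnd - DepOff};
}

int main() {
  // memset covering [0, 32) followed by a 16-byte store covering [24, 40):
  Trim End = trimEarlierWrite(/*OverwriteEnd=*/true, 0, 32, 24, 16);
  std::printf("end trim: length %lld, move %lld\n", (long long)End.NewLength,
              (long long)End.OffsetMoved); // 24, 0
  // memset covering [8, 40) whose beginning is clobbered by a later 16-byte
  // store covering [0, 16):
  Trim Begin = trimEarlierWrite(/*OverwriteEnd=*/false, 8, 32, 0, 16);
  std::printf("begin trim: length %lld, move %lld\n",
              (long long)Begin.NewLength, (long long)Begin.OffsetMoved); // 24, 8
  return 0;
}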
+
+static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
+ MemoryDependenceResults *MD, DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ bool MadeChange = false;
+ for (BasicBlock &BB : F)
+ // Only check non-dead blocks. Dead blocks may have strange pointer
+ // cycles that will confuse alias analysis.
+ if (DT->isReachableFromEntry(&BB))
+ MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// DSE Pass
+//===----------------------------------------------------------------------===//
+PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
+ AliasAnalysis *AA = &AM.getResult<AAManager>(F);
+ DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
+ const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+
+ if (!eliminateDeadStores(F, AA, MD, DT, TLI))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<GlobalsAA>();
+ PA.preserve<MemoryDependenceAnalysis>();
+ return PA;
+}
+
+namespace {
+/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
+class DSELegacyPass : public FunctionPass {
+public:
+ DSELegacyPass() : FunctionPass(ID) {
+ initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ MemoryDependenceResults *MD =
+ &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+ return eliminateDeadStores(F, AA, MD, DT, TLI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MemoryDependenceWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<MemoryDependenceWrapperPass>();
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+};
+} // end anonymous namespace
+
+char DSELegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
+ false)
+
+FunctionPass *llvm::createDeadStoreEliminationPass() {
+ return new DSELegacyPass();
}
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 7ef062e71ff3a..9d0ef42e0396d 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -16,8 +16,8 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -40,6 +40,7 @@ using namespace llvm::PatternMatch;
STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
+STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");
@@ -97,15 +98,6 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
std::swap(LHS, RHS);
- if (isa<OverflowingBinaryOperator>(BinOp)) {
- // Hash the overflow behavior
- unsigned Overflow =
- BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
- BinOp->hasNoUnsignedWrap() *
- OverflowingBinaryOperator::NoUnsignedWrap;
- return hash_combine(BinOp->getOpcode(), Overflow, LHS, RHS);
- }
-
return hash_combine(BinOp->getOpcode(), LHS, RHS);
}
@@ -152,7 +144,7 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
if (LHSI->getOpcode() != RHSI->getOpcode())
return false;
- if (LHSI->isIdenticalTo(RHSI))
+ if (LHSI->isIdenticalToWhenDefined(RHSI))
return true;
// If we're not strictly identical, we still might be a commutable instruction
@@ -164,15 +156,6 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
"same opcode, but different instruction type?");
BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
- // Check overflow attributes
- if (isa<OverflowingBinaryOperator>(LHSBinOp)) {
- assert(isa<OverflowingBinaryOperator>(RHSBinOp) &&
- "same opcode, but different operator type?");
- if (LHSBinOp->hasNoUnsignedWrap() != RHSBinOp->hasNoUnsignedWrap() ||
- LHSBinOp->hasNoSignedWrap() != RHSBinOp->hasNoSignedWrap())
- return false;
- }
-
// Commuted equality
return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
@@ -296,16 +279,18 @@ public:
/// present the table; it is the responsibility of the consumer to inspect
/// the atomicity/volatility if needed.
struct LoadValue {
- Value *Data;
+ Instruction *DefInst;
unsigned Generation;
int MatchingId;
bool IsAtomic;
+ bool IsInvariant;
LoadValue()
- : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
- LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
- bool IsAtomic)
- : Data(Data), Generation(Generation), MatchingId(MatchingId),
- IsAtomic(IsAtomic) {}
+ : DefInst(nullptr), Generation(0), MatchingId(-1), IsAtomic(false),
+ IsInvariant(false) {}
+ LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
+ bool IsAtomic, bool IsInvariant)
+ : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
+ IsAtomic(IsAtomic), IsInvariant(IsInvariant) {}
};
typedef RecyclingAllocator<BumpPtrAllocator,
ScopedHashTableVal<Value *, LoadValue>>
@@ -318,7 +303,8 @@ public:
/// values.
///
/// It uses the same generation count as loads.
- typedef ScopedHashTable<CallValue, std::pair<Value *, unsigned>> CallHTType;
+ typedef ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>
+ CallHTType;
CallHTType AvailableCalls;
/// \brief This is the current generation of the memory value.
@@ -354,7 +340,7 @@ private:
// Contains all the needed information to create a stack for doing a depth
// first traversal of the tree. This includes scopes for values, loads, and
// calls as well as the generation. There is a child iterator so that the
- // children do not need to be store spearately.
+ // children do not need to be stored separately.
class StackNode {
public:
StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
@@ -446,7 +432,12 @@ private:
return true;
}
-
+ bool isInvariantLoad() const {
+ if (auto *LI = dyn_cast<LoadInst>(Inst))
+ return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+ return false;
+ }
+
bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
return (getPointerOperand() == Inst.getPointerOperand() &&
getMatchingId() == Inst.getMatchingId());
@@ -500,6 +491,7 @@ private:
}
bool EarlyCSE::processNode(DomTreeNode *Node) {
+ bool Changed = false;
BasicBlock *BB = Node->getBlock();
// If this block has a single predecessor, then the predecessor is the parent
@@ -513,7 +505,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If this node has a single predecessor which ends in a conditional branch,
// we can infer the value of the branch condition given that we took this
- // path. We need the single predeccesor to ensure there's not another path
+ // path. We need the single predecessor to ensure there's not another path
// which reaches this block where the condition might hold a different
// value. Since we're adding this to the scoped hash table (like any other
// def), it will have been popped if we encounter a future merge block.
@@ -530,9 +522,13 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
<< CondInst->getName() << "' as " << *ConditionalConstant
<< " in " << BB->getName() << "\n");
- // Replace all dominated uses with the known value
- replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
- BasicBlockEdge(Pred, BB));
+ // Replace all dominated uses with the known value.
+ if (unsigned Count =
+ replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
+ BasicBlockEdge(Pred, BB))) {
+ Changed = true;
+ NumCSECVP = NumCSECVP + Count;
+ }
}
/// LastStore - Keep track of the last non-volatile store that we saw... for
@@ -541,7 +537,6 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
/// stores which can occur in bitfield code among other things.
Instruction *LastStore = nullptr;
- bool Changed = false;
const DataLayout &DL = BB->getModule()->getDataLayout();
// See if any instructions in the block can be eliminated. If so, do it. If
@@ -567,15 +562,38 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
+ if (auto *CondI =
+ dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
+ // The condition we're on guarding here is true for all dominated
+ // locations.
+ if (SimpleValue::canHandle(CondI))
+ AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
+ }
+
+ // Guard intrinsics read all memory, but don't write any memory.
+ // Accordingly, don't update the generation but consume the last store (to
+ // avoid an incorrect DSE).
+ LastStore = nullptr;
+ continue;
+ }
+
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
- Inst->replaceAllUsesWith(V);
- Inst->eraseFromParent();
- Changed = true;
- ++NumSimplify;
- continue;
+ if (!Inst->use_empty()) {
+ Inst->replaceAllUsesWith(V);
+ Changed = true;
+ }
+ if (isInstructionTriviallyDead(Inst, &TLI)) {
+ Inst->eraseFromParent();
+ Changed = true;
+ }
+ if (Changed) {
+ ++NumSimplify;
+ continue;
+ }
}
// If this is a simple instruction that we can value number, process it.
@@ -583,6 +601,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// See if the instruction has an available value. If so, use it.
if (Value *V = AvailableValues.lookup(Inst)) {
DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
+ if (auto *I = dyn_cast<Instruction>(V))
+ I->andIRFlags(Inst);
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
Changed = true;
@@ -606,18 +626,25 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
// If we have an available version of this load, and if it is the right
- // generation, replace this instruction.
+ // generation or the load is known to be from an invariant location,
+ // replace this instruction.
+ //
+ // A dominating invariant load implies that the location loaded from is
+ // unchanging beginning at the point of the invariant load, so the load
+ // we're CSE'ing _away_ does not need to be invariant, only the available
+ // load we're CSE'ing _to_ does.
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
- if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
+ if (InVal.DefInst != nullptr &&
+ (InVal.Generation == CurrentGeneration || InVal.IsInvariant) &&
InVal.MatchingId == MemInst.getMatchingId() &&
// We don't yet handle removing loads with ordering of any kind.
!MemInst.isVolatile() && MemInst.isUnordered() &&
// We can't replace an atomic load with one which isn't also atomic.
InVal.IsAtomic >= MemInst.isAtomic()) {
- Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
+ Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
if (Op != nullptr) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
- << " to: " << *InVal.Data << '\n');
+ << " to: " << *InVal.DefInst << '\n');
if (!Inst->use_empty())
Inst->replaceAllUsesWith(Op);
Inst->eraseFromParent();
@@ -631,7 +658,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
AvailableLoads.insert(
MemInst.getPointerOperand(),
LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
- MemInst.isAtomic()));
+ MemInst.isAtomic(), MemInst.isInvariantLoad()));
LastStore = nullptr;
continue;
}
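
The availability rule introduced here is that a recorded load may be reused when its generation matches the current one, or unconditionally when the recorded load was an invariant load. A standalone sketch of that rule, with illustrative names rather than the real EarlyCSE tables:

#include <cstdio>
#include <string>
#include <unordered_map>

struct AvailableLoad {
  int Value;
  unsigned Generation;
  bool IsInvariant;
};

struct LoadTable {
  std::unordered_map<std::string, AvailableLoad> Avail;
  unsigned CurrentGeneration = 0;

  void recordStoreOrCall() { ++CurrentGeneration; } // memory may have changed
  void recordLoad(const std::string &Ptr, int V, bool Invariant) {
    Avail[Ptr] = {V, CurrentGeneration, Invariant};
  }
  bool lookup(const std::string &Ptr, int &V) const {
    auto It = Avail.find(Ptr);
    if (It == Avail.end())
      return false;
    const AvailableLoad &AL = It->second;
    // Mirrors (Generation == CurrentGeneration || IsInvariant) above.
    if (AL.Generation != CurrentGeneration && !AL.IsInvariant)
      return false; // a write may have intervened
    V = AL.Value;
    return true;
  }
};

int main() {
  LoadTable T;
  T.recordLoad("p", 42, /*Invariant=*/false);
  T.recordLoad("q", 7, /*Invariant=*/true);
  T.recordStoreOrCall(); // something wrote to memory
  int V;
  std::printf("p reusable: %d\n", T.lookup("p", V)); // 0
  std::printf("q reusable: %d\n", T.lookup("q", V)); // 1, invariant location
  return 0;
}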
@@ -649,7 +676,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (CallValue::canHandle(Inst)) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
- std::pair<Value *, unsigned> InVal = AvailableCalls.lookup(Inst);
+ std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
<< " to: " << *InVal.first << '\n');
@@ -663,7 +690,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// Otherwise, remember that we have this instruction.
AvailableCalls.insert(
- Inst, std::pair<Value *, unsigned>(Inst, CurrentGeneration));
+ Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
continue;
}
@@ -673,7 +700,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// to advance the generation. We do need to prevent DSE across the fence,
// but that's handled above.
if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
- if (FI->getOrdering() == Release) {
+ if (FI->getOrdering() == AtomicOrdering::Release) {
assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
continue;
}
@@ -685,8 +712,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// the store originally was.
if (MemInst.isValid() && MemInst.isStore()) {
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
- if (InVal.Data &&
- InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
+ if (InVal.DefInst &&
+ InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
InVal.Generation == CurrentGeneration &&
InVal.MatchingId == MemInst.getMatchingId() &&
// We don't yet handle removing stores with ordering of any kind.
@@ -743,7 +770,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
AvailableLoads.insert(
MemInst.getPointerOperand(),
LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
- MemInst.isAtomic()));
+ MemInst.isAtomic(), /*IsInvariant=*/false));
// Remember that this was the last unordered store we saw for DSE. We
// don't yet handle DSE on ordered or volatile stores since we don't
@@ -818,11 +845,11 @@ bool EarlyCSE::run() {
}
PreservedAnalyses EarlyCSEPass::run(Function &F,
- AnalysisManager<Function> *AM) {
- auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
- auto &TTI = AM->getResult<TargetIRAnalysis>(F);
- auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
- auto &AC = AM->getResult<AssumptionAnalysis>(F);
+ AnalysisManager<Function> &AM) {
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
EarlyCSE CSE(TLI, TTI, DT, AC);
@@ -833,6 +860,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
// FIXME: Bundle this with other CFG-preservation.
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<GlobalsAA>();
return PA;
}
@@ -853,7 +881,7 @@ public:
}
bool runOnFunction(Function &F) override {
- if (skipOptnoneFunction(F))
+ if (skipFunction(F))
return false;
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
index 7f5d78656b50b..7aa6dc6992b61 100644
--- a/lib/Transforms/Scalar/Float2Int.cpp
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -13,15 +13,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "float2int"
+
+#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/EquivalenceClasses.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -53,41 +51,31 @@ MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden,
"(default=64)"));
namespace {
- struct Float2Int : public FunctionPass {
+ struct Float2IntLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- Float2Int() : FunctionPass(ID) {
- initializeFloat2IntPass(*PassRegistry::getPassRegistry());
+ Float2IntLegacyPass() : FunctionPass(ID) {
+ initializeFloat2IntLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ return Impl.runImpl(F);
}
- bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addPreserved<GlobalsAAWrapperPass>();
}
- void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots);
- ConstantRange seen(Instruction *I, ConstantRange R);
- ConstantRange badRange();
- ConstantRange unknownRange();
- ConstantRange validateRange(ConstantRange R);
- void walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots);
- void walkForwards();
- bool validateAndTransform();
- Value *convert(Instruction *I, Type *ToTy);
- void cleanup();
-
- MapVector<Instruction*, ConstantRange > SeenInsts;
- SmallPtrSet<Instruction*,8> Roots;
- EquivalenceClasses<Instruction*> ECs;
- MapVector<Instruction*, Value*> ConvertedInsts;
- LLVMContext *Ctx;
+ private:
+ Float2IntPass Impl;
};
}
-char Float2Int::ID = 0;
-INITIALIZE_PASS_BEGIN(Float2Int, "float2int", "Float to int", false, false)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_END(Float2Int, "float2int", "Float to int", false, false)
+char Float2IntLegacyPass::ID = 0;
+INITIALIZE_PASS(Float2IntLegacyPass, "float2int", "Float to int", false, false)
// Given a FCmp predicate, return a matching ICmp predicate if one
// exists, otherwise return BAD_ICMP_PREDICATE.
@@ -129,7 +117,7 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
// Find the roots - instructions that convert from the FP domain to
// integer domain.
-void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
+void Float2IntPass::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
for (auto &I : instructions(F)) {
if (isa<VectorType>(I.getType()))
continue;
@@ -149,7 +137,7 @@ void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
}
// Helper - mark I as having been traversed, having range R.
-ConstantRange Float2Int::seen(Instruction *I, ConstantRange R) {
+ConstantRange Float2IntPass::seen(Instruction *I, ConstantRange R) {
DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n");
if (SeenInsts.find(I) != SeenInsts.end())
SeenInsts.find(I)->second = R;
@@ -159,13 +147,13 @@ ConstantRange Float2Int::seen(Instruction *I, ConstantRange R) {
}
// Helper - get a range representing a poison value.
-ConstantRange Float2Int::badRange() {
+ConstantRange Float2IntPass::badRange() {
return ConstantRange(MaxIntegerBW + 1, true);
}
-ConstantRange Float2Int::unknownRange() {
+ConstantRange Float2IntPass::unknownRange() {
return ConstantRange(MaxIntegerBW + 1, false);
}
-ConstantRange Float2Int::validateRange(ConstantRange R) {
+ConstantRange Float2IntPass::validateRange(ConstantRange R) {
if (R.getBitWidth() > MaxIntegerBW + 1)
return badRange();
return R;
@@ -185,7 +173,7 @@ ConstantRange Float2Int::validateRange(ConstantRange R) {
// Breadth-first walk of the use-def graph; determine the set of nodes
// we care about and eagerly determine if some of them are poisonous.
-void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
+void Float2IntPass::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
while (!Worklist.empty()) {
Instruction *I = Worklist.back();
@@ -246,8 +234,8 @@ void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
// Walk forwards down the list of seen instructions, so we visit defs before
// uses.
-void Float2Int::walkForwards() {
- for (auto &It : make_range(SeenInsts.rbegin(), SeenInsts.rend())) {
+void Float2IntPass::walkForwards() {
+ for (auto &It : reverse(SeenInsts)) {
if (It.second != unknownRange())
continue;
@@ -318,7 +306,7 @@ void Float2Int::walkForwards() {
// Instead, we ask APFloat to round itself to an integral value - this
// preserves sign-of-zero - then compare the result with the original.
//
- APFloat F = CF->getValueAPF();
+ const APFloat &F = CF->getValueAPF();
// First, weed out obviously incorrect values. Non-finite numbers
// can't be represented and neither can negative zero, unless
@@ -357,7 +345,7 @@ void Float2Int::walkForwards() {
}
// If there is a valid transform to be done, do it.
-bool Float2Int::validateAndTransform() {
+bool Float2IntPass::validateAndTransform() {
bool MadeChange = false;
// Iterate over every disjoint partition of the def-use graph.
@@ -439,7 +427,7 @@ bool Float2Int::validateAndTransform() {
return MadeChange;
}
-Value *Float2Int::convert(Instruction *I, Type *ToTy) {
+Value *Float2IntPass::convert(Instruction *I, Type *ToTy) {
if (ConvertedInsts.find(I) != ConvertedInsts.end())
// Already converted this instruction.
return ConvertedInsts[I];
@@ -511,15 +499,12 @@ Value *Float2Int::convert(Instruction *I, Type *ToTy) {
}
// Perform dead code elimination on the instructions we just modified.
-void Float2Int::cleanup() {
- for (auto &I : make_range(ConvertedInsts.rbegin(), ConvertedInsts.rend()))
+void Float2IntPass::cleanup() {
+ for (auto &I : reverse(ConvertedInsts))
I.first->eraseFromParent();
}
-bool Float2Int::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
+bool Float2IntPass::runImpl(Function &F) {
DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
// Clear out all state.
ECs = EquivalenceClasses<Instruction*>();
@@ -540,4 +525,17 @@ bool Float2Int::runOnFunction(Function &F) {
return Modified;
}
-FunctionPass *llvm::createFloat2IntPass() { return new Float2Int(); }
+namespace llvm {
+FunctionPass *createFloat2IntPass() { return new Float2IntLegacyPass(); }
+
+PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &) {
+ if (!runImpl(F))
+ return PreservedAnalyses::all();
+ else {
+ // FIXME: This should also 'preserve the CFG'.
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ return PA;
+ }
+}
+} // End namespace llvm
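
As a reminder of what this pass rewrites, a hypothetical example of code Float2Int can convert: the float add's operands come from integer-to-float conversions with small known ranges and its only use is a float-to-int conversion, so the whole chain can be redone in integer arithmetic once the ranges are proven to fit.

#include <cstdio>

int sumAsFloat(unsigned char A, unsigned char B) {
  float FA = A;          // int-to-float root; known range [0, 255]
  float FB = B;
  return (int)(FA + FB); // float-to-int root; chain can become an integer add
}

int main() {
  std::printf("%d\n", sumAsFloat(200, 100)); // 300, exact either way
  return 0;
}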
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index a028b8c444bae..a35a1062cbcd8 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -15,7 +15,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
@@ -44,7 +44,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -53,6 +52,7 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <vector>
using namespace llvm;
+using namespace llvm::gvn;
using namespace PatternMatch;
#define DEBUG_TYPE "gvn"
@@ -74,106 +74,167 @@ static cl::opt<uint32_t>
MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
cl::desc("Max recurse depth (default = 1000)"));
-//===----------------------------------------------------------------------===//
-// ValueTable Class
-//===----------------------------------------------------------------------===//
-
-/// This class holds the mapping between values and value numbers. It is used
-/// as an efficient mechanism to determine the expression-wise equivalence of
-/// two values.
-namespace {
- struct Expression {
- uint32_t opcode;
- Type *type;
- SmallVector<uint32_t, 4> varargs;
+struct llvm::GVN::Expression {
+ uint32_t opcode;
+ Type *type;
+ SmallVector<uint32_t, 4> varargs;
- Expression(uint32_t o = ~2U) : opcode(o) { }
+ Expression(uint32_t o = ~2U) : opcode(o) {}
- bool operator==(const Expression &other) const {
- if (opcode != other.opcode)
- return false;
- if (opcode == ~0U || opcode == ~1U)
- return true;
- if (type != other.type)
- return false;
- if (varargs != other.varargs)
- return false;
+ bool operator==(const Expression &other) const {
+ if (opcode != other.opcode)
+ return false;
+ if (opcode == ~0U || opcode == ~1U)
return true;
- }
-
- friend hash_code hash_value(const Expression &Value) {
- return hash_combine(Value.opcode, Value.type,
- hash_combine_range(Value.varargs.begin(),
- Value.varargs.end()));
- }
- };
+ if (type != other.type)
+ return false;
+ if (varargs != other.varargs)
+ return false;
+ return true;
+ }
- class ValueTable {
- DenseMap<Value*, uint32_t> valueNumbering;
- DenseMap<Expression, uint32_t> expressionNumbering;
- AliasAnalysis *AA;
- MemoryDependenceAnalysis *MD;
- DominatorTree *DT;
-
- uint32_t nextValueNumber;
-
- Expression create_expression(Instruction* I);
- Expression create_cmp_expression(unsigned Opcode,
- CmpInst::Predicate Predicate,
- Value *LHS, Value *RHS);
- Expression create_extractvalue_expression(ExtractValueInst* EI);
- uint32_t lookup_or_add_call(CallInst* C);
- public:
- ValueTable() : nextValueNumber(1) { }
- uint32_t lookup_or_add(Value *V);
- uint32_t lookup(Value *V) const;
- uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred,
- Value *LHS, Value *RHS);
- bool exists(Value *V) const;
- void add(Value *V, uint32_t num);
- void clear();
- void erase(Value *v);
- void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
- AliasAnalysis *getAliasAnalysis() const { return AA; }
- void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
- void setDomTree(DominatorTree* D) { DT = D; }
- uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
- void verifyRemoved(const Value *) const;
- };
-}
+ friend hash_code hash_value(const Expression &Value) {
+ return hash_combine(
+ Value.opcode, Value.type,
+ hash_combine_range(Value.varargs.begin(), Value.varargs.end()));
+ }
+};
namespace llvm {
-template <> struct DenseMapInfo<Expression> {
- static inline Expression getEmptyKey() {
- return ~0U;
- }
+template <> struct DenseMapInfo<GVN::Expression> {
+ static inline GVN::Expression getEmptyKey() { return ~0U; }
- static inline Expression getTombstoneKey() {
- return ~1U;
- }
+ static inline GVN::Expression getTombstoneKey() { return ~1U; }
- static unsigned getHashValue(const Expression e) {
+ static unsigned getHashValue(const GVN::Expression &e) {
using llvm::hash_value;
return static_cast<unsigned>(hash_value(e));
}
- static bool isEqual(const Expression &LHS, const Expression &RHS) {
+ static bool isEqual(const GVN::Expression &LHS, const GVN::Expression &RHS) {
return LHS == RHS;
}
};
+} // End llvm namespace.
+
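The DenseMapInfo<GVN::Expression> specialization above supplies the four hooks DenseMap needs for a custom key: two reserved sentinel keys, a hash, and an equality test. A hedged, generic sketch of the same contract for a hypothetical two-field key (MyKey and its fields are illustrative, not part of this patch):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/Hashing.h"

    // Hypothetical key type; the field names are illustrative only.
    struct MyKey {
      unsigned Opcode;
      unsigned Operand;
      bool operator==(const MyKey &O) const {
        return Opcode == O.Opcode && Operand == O.Operand;
      }
    };

    namespace llvm {
    template <> struct DenseMapInfo<MyKey> {
      // Two impossible keys serve as the "empty slot" and "erased slot" markers.
      static inline MyKey getEmptyKey() { return {~0U, 0}; }
      static inline MyKey getTombstoneKey() { return {~1U, 0}; }
      static unsigned getHashValue(const MyKey &K) {
        return static_cast<unsigned>(hash_combine(K.Opcode, K.Operand));
      }
      static bool isEqual(const MyKey &L, const MyKey &R) { return L == R; }
    };
    } // end namespace llvm

    // Usage: llvm::DenseMap<MyKey, unsigned> Numbering; Numbering[{1, 2}] = 3;
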
+/// Represents a particular available value that we know how to materialize.
+/// Materialization of an AvailableValue never fails. An AvailableValue is
+/// implicitly associated with a rematerialization point which is the
+/// location of the instruction from which it was formed.
+struct llvm::gvn::AvailableValue {
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
+ MemIntrin, // A memory intrinsic which is loaded from.
+    UndefVal   // An UndefValue representing a value from a dead block (which
+ // is not yet physically removed from the CFG).
+ };
-}
+ /// V - The value that is live out of the block.
+ PointerIntPair<Value *, 2, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValue get(Value *V, unsigned Offset = 0) {
+ AvailableValue Res;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
+ AvailableValue Res;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValue getLoad(LoadInst *LI, unsigned Offset = 0) {
+ AvailableValue Res;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValue getUndef() {
+ AvailableValue Res;
+ Res.Val.setPointer(nullptr);
+ Res.Val.setInt(UndefVal);
+ Res.Offset = 0;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+ bool isUndefValue() const { return Val.getInt() == UndefVal; }
+
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(isMemIntrinValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
+
+ /// Emit code at the specified insertion point to adjust the value defined
+ /// here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(LoadInst *LI, Instruction *InsertPt,
+ GVN &gvn) const;
+};
+
+/// Represents an AvailableValue which can be rematerialized at the end of
+/// the associated BasicBlock.
+struct llvm::gvn::AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+
+ /// AV - The actual available value
+ AvailableValue AV;
+
+ static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.AV = std::move(AV);
+ return Res;
+ }
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ return get(BB, AvailableValue::get(V, Offset));
+ }
+ static AvailableValueInBlock getUndef(BasicBlock *BB) {
+ return get(BB, AvailableValue::getUndef());
+ }
+
+ /// Emit code at the end of this block to adjust the value defined here to
+ /// the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const {
+ return AV.MaterializeAdjustedValue(LI, BB->getTerminator(), gvn);
+ }
+};
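
AvailableValue is effectively a small tagged union: PointerIntPair stores the ValType discriminator in the two low bits of the payload pointer, which alignment leaves free. A standalone sketch of that low-bit tagging trick in plain C++ (illustrative only; LLVM's PointerIntPair adds the type safety this omits):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    enum Kind : uintptr_t { SimpleVal, LoadVal, MemIntrin, UndefVal };

    // Pack a 2-bit tag into the alignment bits of a pointer, as PointerIntPair
    // does for AvailableValue::Val.
    struct TaggedPtr {
      uintptr_t Bits = 0;
      void set(void *P, Kind K) {
        uintptr_t Addr = reinterpret_cast<uintptr_t>(P);
        assert((Addr & 0x3) == 0 && "need 4-byte alignment for a 2-bit tag");
        Bits = Addr | static_cast<uintptr_t>(K);
      }
      void *ptr() const { return reinterpret_cast<void *>(Bits & ~uintptr_t(3)); }
      Kind kind() const { return static_cast<Kind>(Bits & 3); }
    };

    int main() {
      int Payload = 42;
      TaggedPtr T;
      T.set(&Payload, LoadVal);
      std::printf("tag=%d value=%d\n", int(T.kind()), *static_cast<int *>(T.ptr()));
    }
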
//===----------------------------------------------------------------------===//
// ValueTable Internal Functions
//===----------------------------------------------------------------------===//
-Expression ValueTable::create_expression(Instruction *I) {
+GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
Expression e;
e.type = I->getType();
e.opcode = I->getOpcode();
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
- e.varargs.push_back(lookup_or_add(*OI));
+ e.varargs.push_back(lookupOrAdd(*OI));
if (I->isCommutative()) {
// Ensure that commutative instructions that only differ by a permutation
// of their operands get the same value number by sorting the operand value
@@ -201,15 +262,15 @@ Expression ValueTable::create_expression(Instruction *I) {
return e;
}
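
createExpr sorts the operand value numbers of commutative instructions, so "add %a, %b" and "add %b, %a" yield the same Expression and therefore the same value number. A tiny standalone illustration of that canonicalization (toy types, not the pass itself):

    #include <cstdio>
    #include <utility>
    #include <vector>

    // Toy "expression": an opcode plus operand value numbers.
    struct ToyExpr {
      unsigned Opcode;
      std::vector<unsigned> Ops;
      bool operator==(const ToyExpr &O) const {
        return Opcode == O.Opcode && Ops == O.Ops;
      }
    };

    ToyExpr makeCommutative(unsigned Opcode, unsigned VN0, unsigned VN1) {
      ToyExpr E{Opcode, {VN0, VN1}};
      // Canonicalize: sorted operand numbers make a+b and b+a the same key.
      if (E.Ops[0] > E.Ops[1])
        std::swap(E.Ops[0], E.Ops[1]);
      return E;
    }

    int main() {
      // Value numbers 7 and 9 stand in for %a and %b.
      bool Same = makeCommutative(1, 7, 9) == makeCommutative(1, 9, 7);
      std::printf("same expression key: %s\n", Same ? "yes" : "no");
    }
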
-Expression ValueTable::create_cmp_expression(unsigned Opcode,
- CmpInst::Predicate Predicate,
- Value *LHS, Value *RHS) {
+GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS) {
assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
"Not a comparison!");
Expression e;
e.type = CmpInst::makeCmpResultType(LHS->getType());
- e.varargs.push_back(lookup_or_add(LHS));
- e.varargs.push_back(lookup_or_add(RHS));
+ e.varargs.push_back(lookupOrAdd(LHS));
+ e.varargs.push_back(lookupOrAdd(RHS));
// Sort the operand value numbers so x<y and y>x get the same value number.
if (e.varargs[0] > e.varargs[1]) {
@@ -220,7 +281,7 @@ Expression ValueTable::create_cmp_expression(unsigned Opcode,
return e;
}
-Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
+GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
assert(EI && "Not an ExtractValueInst?");
Expression e;
e.type = EI->getType();
@@ -252,8 +313,8 @@ Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
// Intrinsic recognized. Grab its args to finish building the expression.
assert(I->getNumArgOperands() == 2 &&
"Expect two args for recognised intrinsics.");
- e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
- e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
+ e.varargs.push_back(lookupOrAdd(I->getArgOperand(0)));
+ e.varargs.push_back(lookupOrAdd(I->getArgOperand(1)));
return e;
}
}
@@ -263,7 +324,7 @@ Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
e.opcode = EI->getOpcode();
for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
OI != OE; ++OI)
- e.varargs.push_back(lookup_or_add(*OI));
+ e.varargs.push_back(lookupOrAdd(*OI));
for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
II != IE; ++II)
@@ -276,20 +337,32 @@ Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
// ValueTable External Functions
//===----------------------------------------------------------------------===//
+GVN::ValueTable::ValueTable() : nextValueNumber(1) {}
+GVN::ValueTable::ValueTable(const ValueTable &Arg)
+ : valueNumbering(Arg.valueNumbering),
+ expressionNumbering(Arg.expressionNumbering), AA(Arg.AA), MD(Arg.MD),
+ DT(Arg.DT), nextValueNumber(Arg.nextValueNumber) {}
+GVN::ValueTable::ValueTable(ValueTable &&Arg)
+ : valueNumbering(std::move(Arg.valueNumbering)),
+ expressionNumbering(std::move(Arg.expressionNumbering)),
+ AA(std::move(Arg.AA)), MD(std::move(Arg.MD)), DT(std::move(Arg.DT)),
+ nextValueNumber(std::move(Arg.nextValueNumber)) {}
+GVN::ValueTable::~ValueTable() {}
+
/// add - Insert a value into the table with a specified value number.
-void ValueTable::add(Value *V, uint32_t num) {
+void GVN::ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
}
-uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
+uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
- Expression exp = create_expression(C);
+ Expression exp = createExpr(C);
uint32_t &e = expressionNumbering[exp];
if (!e) e = nextValueNumber++;
valueNumbering[C] = e;
return e;
} else if (AA->onlyReadsMemory(C)) {
- Expression exp = create_expression(C);
+ Expression exp = createExpr(C);
uint32_t &e = expressionNumbering[exp];
if (!e) {
e = nextValueNumber++;
@@ -318,21 +391,21 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
}
for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
- uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
- uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
+ uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
+ uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
if (c_vn != cd_vn) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
}
- uint32_t v = lookup_or_add(local_cdep);
+ uint32_t v = lookupOrAdd(local_cdep);
valueNumbering[C] = v;
return v;
}
// Non-local case.
- const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
+ const MemoryDependenceResults::NonLocalDepInfo &deps =
MD->getNonLocalCallDependency(CallSite(C));
// FIXME: Move the checking logic to MemDep!
CallInst* cdep = nullptr;
@@ -372,15 +445,15 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
return nextValueNumber++;
}
for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
- uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
- uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
+ uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
+ uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
if (c_vn != cd_vn) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
}
- uint32_t v = lookup_or_add(cdep);
+ uint32_t v = lookupOrAdd(cdep);
valueNumbering[C] = v;
return v;
@@ -391,11 +464,11 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
}
/// Returns true if a value number exists for the specified value.
-bool ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
+bool GVN::ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
-uint32_t ValueTable::lookup_or_add(Value *V) {
+uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
if (VI != valueNumbering.end())
return VI->second;
@@ -409,7 +482,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
Expression exp;
switch (I->getOpcode()) {
case Instruction::Call:
- return lookup_or_add_call(cast<CallInst>(I));
+ return lookupOrAddCall(cast<CallInst>(I));
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -448,10 +521,10 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::ShuffleVector:
case Instruction::InsertValue:
case Instruction::GetElementPtr:
- exp = create_expression(I);
+ exp = createExpr(I);
break;
case Instruction::ExtractValue:
- exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
+ exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
break;
default:
valueNumbering[V] = nextValueNumber;
@@ -466,7 +539,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
-uint32_t ValueTable::lookup(Value *V) const {
+uint32_t GVN::ValueTable::lookup(Value *V) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
assert(VI != valueNumbering.end() && "Value not numbered?");
return VI->second;
@@ -476,30 +549,30 @@ uint32_t ValueTable::lookup(Value *V) const {
/// assigning it a new number if it did not have one before. Useful when
/// we deduced the result of a comparison, but don't immediately have an
/// instruction realizing that comparison to hand.
-uint32_t ValueTable::lookup_or_add_cmp(unsigned Opcode,
- CmpInst::Predicate Predicate,
- Value *LHS, Value *RHS) {
- Expression exp = create_cmp_expression(Opcode, Predicate, LHS, RHS);
+uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS) {
+ Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
uint32_t& e = expressionNumbering[exp];
if (!e) e = nextValueNumber++;
return e;
}
/// Remove all entries from the ValueTable.
-void ValueTable::clear() {
+void GVN::ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
nextValueNumber = 1;
}
/// Remove a value from the value numbering.
-void ValueTable::erase(Value *V) {
+void GVN::ValueTable::erase(Value *V) {
valueNumbering.erase(V);
}
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
-void ValueTable::verifyRemoved(const Value *V) const {
+void GVN::ValueTable::verifyRemoved(const Value *V) const {
for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
@@ -510,251 +583,26 @@ void ValueTable::verifyRemoved(const Value *V) const {
// GVN Pass
//===----------------------------------------------------------------------===//
-namespace {
- class GVN;
- struct AvailableValueInBlock {
- /// BB - The basic block in question.
- BasicBlock *BB;
- enum ValType {
- SimpleVal, // A simple offsetted value that is accessed.
- LoadVal, // A value produced by a load.
- MemIntrin, // A memory intrinsic which is loaded from.
- UndefVal // A UndefValue representing a value from dead block (which
- // is not yet physically removed from the CFG).
- };
-
- /// V - The value that is live out of the block.
- PointerIntPair<Value *, 2, ValType> Val;
-
- /// Offset - The byte offset in Val that is interesting for the load query.
- unsigned Offset;
-
- static AvailableValueInBlock get(BasicBlock *BB, Value *V,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(V);
- Res.Val.setInt(SimpleVal);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(MI);
- Res.Val.setInt(MemIntrin);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(LI);
- Res.Val.setInt(LoadVal);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getUndef(BasicBlock *BB) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(nullptr);
- Res.Val.setInt(UndefVal);
- Res.Offset = 0;
- return Res;
- }
-
- bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
- bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
- bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
- bool isUndefValue() const { return Val.getInt() == UndefVal; }
-
- Value *getSimpleValue() const {
- assert(isSimpleValue() && "Wrong accessor");
- return Val.getPointer();
- }
-
- LoadInst *getCoercedLoadValue() const {
- assert(isCoercedLoadValue() && "Wrong accessor");
- return cast<LoadInst>(Val.getPointer());
- }
-
- MemIntrinsic *getMemIntrinValue() const {
- assert(isMemIntrinValue() && "Wrong accessor");
- return cast<MemIntrinsic>(Val.getPointer());
- }
-
- /// Emit code into this block to adjust the value defined here to the
- /// specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const;
- };
-
- class GVN : public FunctionPass {
- bool NoLoads;
- MemoryDependenceAnalysis *MD;
- DominatorTree *DT;
- const TargetLibraryInfo *TLI;
- AssumptionCache *AC;
- SetVector<BasicBlock *> DeadBlocks;
-
- ValueTable VN;
-
- /// A mapping from value numbers to lists of Value*'s that
- /// have that value number. Use findLeader to query it.
- struct LeaderTableEntry {
- Value *Val;
- const BasicBlock *BB;
- LeaderTableEntry *Next;
- };
- DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
- BumpPtrAllocator TableAllocator;
-
- // Block-local map of equivalent values to their leader, does not
- // propagate to any successors. Entries added mid-block are applied
- // to the remaining instructions in the block.
- SmallMapVector<llvm::Value *, llvm::Constant *, 4> ReplaceWithConstMap;
- SmallVector<Instruction*, 8> InstrsToErase;
-
- typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
- typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
- typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;
-
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit GVN(bool noloads = false)
- : FunctionPass(ID), NoLoads(noloads), MD(nullptr) {
- initializeGVNPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- /// This removes the specified instruction from
- /// our various maps and marks it for deletion.
- void markInstructionForDeletion(Instruction *I) {
- VN.erase(I);
- InstrsToErase.push_back(I);
- }
-
- DominatorTree &getDominatorTree() const { return *DT; }
- AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
- MemoryDependenceAnalysis &getMemDep() const { return *MD; }
- private:
- /// Push a new Value to the LeaderTable onto the list for its value number.
- void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
- LeaderTableEntry &Curr = LeaderTable[N];
- if (!Curr.Val) {
- Curr.Val = V;
- Curr.BB = BB;
- return;
- }
-
- LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
- Node->Val = V;
- Node->BB = BB;
- Node->Next = Curr.Next;
- Curr.Next = Node;
- }
-
- /// Scan the list of values corresponding to a given
- /// value number, and remove the given instruction if encountered.
- void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
- LeaderTableEntry* Prev = nullptr;
- LeaderTableEntry* Curr = &LeaderTable[N];
-
- while (Curr && (Curr->Val != I || Curr->BB != BB)) {
- Prev = Curr;
- Curr = Curr->Next;
- }
-
- if (!Curr)
- return;
-
- if (Prev) {
- Prev->Next = Curr->Next;
- } else {
- if (!Curr->Next) {
- Curr->Val = nullptr;
- Curr->BB = nullptr;
- } else {
- LeaderTableEntry* Next = Curr->Next;
- Curr->Val = Next->Val;
- Curr->BB = Next->BB;
- Curr->Next = Next->Next;
- }
- }
- }
-
- // List of critical edges to be split between iterations.
- SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
-
- // This transformation requires dominator postdominator info
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (!NoLoads)
- AU.addRequired<MemoryDependenceAnalysis>();
- AU.addRequired<AAResultsWrapperPass>();
-
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-
-
- // Helper functions of redundant load elimination
- bool processLoad(LoadInst *L);
- bool processNonLocalLoad(LoadInst *L);
- bool processAssumeIntrinsic(IntrinsicInst *II);
- void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
- AvailValInBlkVect &ValuesPerBlock,
- UnavailBlkVect &UnavailableBlocks);
- bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
- UnavailBlkVect &UnavailableBlocks);
-
- // Other helper routines
- bool processInstruction(Instruction *I);
- bool processBlock(BasicBlock *BB);
- void dump(DenseMap<uint32_t, Value*> &d);
- bool iterateOnFunction(Function &F);
- bool performPRE(Function &F);
- bool performScalarPRE(Instruction *I);
- bool performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
- unsigned int ValNo);
- Value *findLeader(const BasicBlock *BB, uint32_t num);
- void cleanupGlobalSets();
- void verifyRemoved(const Instruction *I) const;
- bool splitCriticalEdges();
- BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
- bool replaceOperandsWithConsts(Instruction *I) const;
- bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
- bool DominatesByEdge);
- bool processFoldableCondBr(BranchInst *BI);
- void addDeadBlock(BasicBlock *BB);
- void assignValNumForDeadCode();
- };
-
- char GVN::ID = 0;
-}
-
-// The public interface to this file...
-FunctionPass *llvm::createGVNPass(bool NoLoads) {
- return new GVN(NoLoads);
+PreservedAnalyses GVN::run(Function &F, AnalysisManager<Function> &AM) {
+ // FIXME: The order of evaluation of these 'getResult' calls is very
+ // significant! Re-ordering these variables will cause GVN when run alone to
+ // be less effective! We should fix memdep and basic-aa to not exhibit this
+ // behavior, but until then don't change the order here.
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ auto &MemDep = AM.getResult<MemoryDependenceAnalysis>(F);
+ bool Changed = runImpl(F, AC, DT, TLI, AA, &MemDep);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<GlobalsAA>();
+ return PA;
}
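
With GVN::run in place, the pass can be scheduled through the new pass manager rather than through the legacy INITIALIZE_PASS registration removed just below. A hedged sketch of what driving it might look like, assuming PassBuilder's analysis-registration helpers of this period; a real pipeline would also wire up the module/function proxies and an alias-analysis pipeline:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Transforms/Scalar/GVN.h"

    // Deliberately minimal: enough to show where GVN() now slots in.
    void runGVNOn(llvm::Function &F) {
      llvm::FunctionAnalysisManager FAM;
      llvm::PassBuilder PB;
      PB.registerFunctionAnalyses(FAM); // DominatorTree, TLI, AA, MemDep, ...
      llvm::FunctionPassManager FPM;
      FPM.addPass(llvm::GVN());
      FPM.run(F, FAM);
    }
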
-INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
@@ -764,7 +612,6 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
}
errs() << "}\n";
}
-#endif
/// Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
@@ -875,38 +722,45 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
IRBuilder<> &IRB,
const DataLayout &DL) {
- if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL))
- return nullptr;
+ assert(CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
+ "precondition violation - materialization can't fail");
+
+ if (auto *CExpr = dyn_cast<ConstantExpr>(StoredVal))
+ StoredVal = ConstantFoldConstantExpression(CExpr, DL);
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
- uint64_t StoreSize = DL.getTypeSizeInBits(StoredValTy);
- uint64_t LoadSize = DL.getTypeSizeInBits(LoadedTy);
+ uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
+ uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
- if (StoreSize == LoadSize) {
+ if (StoredValSize == LoadedValSize) {
// Pointer to Pointer -> use bitcast.
if (StoredValTy->getScalarType()->isPointerTy() &&
- LoadedTy->getScalarType()->isPointerTy())
- return IRB.CreateBitCast(StoredVal, LoadedTy);
+ LoadedTy->getScalarType()->isPointerTy()) {
+ StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy);
+ } else {
+ // Convert source pointers to integers, which can be bitcast.
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = DL.getIntPtrType(StoredValTy);
+ StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
+ }
- // Convert source pointers to integers, which can be bitcast.
- if (StoredValTy->getScalarType()->isPointerTy()) {
- StoredValTy = DL.getIntPtrType(StoredValTy);
- StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
- }
+ Type *TypeToCastTo = LoadedTy;
+ if (TypeToCastTo->getScalarType()->isPointerTy())
+ TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
- Type *TypeToCastTo = LoadedTy;
- if (TypeToCastTo->getScalarType()->isPointerTy())
- TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
+ if (StoredValTy != TypeToCastTo)
+ StoredVal = IRB.CreateBitCast(StoredVal, TypeToCastTo);
- if (StoredValTy != TypeToCastTo)
- StoredVal = IRB.CreateBitCast(StoredVal, TypeToCastTo);
+ // Cast to pointer if the load needs a pointer type.
+ if (LoadedTy->getScalarType()->isPointerTy())
+ StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy);
+ }
- // Cast to pointer if the load needs a pointer type.
- if (LoadedTy->getScalarType()->isPointerTy())
- StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy);
+ if (auto *CExpr = dyn_cast<ConstantExpr>(StoredVal))
+ StoredVal = ConstantFoldConstantExpression(CExpr, DL);
return StoredVal;
}
@@ -914,7 +768,8 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
// If the loaded value is smaller than the available value, then we can
// extract out a piece from it. If the available value is too small, then we
// can't do anything.
- assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
+ assert(StoredValSize >= LoadedValSize &&
+ "CanCoerceMustAliasedValueToLoad fail");
// Convert source pointers to integers, which can be manipulated.
if (StoredValTy->getScalarType()->isPointerTy()) {
@@ -924,29 +779,35 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
// Convert vectors and fp to integer, which can be manipulated.
if (!StoredValTy->isIntegerTy()) {
- StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
+ StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
StoredVal = IRB.CreateBitCast(StoredVal, StoredValTy);
}
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
if (DL.isBigEndian()) {
- StoredVal = IRB.CreateLShr(StoredVal, StoreSize - LoadSize, "tmp");
+ uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
+ DL.getTypeStoreSizeInBits(LoadedTy);
+ StoredVal = IRB.CreateLShr(StoredVal, ShiftAmt, "tmp");
}
// Truncate the integer to the right size now.
- Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
+ Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
StoredVal = IRB.CreateTrunc(StoredVal, NewIntTy, "trunc");
- if (LoadedTy == NewIntTy)
- return StoredVal;
+ if (LoadedTy != NewIntTy) {
+ // If the result is a pointer, inttoptr.
+ if (LoadedTy->getScalarType()->isPointerTy())
+ StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy, "inttoptr");
+ else
+ // Otherwise, bitcast.
+ StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy, "bitcast");
+ }
- // If the result is a pointer, inttoptr.
- if (LoadedTy->getScalarType()->isPointerTy())
- return IRB.CreateIntToPtr(StoredVal, LoadedTy, "inttoptr");
+ if (auto *CExpr = dyn_cast<ConstantExpr>(StoredVal))
+ StoredVal = ConstantFoldConstantExpression(CExpr, DL);
- // Otherwise, bitcast.
- return IRB.CreateBitCast(StoredVal, LoadedTy, "bitcast");
+ return StoredVal;
}
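
When the stored value is wider than the load, the code above shifts right by the difference of the store sizes on big-endian targets before truncating, so the bytes the narrow load would observe land in the low bits. A standalone arithmetic sketch of that extraction (plain integers, not IR):

    #include <cstdint>
    #include <cstdio>

    // Pull the bits a narrower load would read out of a wider stored integer:
    // big-endian keeps them in the high part, so shift down before truncating.
    uint64_t coerceWideStore(uint64_t StoredVal, unsigned StoreBits,
                             unsigned LoadBits, bool BigEndian) {
      if (BigEndian)
        StoredVal >>= (StoreBits - LoadBits);            // the lshr above
      uint64_t Mask = (LoadBits == 64) ? ~0ULL : ((1ULL << LoadBits) - 1);
      return StoredVal & Mask;                           // the trunc above
    }

    int main() {
      // A 32-bit store of 0xAABBCCDD feeding a 16-bit load.
      std::printf("little-endian forwards 0x%llx, big-endian forwards 0x%llx\n",
                  (unsigned long long)coerceWideStore(0xAABBCCDDu, 32, 16, false),
                  (unsigned long long)coerceWideStore(0xAABBCCDDu, 32, 16, true));
    }
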
/// This function is called when we have a
@@ -1067,10 +928,15 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
- unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
LoadBase, LoadOffs, LoadSize, DepLI);
if (Size == 0) return -1;
+ // Check non-obvious conditions enforced by MDA which we rely on for being
+ // able to materialize this potentially available value
+ assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
+
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
}
@@ -1117,7 +983,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- if (ConstantFoldLoadFromConstPtr(Src, DL))
+ if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
return Offset;
return -1;
}
@@ -1173,9 +1039,9 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
const DataLayout &DL = SrcVal->getModule()->getDataLayout();
// If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
// widen SrcVal out to a larger load.
- unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
+ unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
- if (Offset+LoadSize > SrcValSize) {
+ if (Offset+LoadSize > SrcValStoreSize) {
assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
// If we have a load/load clobber an DepLI can be widened to cover this
@@ -1207,8 +1073,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
// system, we need to shift down to get the relevant bits.
Value *RV = NewLoad;
if (DL.isBigEndian())
- RV = Builder.CreateLShr(RV,
- NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
+ RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
RV = Builder.CreateTrunc(RV, SrcVal->getType());
SrcVal->replaceAllUsesWith(RV);
@@ -1279,7 +1144,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- return ConstantFoldLoadFromConstPtr(Src, DL);
+ return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
}
@@ -1294,7 +1159,8 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
if (ValuesPerBlock.size() == 1 &&
gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
LI->getParent())) {
- assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block");
+ assert(!ValuesPerBlock[0].AV.isUndefValue() &&
+ "Dead BB dominate this block");
return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn);
}
@@ -1316,15 +1182,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
}
-Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI,
- GVN &gvn) const {
+Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI,
+ Instruction *InsertPt,
+ GVN &gvn) const {
Value *Res;
Type *LoadTy = LI->getType();
const DataLayout &DL = LI->getModule()->getDataLayout();
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), DL);
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
@@ -1335,16 +1202,15 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI,
if (Load->getType() == LoadTy && Offset == 0) {
Res = Load;
} else {
- Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
- gvn);
-
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, InsertPt, gvn);
+
DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
<< *getCoercedLoadValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
} else if (isMemIntrinValue()) {
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
- BB->getTerminator(), DL);
+ InsertPt, DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1353,6 +1219,7 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI,
DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";);
return UndefValue::get(LoadTy);
}
+ assert(Res && "failed to materialize?");
return Res;
}
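
Note the assert just above: materialization is written to be infallible, because AnalyzeLoadAvailability has already decided whether forwarding is legal before an AvailableValue is ever built. A small sketch of that analyze-then-commit split (hypothetical names, plain C++):

    #include <cassert>
    #include <optional>
    #include <string>

    // Hypothetical split mirroring the design above: analysis may say "not
    // available", but materialization is only invoked on approved values and
    // therefore asserts instead of returning failure.
    std::optional<int> analyzeAvailability(const std::string &DepKind) {
      if (DepKind == "store" || DepKind == "load")
        return 0;                 // an offset we know how to forward from
      return std::nullopt;        // unknown clobber: conservatively unavailable
    }

    int materialize(const std::optional<int> &Approved) {
      assert(Approved && "materialization can't fail by construction");
      return *Approved;
    }

    int main() {
      auto AV = analyzeAvailability("store");
      return AV ? materialize(AV) : 1;
    }
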
@@ -1362,7 +1229,134 @@ static bool isLifetimeStart(const Instruction *Inst) {
return false;
}
-void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
+ Value *Address, AvailableValue &Res) {
+
+ assert((DepInfo.isDef() || DepInfo.isClobber()) &&
+ "expected a local dependence");
+ assert(LI->isUnordered() && "rules below are incorrect for ordered access");
+
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+
+ if (DepInfo.isClobber()) {
+ // If the dependence is to a store that writes to a superset of the bits
+ // read by the load, we can extract the bits we need for the load from the
+ // stored value.
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
+ int Offset =
+ AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
+ if (Offset != -1) {
+ Res = AvailableValue::get(DepSI->getValueOperand(), Offset);
+ return true;
+ }
+ }
+ }
+
+ // Check to see if we have something like this:
+ // load i32* P
+ // load i8* (P+1)
+ // if we have this, replace the later with an extraction from the former.
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
+ // If this is a clobber and L is the first instruction in its block, then
+ // we have the first instruction in the entry block.
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) {
+ int Offset =
+ AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
+
+ if (Offset != -1) {
+ Res = AvailableValue::getLoad(DepLI, Offset);
+ return true;
+ }
+ }
+ }
+
+ // If the clobbering value is a memset/memcpy/memmove, see if we can
+ // forward a value on from it.
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
+ if (Address && !LI->isAtomic()) {
+ int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
+ DepMI, DL);
+ if (Offset != -1) {
+ Res = AvailableValue::getMI(DepMI, Offset);
+ return true;
+ }
+ }
+ }
+ // Nothing known about this clobber, have to be conservative
+ DEBUG(
+ // fast print dep, using operator<< on instruction is too slow.
+ dbgs() << "GVN: load ";
+ LI->printAsOperand(dbgs());
+ Instruction *I = DepInfo.getInst();
+ dbgs() << " is clobbered by " << *I << '\n';
+ );
+ return false;
+ }
+ assert(DepInfo.isDef() && "follows from above");
+
+ Instruction *DepInst = DepInfo.getInst();
+
+ // Loading the allocation -> undef.
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
+ // Loading immediately after lifetime begin -> undef.
+ isLifetimeStart(DepInst)) {
+ Res = AvailableValue::get(UndefValue::get(LI->getType()));
+ return true;
+ }
+
+ // Loading from calloc (which zero initializes memory) -> zero
+ if (isCallocLikeFn(DepInst, TLI)) {
+ Res = AvailableValue::get(Constant::getNullValue(LI->getType()));
+ return true;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
+ // Reject loads and stores that are to the same address but are of
+ // different types if we have to. If the stored value is larger or equal to
+ // the loaded value, we can reuse it.
+ if (S->getValueOperand()->getType() != LI->getType() &&
+ !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), DL))
+ return false;
+
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (S->isAtomic() < LI->isAtomic())
+ return false;
+
+ Res = AvailableValue::get(S->getValueOperand());
+ return true;
+ }
+
+ if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
+ // If the types mismatch and we can't handle it, reject reuse of the load.
+ // If the stored value is larger or equal to the loaded value, we can reuse
+ // it.
+ if (LD->getType() != LI->getType() &&
+ !CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
+ return false;
+
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (LD->isAtomic() < LI->isAtomic())
+ return false;
+
+ Res = AvailableValue::getLoad(LD);
+ return true;
+ }
+
+ // Unknown def - must be conservative
+ DEBUG(
+ // fast print dep, using operator<< on instruction is too slow.
+ dbgs() << "GVN: load ";
+ LI->printAsOperand(dbgs());
+ dbgs() << " has unknown def " << *DepInst << '\n';
+ );
+ return false;
+}
+
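The clobber and def cases above repeatedly apply the same ordering rule: a value may only be forwarded to a load that is no more strongly ordered than its source, which is what the isAtomic() comparisons encode. A trivial standalone rendering of that predicate:

    #include <cstdio>

    // The rule the isAtomic() comparisons above implement: forwarding is legal
    // only if the source is at least as strongly ordered as the load, since a
    // non-atomic source can't back the guarantees an atomic load demands.
    bool canForward(bool DepIsAtomic, bool LoadIsAtomic) {
      return !LoadIsAtomic || DepIsAtomic;  // i.e. LoadIsAtomic <= DepIsAtomic
    }

    int main() {
      std::printf("atomic load fed by plain store:  %s\n",
                  canForward(false, true) ? "forward" : "reject");
      std::printf("plain load fed by atomic store:  %s\n",
                  canForward(true, false) ? "forward" : "reject");
    }
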
+void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
AvailValInBlkVect &ValuesPerBlock,
UnavailBlkVect &UnavailableBlocks) {
@@ -1371,7 +1365,6 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
unsigned NumDeps = Deps.size();
- const DataLayout &DL = LI->getModule()->getDataLayout();
for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1388,122 +1381,28 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
continue;
}
- if (DepInfo.isClobber()) {
- // The address being loaded in this non-local block may not be the same as
- // the pointer operand of the load if PHI translation occurs. Make sure
- // to consider the right address.
- Value *Address = Deps[i].getAddress();
-
- // If the dependence is to a store that writes to a superset of the bits
- // read by the load, we can extract the bits we need for the load from the
- // stored value.
- if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
- if (Address) {
- int Offset =
- AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
- if (Offset != -1) {
- ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
- DepSI->getValueOperand(),
- Offset));
- continue;
- }
- }
- }
-
- // Check to see if we have something like this:
- // load i32* P
- // load i8* (P+1)
- // if we have this, replace the later with an extraction from the former.
- if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
- // If this is a clobber and L is the first instruction in its block, then
- // we have the first instruction in the entry block.
- if (DepLI != LI && Address) {
- int Offset =
- AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
-
- if (Offset != -1) {
- ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
- Offset));
- continue;
- }
- }
- }
-
- // If the clobbering value is a memset/memcpy/memmove, see if we can
- // forward a value on from it.
- if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
- if (Address) {
- int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
- DepMI, DL);
- if (Offset != -1) {
- ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
- Offset));
- continue;
- }
- }
- }
-
- UnavailableBlocks.push_back(DepBB);
- continue;
- }
-
- // DepInfo.isDef() here
-
- Instruction *DepInst = DepInfo.getInst();
-
- // Loading the allocation -> undef.
- if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
- // Loading immediately after lifetime begin -> undef.
- isLifetimeStart(DepInst)) {
- ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
- UndefValue::get(LI->getType())));
- continue;
- }
-
- // Loading from calloc (which zero initializes memory) -> zero
- if (isCallocLikeFn(DepInst, TLI)) {
- ValuesPerBlock.push_back(AvailableValueInBlock::get(
- DepBB, Constant::getNullValue(LI->getType())));
- continue;
- }
-
- if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
- // Reject loads and stores that are to the same address but are of
- // different types if we have to.
- if (S->getValueOperand()->getType() != LI->getType()) {
- // If the stored value is larger or equal to the loaded value, we can
- // reuse it.
- if (!CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
- LI->getType(), DL)) {
- UnavailableBlocks.push_back(DepBB);
- continue;
- }
- }
+ // The address being loaded in this non-local block may not be the same as
+ // the pointer operand of the load if PHI translation occurs. Make sure
+ // to consider the right address.
+ Value *Address = Deps[i].getAddress();
+ AvailableValue AV;
+ if (AnalyzeLoadAvailability(LI, DepInfo, Address, AV)) {
+ // subtlety: because we know this was a non-local dependency, we know
+ // it's safe to materialize anywhere between the instruction within
+      // DepInfo and the end of its block.
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
- S->getValueOperand()));
- continue;
- }
-
- if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
- // If the types mismatch and we can't handle it, reject reuse of the load.
- if (LD->getType() != LI->getType()) {
- // If the stored value is larger or equal to the loaded value, we can
- // reuse it.
- if (!CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL)) {
- UnavailableBlocks.push_back(DepBB);
- continue;
- }
- }
- ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
- continue;
+ std::move(AV)));
+ } else {
+ UnavailableBlocks.push_back(DepBB);
}
-
- UnavailableBlocks.push_back(DepBB);
}
+
+ assert(NumDeps == ValuesPerBlock.size() + UnavailableBlocks.size() &&
+ "post condition violation");
}
-bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
UnavailBlkVect &UnavailableBlocks) {
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Lets think about
@@ -1661,16 +1560,17 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// parent's availability map. However, in doing so, we risk getting into
// ordering issues. If a block hasn't been processed yet, we would be
// marking a value as AVAIL-IN, which isn't what we intend.
- VN.lookup_or_add(I);
+ VN.lookupOrAdd(I);
}
for (const auto &PredLoad : PredLoads) {
BasicBlock *UnavailablePred = PredLoad.first;
Value *LoadPtr = PredLoad.second;
- Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
- LI->getAlignment(),
- UnavailablePred->getTerminator());
+ auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre",
+ LI->isVolatile(), LI->getAlignment(),
+ LI->getOrdering(), LI->getSynchScope(),
+ UnavailablePred->getTerminator());
// Transfer the old load's AA tags to the new load.
AAMDNodes Tags;
@@ -1682,6 +1582,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group))
NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
+ if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range))
+ NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
// Transfer DebugLoc.
NewLoad->setDebugLoc(LI->getDebugLoc());
@@ -1846,30 +1748,29 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
}
static void patchReplacementInstruction(Instruction *I, Value *Repl) {
+ auto *ReplInst = dyn_cast<Instruction>(Repl);
+ if (!ReplInst)
+ return;
+
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
- BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
- BinaryOperator *ReplOp = dyn_cast<BinaryOperator>(Repl);
- if (Op && ReplOp)
- ReplOp->andIRFlags(Op);
-
- if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
- // FIXME: If both the original and replacement value are part of the
- // same control-flow region (meaning that the execution of one
- // guarantees the execution of the other), then we can combine the
- // noalias scopes here and do better than the general conservative
- // answer used in combineMetadata().
-
- // In general, GVN unifies expressions over different control-flow
- // regions, and so we need a conservative combination of the noalias
- // scopes.
- static const unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group};
- combineMetadata(ReplInst, I, KnownIDs);
- }
+ ReplInst->andIRFlags(I);
+
+ // FIXME: If both the original and replacement value are part of the
+ // same control-flow region (meaning that the execution of one
+ // guarantees the execution of the other), then we can combine the
+ // noalias scopes here and do better than the general conservative
+ // answer used in combineMetadata().
+
+ // In general, GVN unifies expressions over different control-flow
+ // regions, and so we need a conservative combination of the noalias
+ // scopes.
+ static const unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group};
+ combineMetadata(ReplInst, I, KnownIDs);
}
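
patchReplacementInstruction now unconditionally intersects IR flags via andIRFlags and conservatively merges the known metadata kinds, so the surviving instruction never promises more than both copies did. A standalone sketch of the flag-intersection idea (plain C++ stand-in, not the LLVM API):

    #include <cstdio>

    // Stand-in for Instruction::andIRFlags: a wrap-free guarantee survives the
    // replacement only if both the original and the replacement carried it.
    struct WrapFlags {
      bool NSW; // "no signed wrap"
      bool NUW; // "no unsigned wrap"
    };

    WrapFlags intersect(WrapFlags A, WrapFlags B) {
      return {A.NSW && B.NSW, A.NUW && B.NUW};
    }

    int main() {
      WrapFlags Original{true, false}, Replacement{true, true};
      WrapFlags Kept = intersect(Original, Replacement);
      std::printf("nsw=%d nuw=%d\n", Kept.NSW, Kept.NUW);
    }
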
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
@@ -1883,7 +1784,8 @@ bool GVN::processLoad(LoadInst *L) {
if (!MD)
return false;
- if (!L->isSimple())
+ // This code hasn't been audited for ordered or volatile memory access
+ if (!L->isUnordered())
return false;
if (L->use_empty()) {
@@ -1893,84 +1795,14 @@ bool GVN::processLoad(LoadInst *L) {
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
- const DataLayout &DL = L->getModule()->getDataLayout();
-
- // If we have a clobber and target data is around, see if this is a clobber
- // that we can fix up through code synthesis.
- if (Dep.isClobber()) {
- // Check to see if we have something like this:
- // store i32 123, i32* %P
- // %A = bitcast i32* %P to i8*
- // %B = gep i8* %A, i32 1
- // %C = load i8* %B
- //
- // We could do that by recognizing if the clobber instructions are obviously
- // a common base + constant offset, and if the previous store (or memset)
- // completely covers this load. This sort of thing can happen in bitfield
- // access code.
- Value *AvailVal = nullptr;
- if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
- int Offset = AnalyzeLoadFromClobberingStore(
- L->getType(), L->getPointerOperand(), DepSI);
- if (Offset != -1)
- AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
- L->getType(), L, DL);
- }
-
- // Check to see if we have something like this:
- // load i32* P
- // load i8* (P+1)
- // if we have this, replace the later with an extraction from the former.
- if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) {
- // If this is a clobber and L is the first instruction in its block, then
- // we have the first instruction in the entry block.
- if (DepLI == L)
- return false;
-
- int Offset = AnalyzeLoadFromClobberingLoad(
- L->getType(), L->getPointerOperand(), DepLI, DL);
- if (Offset != -1)
- AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
- }
-
- // If the clobbering value is a memset/memcpy/memmove, see if we can forward
- // a value on from it.
- if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
- int Offset = AnalyzeLoadFromClobberingMemInst(
- L->getType(), L->getPointerOperand(), DepMI, DL);
- if (Offset != -1)
- AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, DL);
- }
-
- if (AvailVal) {
- DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
- << *AvailVal << '\n' << *L << "\n\n\n");
-
- // Replace the load!
- L->replaceAllUsesWith(AvailVal);
- if (AvailVal->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(AvailVal);
- markInstructionForDeletion(L);
- ++NumGVNLoad;
- return true;
- }
-
- // If the value isn't available, don't do anything!
- DEBUG(
- // fast print dep, using operator<< on instruction is too slow.
- dbgs() << "GVN: load ";
- L->printAsOperand(dbgs());
- Instruction *I = Dep.getInst();
- dbgs() << " is clobbered by " << *I << '\n';
- );
- return false;
- }
// If it is defined in another block, try harder.
if (Dep.isNonLocal())
return processNonLocalLoad(L);
- if (!Dep.isDef()) {
+ // Only handle the local case below
+ if (!Dep.isDef() && !Dep.isClobber()) {
+ // This might be a NonFuncLocal or an Unknown
DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
@@ -1980,86 +1812,18 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
- Instruction *DepInst = Dep.getInst();
- if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
- Value *StoredVal = DepSI->getValueOperand();
-
- // The store and load are to a must-aliased pointer, but they may not
- // actually have the same type. See if we know how to reuse the stored
- // value (depending on its type).
- if (StoredVal->getType() != L->getType()) {
- IRBuilder<> Builder(L);
- StoredVal =
- CoerceAvailableValueToLoadType(StoredVal, L->getType(), Builder, DL);
- if (!StoredVal)
- return false;
-
- DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
- << '\n' << *L << "\n\n\n");
- }
-
- // Remove it!
- L->replaceAllUsesWith(StoredVal);
- if (StoredVal->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(StoredVal);
- markInstructionForDeletion(L);
- ++NumGVNLoad;
- return true;
- }
-
- if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
- Value *AvailableVal = DepLI;
-
- // The loads are of a must-aliased pointer, but they may not actually have
- // the same type. See if we know how to reuse the previously loaded value
- // (depending on its type).
- if (DepLI->getType() != L->getType()) {
- IRBuilder<> Builder(L);
- AvailableVal =
- CoerceAvailableValueToLoadType(DepLI, L->getType(), Builder, DL);
- if (!AvailableVal)
- return false;
-
- DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
- << "\n" << *L << "\n\n\n");
- }
-
- // Remove it!
- patchAndReplaceAllUsesWith(L, AvailableVal);
- if (DepLI->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(DepLI);
- markInstructionForDeletion(L);
- ++NumGVNLoad;
- return true;
- }
-
- // If this load really doesn't depend on anything, then we must be loading an
- // undef value. This can happen when loading for a fresh allocation with no
- // intervening stores, for example.
- if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) {
- L->replaceAllUsesWith(UndefValue::get(L->getType()));
- markInstructionForDeletion(L);
- ++NumGVNLoad;
- return true;
- }
+ AvailableValue AV;
+ if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
+ Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
- // If this load occurs either right after a lifetime begin,
- // then the loaded value is undefined.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
- L->replaceAllUsesWith(UndefValue::get(L->getType()));
- markInstructionForDeletion(L);
- ++NumGVNLoad;
- return true;
- }
- }
-
- // If this load follows a calloc (which zero initializes memory),
- // then the loaded value is zero
- if (isCallocLikeFn(DepInst, TLI)) {
- L->replaceAllUsesWith(Constant::getNullValue(L->getType()));
+ // Replace the load!
+ patchAndReplaceAllUsesWith(L, AvailableValue);
markInstructionForDeletion(L);
++NumGVNLoad;
+    // Tell MDA to reexamine the reused pointer since we might have more
+ // information after forwarding it.
+ if (MD && AvailableValue->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(AvailableValue);
return true;
}
@@ -2105,9 +1869,8 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
// GVN runs all such loops have preheaders, which means that Dst will have
// been changed to have only one predecessor, namely Src.
const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
- const BasicBlock *Src = E.getStart();
- assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
- (void)Src;
+ assert((!Pred || Pred == E.getStart()) &&
+ "No edge between these basic blocks!");
return Pred != nullptr;
}
@@ -2133,7 +1896,8 @@ bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
/// The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
-/// If DominatesByEdge is false, then it means that it is dominated by Root.End.
+/// If DominatesByEdge is false, then it means that we will propagate the RHS
+/// value starting from the end of Root.Start.
bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
bool DominatesByEdge) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
@@ -2141,7 +1905,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
bool Changed = false;
// For speed, compute a conservative fast approximation to
// DT->dominates(Root, Root.getEnd());
- bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
+ const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
while (!Worklist.empty()) {
std::pair<Value*, Value*> Item = Worklist.pop_back_val();
@@ -2164,12 +1928,12 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
// right-hand side, ensure the longest lived term is on the right-hand side,
// so the shortest lived term will be replaced by the longest lived.
// This tends to expose more simplifications.
- uint32_t LVN = VN.lookup_or_add(LHS);
+ uint32_t LVN = VN.lookupOrAdd(LHS);
if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
(isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
// Move the 'oldest' value to the right-hand side, using the value number
// as a proxy for age.
- uint32_t RVN = VN.lookup_or_add(RHS);
+ uint32_t RVN = VN.lookupOrAdd(RHS);
if (LVN < RVN) {
std::swap(LHS, RHS);
LVN = RVN;
@@ -2195,7 +1959,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
unsigned NumReplacements =
DominatesByEdge
? replaceDominatedUsesWith(LHS, RHS, *DT, Root)
- : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getEnd());
+ : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getStart());
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
@@ -2245,7 +2009,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
// Floating point -0.0 and 0.0 compare equal, so we can only
// propagate values if we know that we have a constant and that
// its value is non-zero.
-
+
// FIXME: We should do this optimization if 'no signed zeros' is
// applicable via an instruction-level fast-math-flag or some other
// indicator that relaxed FP semantics are being used.
@@ -2253,7 +2017,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero())
Worklist.push_back(std::make_pair(Op0, Op1));
}
-
+
// If "A >= B" is known true, replace "A < B" with false everywhere.
CmpInst::Predicate NotPred = Cmp->getInversePredicate();
Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
@@ -2261,7 +2025,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
// out the value number that it would have and use that to find an
// appropriate instruction (if any).
uint32_t NextNum = VN.getNextUnusedValueNumber();
- uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1);
+ uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
// If the number we were assigned was brand new then there is no point in
// looking for an instruction realizing it: there cannot be one!
if (Num < NextNum) {
@@ -2271,7 +2035,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
DominatesByEdge
? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
: replaceDominatedUsesWith(NotCmp, NotVal, *DT,
- Root.getEnd());
+ Root.getStart());
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
}
@@ -2303,12 +2067,21 @@ bool GVN::processInstruction(Instruction *I) {
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
const DataLayout &DL = I->getModule()->getDataLayout();
if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
- I->replaceAllUsesWith(V);
- if (MD && V->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(V);
- markInstructionForDeletion(I);
- ++NumGVNSimpl;
- return true;
+ bool Changed = false;
+ if (!I->use_empty()) {
+ I->replaceAllUsesWith(V);
+ Changed = true;
+ }
+ if (isInstructionTriviallyDead(I, TLI)) {
+ markInstructionForDeletion(I);
+ Changed = true;
+ }
+ if (Changed) {
+ if (MD && V->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ ++NumGVNSimpl;
+ return true;
+ }
}
if (IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(I))
@@ -2319,7 +2092,7 @@ bool GVN::processInstruction(Instruction *I) {
if (processLoad(LI))
return true;
- unsigned Num = VN.lookup_or_add(LI);
+ unsigned Num = VN.lookupOrAdd(LI);
addToLeaderTable(Num, LI, LI->getParent());
return false;
}
@@ -2383,7 +2156,7 @@ bool GVN::processInstruction(Instruction *I) {
return false;
uint32_t NextNum = VN.getNextUnusedValueNumber();
- unsigned Num = VN.lookup_or_add(I);
+ unsigned Num = VN.lookupOrAdd(I);
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
@@ -2422,18 +2195,16 @@ bool GVN::processInstruction(Instruction *I) {
}
/// runOnFunction - This is the main transformation entry point for a function.
-bool GVN::runOnFunction(Function& F) {
- if (skipOptnoneFunction(F))
- return false;
-
- if (!NoLoads)
- MD = &getAnalysis<MemoryDependenceAnalysis>();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- VN.setAliasAnalysis(&getAnalysis<AAResultsWrapperPass>().getAAResults());
- VN.setMemDep(MD);
+bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
+ const TargetLibraryInfo &RunTLI, AAResults &RunAA,
+ MemoryDependenceResults *RunMD) {
+ AC = &RunAC;
+ DT = &RunDT;
VN.setDomTree(DT);
+ TLI = &RunTLI;
+ VN.setAliasAnalysis(&RunAA);
+ MD = RunMD;
+ VN.setMemDep(MD);
bool Changed = false;
bool ShouldContinue = true;
@@ -2476,7 +2247,7 @@ bool GVN::runOnFunction(Function& F) {
cleanupGlobalSets();
// Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
- // iteration.
+ // iteration.
DeadBlocks.clear();
return Changed;
@@ -2576,8 +2347,6 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
}
bool GVN::performScalarPRE(Instruction *CurInst) {
- SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
-
if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
@@ -2608,8 +2377,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
unsigned NumWithout = 0;
BasicBlock *PREPred = nullptr;
BasicBlock *CurrentBlock = CurInst->getParent();
- predMap.clear();
+ SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
for (BasicBlock *P : predecessors(CurrentBlock)) {
// We're not interested in PRE where the block is its
// own predecessor, or in blocks with predecessors
@@ -2702,7 +2471,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
DEBUG(verifyRemoved(CurInst));
CurInst->eraseFromParent();
++NumGVNInstr;
-
+
return true;
}
@@ -2825,7 +2594,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
SmallVector<BasicBlock *, 8> Dom;
DT->getDescendants(D, Dom);
DeadBlocks.insert(Dom.begin(), Dom.end());
-
+
// Figure out the dominance-frontier(D).
for (BasicBlock *B : Dom) {
for (BasicBlock *S : successors(B)) {
@@ -2883,13 +2652,13 @@ void GVN::addDeadBlock(BasicBlock *BB) {
// If the given branch is recognized as a foldable branch (i.e. conditional
// branch with constant condition), it will perform following analyses and
// transformation.
-// 1) If the dead out-coming edge is a critical-edge, split it. Let
+// 1) If the dead out-coming edge is a critical-edge, split it. Let
// R be the target of the dead out-coming edge.
// 1) Identify the set of dead blocks implied by the branch's dead out-coming
// edge. The result of this step will be {X| X is dominated by R}
// 2) Identify those blocks which have at least one dead predecessor. The
// result of this step will be dominance-frontier(R).
-// 3) Update the PHIs in DF(R) by replacing the operands corresponding to
+// 3) Update the PHIs in DF(R) by replacing the operands corresponding to
// dead blocks with "UndefVal" in the hope these PHIs will be optimized away.
//
// Return true iff *NEW* dead code is found.
@@ -2905,8 +2674,8 @@ bool GVN::processFoldableCondBr(BranchInst *BI) {
if (!Cond)
return false;
- BasicBlock *DeadRoot = Cond->getZExtValue() ?
- BI->getSuccessor(1) : BI->getSuccessor(0);
+ BasicBlock *DeadRoot =
+ Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
if (DeadBlocks.count(DeadRoot))
return false;
@@ -2924,8 +2693,62 @@ bool GVN::processFoldableCondBr(BranchInst *BI) {
void GVN::assignValNumForDeadCode() {
for (BasicBlock *BB : DeadBlocks) {
for (Instruction &Inst : *BB) {
- unsigned ValNum = VN.lookup_or_add(&Inst);
+ unsigned ValNum = VN.lookupOrAdd(&Inst);
addToLeaderTable(ValNum, &Inst, BB);
}
}
}
+
+class llvm::gvn::GVNLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit GVNLegacyPass(bool NoLoads = false)
+ : FunctionPass(ID), NoLoads(NoLoads) {
+ initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ return Impl.runImpl(
+ F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ getAnalysis<AAResultsWrapperPass>().getAAResults(),
+ NoLoads ? nullptr
+ : &getAnalysis<MemoryDependenceWrapperPass>().getMemDep());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ if (!NoLoads)
+ AU.addRequired<MemoryDependenceWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+
+private:
+ bool NoLoads;
+ GVN Impl;
+};
+
+char GVNLegacyPass::ID = 0;
+
+// The public interface to this file...
+FunctionPass *llvm::createGVNPass(bool NoLoads) {
+ return new GVNLegacyPass(NoLoads);
+}
+
+INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
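The hunk above replaces GVN::runOnFunction with a runImpl entry point and moves the legacy-PM plumbing into the new GVNLegacyPass wrapper. As a minimal usage sketch, not part of this diff, the wrapper returned by createGVNPass() would be scheduled through the legacy pass manager roughly as follows; the helper name runGVNOnModule and the exact include set are assumptions for illustration.

// Sketch only: drive the GVNLegacyPass wrapper via the legacy pass manager.
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Scalar.h"        // declares createGVNPass()

static bool runGVNOnModule(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  // NoLoads=false keeps load-dependent GVN enabled, so the wrapper's
  // getAnalysisUsage() above also requires MemoryDependenceWrapperPass.
  PM.add(llvm::createGVNPass(/*NoLoads=*/false));
  return PM.run(M);                        // true if the IR was modified
}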
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
new file mode 100644
index 0000000000000..cce1db3874b78
--- /dev/null
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -0,0 +1,825 @@
+//===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass hoists expressions from branches to a common dominator. It uses
+// GVN (global value numbering) to discover expressions computing the same
+// values. The primary goal is to reduce the code size, and in some
+// cases reduce critical path (by exposing more ILP).
+// Hoisting may affect the performance in some cases. To mitigate that, hoisting
+// is disabled in the following cases.
+// 1. Scalars across calls.
+// 2. geps when corresponding load/store cannot be hoisted.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "gvn-hoist"
+
+STATISTIC(NumHoisted, "Number of instructions hoisted");
+STATISTIC(NumRemoved, "Number of instructions removed");
+STATISTIC(NumLoadsHoisted, "Number of loads hoisted");
+STATISTIC(NumLoadsRemoved, "Number of loads removed");
+STATISTIC(NumStoresHoisted, "Number of stores hoisted");
+STATISTIC(NumStoresRemoved, "Number of stores removed");
+STATISTIC(NumCallsHoisted, "Number of calls hoisted");
+STATISTIC(NumCallsRemoved, "Number of calls removed");
+
+static cl::opt<int>
+ MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1),
+ cl::desc("Max number of instructions to hoist "
+ "(default unlimited = -1)"));
+static cl::opt<int> MaxNumberOfBBSInPath(
+ "gvn-hoist-max-bbs", cl::Hidden, cl::init(4),
+ cl::desc("Max number of basic blocks on the path between "
+ "hoisting locations (default = 4, unlimited = -1)"));
+
+namespace {
+
+// Provides a sorting function based on the execution order of two instructions.
+struct SortByDFSIn {
+private:
+ DenseMap<const BasicBlock *, unsigned> &DFSNumber;
+
+public:
+ SortByDFSIn(DenseMap<const BasicBlock *, unsigned> &D) : DFSNumber(D) {}
+
+ // Returns true when A executes before B.
+ bool operator()(const Instruction *A, const Instruction *B) const {
+ // FIXME: libc++ has a std::sort() algorithm that will call the compare
+ // function on the same element. Once PR20837 is fixed and some more years
+ // pass by and all the buildbots have moved to a corrected std::sort(),
+ // enable the following assert:
+ //
+ // assert(A != B);
+
+ const BasicBlock *BA = A->getParent();
+ const BasicBlock *BB = B->getParent();
+ unsigned NA = DFSNumber[BA];
+ unsigned NB = DFSNumber[BB];
+ if (NA < NB)
+ return true;
+ if (NA == NB) {
+ // Sort them in the order they occur in the same basic block.
+ BasicBlock::const_iterator AI(A), BI(B);
+ return std::distance(AI, BI) < 0;
+ }
+ return false;
+ }
+};
+
+// A map from a pair of VNs to all the instructions with those VNs.
+typedef DenseMap<std::pair<unsigned, unsigned>, SmallVector<Instruction *, 4>>
+ VNtoInsns;
+// An invalid value number used when inserting a single value number into
+// VNtoInsns.
+enum : unsigned { InvalidVN = ~2U };
+
+// Records all scalar instructions candidate for code hoisting.
+class InsnInfo {
+ VNtoInsns VNtoScalars;
+
+public:
+ // Inserts I and its value number in VNtoScalars.
+ void insert(Instruction *I, GVN::ValueTable &VN) {
+ // Scalar instruction.
+ unsigned V = VN.lookupOrAdd(I);
+ VNtoScalars[{V, InvalidVN}].push_back(I);
+ }
+
+ const VNtoInsns &getVNTable() const { return VNtoScalars; }
+};
+
+// Records all load instructions candidate for code hoisting.
+class LoadInfo {
+ VNtoInsns VNtoLoads;
+
+public:
+ // Insert Load and the value number of its memory address in VNtoLoads.
+ void insert(LoadInst *Load, GVN::ValueTable &VN) {
+ if (Load->isSimple()) {
+ unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
+ VNtoLoads[{V, InvalidVN}].push_back(Load);
+ }
+ }
+
+ const VNtoInsns &getVNTable() const { return VNtoLoads; }
+};
+
+// Records all store instructions candidate for code hoisting.
+class StoreInfo {
+ VNtoInsns VNtoStores;
+
+public:
+ // Insert the Store and a hash number of the store address and the stored
+ // value in VNtoStores.
+ void insert(StoreInst *Store, GVN::ValueTable &VN) {
+ if (!Store->isSimple())
+ return;
+ // Hash the store address and the stored value.
+ Value *Ptr = Store->getPointerOperand();
+ Value *Val = Store->getValueOperand();
+ VNtoStores[{VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val)}].push_back(Store);
+ }
+
+ const VNtoInsns &getVNTable() const { return VNtoStores; }
+};
+
+// Records all call instructions candidate for code hoisting.
+class CallInfo {
+ VNtoInsns VNtoCallsScalars;
+ VNtoInsns VNtoCallsLoads;
+ VNtoInsns VNtoCallsStores;
+
+public:
+ // Insert Call and its value numbering in one of the VNtoCalls* containers.
+ void insert(CallInst *Call, GVN::ValueTable &VN) {
+ // A call that doesNotAccessMemory is handled as a Scalar,
+ // onlyReadsMemory will be handled as a Load instruction,
+ // all other calls will be handled as stores.
+ unsigned V = VN.lookupOrAdd(Call);
+ auto Entry = std::make_pair(V, InvalidVN);
+
+ if (Call->doesNotAccessMemory())
+ VNtoCallsScalars[Entry].push_back(Call);
+ else if (Call->onlyReadsMemory())
+ VNtoCallsLoads[Entry].push_back(Call);
+ else
+ VNtoCallsStores[Entry].push_back(Call);
+ }
+
+ const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; }
+
+ const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; }
+
+ const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; }
+};
+
+typedef DenseMap<const BasicBlock *, bool> BBSideEffectsSet;
+typedef SmallVector<Instruction *, 4> SmallVecInsn;
+typedef SmallVectorImpl<Instruction *> SmallVecImplInsn;
+
+// This pass hoists common computations across branches sharing common
+// dominator. The primary goal is to reduce the code size, and in some
+// cases reduce critical path (by exposing more ILP).
+class GVNHoist {
+public:
+ GVN::ValueTable VN;
+ DominatorTree *DT;
+ AliasAnalysis *AA;
+ MemoryDependenceResults *MD;
+ const bool OptForMinSize;
+ DenseMap<const BasicBlock *, unsigned> DFSNumber;
+ BBSideEffectsSet BBSideEffects;
+ MemorySSA *MSSA;
+ int HoistedCtr;
+
+ enum InsKind { Unknown, Scalar, Load, Store };
+
+ GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md,
+ bool OptForMinSize)
+ : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize), HoistedCtr(0) {}
+
+ // Return true when there is exception handling in BB.
+ bool hasEH(const BasicBlock *BB) {
+ auto It = BBSideEffects.find(BB);
+ if (It != BBSideEffects.end())
+ return It->second;
+
+ if (BB->isEHPad() || BB->hasAddressTaken()) {
+ BBSideEffects[BB] = true;
+ return true;
+ }
+
+ if (BB->getTerminator()->mayThrow()) {
+ BBSideEffects[BB] = true;
+ return true;
+ }
+
+ BBSideEffects[BB] = false;
+ return false;
+ }
+
+ // Return true when all paths from A to the end of the function pass through
+ // either B or C.
+ bool hoistingFromAllPaths(const BasicBlock *A, const BasicBlock *B,
+ const BasicBlock *C) {
+ // We fully copy the WL in order to be able to remove items from it.
+ SmallPtrSet<const BasicBlock *, 2> WL;
+ WL.insert(B);
+ WL.insert(C);
+
+ for (auto It = df_begin(A), E = df_end(A); It != E;) {
+ // There exists a path from A to the exit of the function if we are still
+ // iterating in DF traversal and we have removed all blocks from the work
+ // list.
+ if (WL.empty())
+ return false;
+
+ const BasicBlock *BB = *It;
+ if (WL.erase(BB)) {
+ // Stop DFS traversal when BB is in the work list.
+ It.skipChildren();
+ continue;
+ }
+
+ // Check for end of function, calls that do not return, etc.
+ if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
+ return false;
+
+ // Increment DFS traversal when not skipping children.
+ ++It;
+ }
+
+ return true;
+ }
+
+ // Return true when I1 appears before I2 in the instructions of BB.
+ bool firstInBB(BasicBlock *BB, const Instruction *I1, const Instruction *I2) {
+ for (Instruction &I : *BB) {
+ if (&I == I1)
+ return true;
+ if (&I == I2)
+ return false;
+ }
+
+ llvm_unreachable("I1 and I2 not found in BB");
+ }
+ // Return true when there are users of Def in BB.
+ bool hasMemoryUseOnPath(MemoryAccess *Def, const BasicBlock *BB,
+ const Instruction *OldPt) {
+ const BasicBlock *DefBB = Def->getBlock();
+ const BasicBlock *OldBB = OldPt->getParent();
+
+ for (User *U : Def->users())
+ if (auto *MU = dyn_cast<MemoryUse>(U)) {
+ BasicBlock *UBB = MU->getBlock();
+ // Only analyze uses in BB.
+ if (BB != UBB)
+ continue;
+
+ // A use in the same block as the Def is on the path.
+ if (UBB == DefBB) {
+ assert(MSSA->locallyDominates(Def, MU) && "def not dominating use");
+ return true;
+ }
+
+ if (UBB != OldBB)
+ return true;
+
+ // It is only harmful to hoist when the use is before OldPt.
+ if (firstInBB(UBB, MU->getMemoryInst(), OldPt))
+ return true;
+ }
+
+ return false;
+ }
+
+ // Return true when there is exception handling or a load of memory Def
+ // between OldPt and NewPt.
+
+ // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+ // return true when the counter NBBsOnAllPaths reaches 0, except when it is
+ // initialized to -1 which is unlimited.
+ bool hasEHOrLoadsOnPath(const Instruction *NewPt, const Instruction *OldPt,
+ MemoryAccess *Def, int &NBBsOnAllPaths) {
+ const BasicBlock *NewBB = NewPt->getParent();
+ const BasicBlock *OldBB = OldPt->getParent();
+ assert(DT->dominates(NewBB, OldBB) && "invalid path");
+ assert(DT->dominates(Def->getBlock(), NewBB) &&
+ "def does not dominate new hoisting point");
+
+ // Walk all basic blocks reachable in depth-first iteration on the inverse
+ // CFG from OldBB to NewBB. These blocks are all the blocks that may be
+ // executed between the execution of NewBB and OldBB. Hoisting an expression
+ // from OldBB into NewBB has to be safe on all execution paths.
+ for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
+ if (*I == NewBB) {
+ // Stop traversal when reaching HoistPt.
+ I.skipChildren();
+ continue;
+ }
+
+ // Impossible to hoist with exceptions on the path.
+ if (hasEH(*I))
+ return true;
+
+ // Check that we do not move a store past loads.
+ if (hasMemoryUseOnPath(Def, *I, OldPt))
+ return true;
+
+ // Stop walk once the limit is reached.
+ if (NBBsOnAllPaths == 0)
+ return true;
+
+ // -1 is unlimited number of blocks on all paths.
+ if (NBBsOnAllPaths != -1)
+ --NBBsOnAllPaths;
+
+ ++I;
+ }
+
+ return false;
+ }
+
+ // Return true when there is exception handling between HoistPt and BB.
+ // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+ // return true when the counter NBBsOnAllPaths reaches 0, except when it is
+ // initialized to -1 which is unlimited.
+ bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *BB,
+ int &NBBsOnAllPaths) {
+ assert(DT->dominates(HoistPt, BB) && "Invalid path");
+
+ // Walk all basic blocks reachable in depth-first iteration on
+ // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
+ // blocks that may be executed between the execution of NewHoistPt and
+ // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
+ // on all execution paths.
+ for (auto I = idf_begin(BB), E = idf_end(BB); I != E;) {
+ if (*I == HoistPt) {
+ // Stop traversal when reaching NewHoistPt.
+ I.skipChildren();
+ continue;
+ }
+
+ // Impossible to hoist with exceptions on the path.
+ if (hasEH(*I))
+ return true;
+
+ // Stop walk once the limit is reached.
+ if (NBBsOnAllPaths == 0)
+ return true;
+
+ // -1 is unlimited number of blocks on all paths.
+ if (NBBsOnAllPaths != -1)
+ --NBBsOnAllPaths;
+
+ ++I;
+ }
+
+ return false;
+ }
+
+ // Return true when it is safe to hoist a memory load or store U from OldPt
+ // to NewPt.
+ bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt,
+ MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) {
+
+ // In place hoisting is safe.
+ if (NewPt == OldPt)
+ return true;
+
+ const BasicBlock *NewBB = NewPt->getParent();
+ const BasicBlock *OldBB = OldPt->getParent();
+ const BasicBlock *UBB = U->getBlock();
+
+ // Check for dependences on the Memory SSA.
+ MemoryAccess *D = U->getDefiningAccess();
+ BasicBlock *DBB = D->getBlock();
+ if (DT->properlyDominates(NewBB, DBB))
+ // Cannot move the load or store to NewBB above its definition in DBB.
+ return false;
+
+ if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
+ if (auto *UD = dyn_cast<MemoryUseOrDef>(D))
+ if (firstInBB(DBB, NewPt, UD->getMemoryInst()))
+ // Cannot move the load or store to NewPt above its definition in D.
+ return false;
+
+ // Check for unsafe hoistings due to side effects.
+ if (K == InsKind::Store) {
+ if (hasEHOrLoadsOnPath(NewPt, OldPt, D, NBBsOnAllPaths))
+ return false;
+ } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
+ return false;
+
+ if (UBB == NewBB) {
+ if (DT->properlyDominates(DBB, NewBB))
+ return true;
+ assert(UBB == DBB);
+ assert(MSSA->locallyDominates(D, U));
+ }
+
+ // No side effects: it is safe to hoist.
+ return true;
+ }
+
+ // Return true when it is safe to hoist scalar instructions from BB1 and BB2
+ // to HoistBB.
+ bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB1,
+ const BasicBlock *BB2, int &NBBsOnAllPaths) {
+ // Check that the hoisted expression is needed on all paths. When HoistBB
+ // already contains an instruction to be hoisted, the expression is needed
+ // on all paths. Enable scalar hoisting at -Oz as it is safe to hoist
+ // scalars to a place where they are partially needed.
+ if (!OptForMinSize && BB1 != HoistBB &&
+ !hoistingFromAllPaths(HoistBB, BB1, BB2))
+ return false;
+
+ if (hasEHOnPath(HoistBB, BB1, NBBsOnAllPaths) ||
+ hasEHOnPath(HoistBB, BB2, NBBsOnAllPaths))
+ return false;
+
+ // Safe to hoist scalars from BB1 and BB2 to HoistBB.
+ return true;
+ }
+
+ // Each element of a hoisting list contains the basic block to hoist into and
+ // a list of instructions to be hoisted.
+ typedef std::pair<BasicBlock *, SmallVecInsn> HoistingPointInfo;
+ typedef SmallVector<HoistingPointInfo, 4> HoistingPointList;
+
+ // Partition InstructionsToHoist into a set of candidates which can share a
+ // common hoisting point. The partitions are collected in HPL. K indicates
+ // whether the instructions in InstructionsToHoist are scalars, loads, or
+ // stores.
+ void partitionCandidates(SmallVecImplInsn &InstructionsToHoist,
+ HoistingPointList &HPL, InsKind K) {
+ // No need to sort for two instructions.
+ if (InstructionsToHoist.size() > 2) {
+ SortByDFSIn Pred(DFSNumber);
+ std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred);
+ }
+
+ int NBBsOnAllPaths = MaxNumberOfBBSInPath;
+
+ SmallVecImplInsn::iterator II = InstructionsToHoist.begin();
+ SmallVecImplInsn::iterator Start = II;
+ Instruction *HoistPt = *II;
+ BasicBlock *HoistBB = HoistPt->getParent();
+ MemoryUseOrDef *UD;
+ if (K != InsKind::Scalar)
+ UD = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(HoistPt));
+
+ for (++II; II != InstructionsToHoist.end(); ++II) {
+ Instruction *Insn = *II;
+ BasicBlock *BB = Insn->getParent();
+ BasicBlock *NewHoistBB;
+ Instruction *NewHoistPt;
+
+ if (BB == HoistBB) {
+ NewHoistBB = HoistBB;
+ NewHoistPt = firstInBB(BB, Insn, HoistPt) ? Insn : HoistPt;
+ } else {
+ NewHoistBB = DT->findNearestCommonDominator(HoistBB, BB);
+ if (NewHoistBB == BB)
+ NewHoistPt = Insn;
+ else if (NewHoistBB == HoistBB)
+ NewHoistPt = HoistPt;
+ else
+ NewHoistPt = NewHoistBB->getTerminator();
+ }
+
+ if (K == InsKind::Scalar) {
+ if (safeToHoistScalar(NewHoistBB, HoistBB, BB, NBBsOnAllPaths)) {
+ // Extend HoistPt to NewHoistPt.
+ HoistPt = NewHoistPt;
+ HoistBB = NewHoistBB;
+ continue;
+ }
+ } else {
+ // When NewBB already contains an instruction to be hoisted, the
+ // expression is needed on all paths.
+ // Check that the hoisted expression is needed on all paths: it is
+ // unsafe to hoist loads to a place where there may be a path not
+ // loading from the same address: for instance there may be a branch on
+ // which the address of the load may not be initialized.
+ if ((HoistBB == NewHoistBB || BB == NewHoistBB ||
+ hoistingFromAllPaths(NewHoistBB, HoistBB, BB)) &&
+ // Also check that it is safe to move the load or store from HoistPt
+ // to NewHoistPt, and from Insn to NewHoistPt.
+ safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NBBsOnAllPaths) &&
+ safeToHoistLdSt(NewHoistPt, Insn,
+ cast<MemoryUseOrDef>(MSSA->getMemoryAccess(Insn)),
+ K, NBBsOnAllPaths)) {
+ // Extend HoistPt to NewHoistPt.
+ HoistPt = NewHoistPt;
+ HoistBB = NewHoistBB;
+ continue;
+ }
+ }
+
+ // At this point it is not safe to extend the current hoisting to
+ // NewHoistPt: save the hoisting list so far.
+ if (std::distance(Start, II) > 1)
+ HPL.push_back({HoistBB, SmallVecInsn(Start, II)});
+
+ // Start over from BB.
+ Start = II;
+ if (K != InsKind::Scalar)
+ UD = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(*Start));
+ HoistPt = Insn;
+ HoistBB = BB;
+ NBBsOnAllPaths = MaxNumberOfBBSInPath;
+ }
+
+ // Save the last partition.
+ if (std::distance(Start, II) > 1)
+ HPL.push_back({HoistBB, SmallVecInsn(Start, II)});
+ }
+
+ // Initialize HPL from Map.
+ void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL,
+ InsKind K) {
+ for (const auto &Entry : Map) {
+ if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold)
+ return;
+
+ const SmallVecInsn &V = Entry.second;
+ if (V.size() < 2)
+ continue;
+
+ // Compute the insertion point and the list of expressions to be hoisted.
+ SmallVecInsn InstructionsToHoist;
+ for (auto I : V)
+ if (!hasEH(I->getParent()))
+ InstructionsToHoist.push_back(I);
+
+ if (!InstructionsToHoist.empty())
+ partitionCandidates(InstructionsToHoist, HPL, K);
+ }
+ }
+
+ // Return true when all operands of Instr are available at insertion point
+ // HoistPt. When limiting the number of hoisted expressions, one could hoist
+ // a load without hoisting its access function. So before hoisting any
+ // expression, make sure that all its operands are available at insert point.
+ bool allOperandsAvailable(const Instruction *I,
+ const BasicBlock *HoistPt) const {
+ for (const Use &Op : I->operands())
+ if (const auto *Inst = dyn_cast<Instruction>(&Op))
+ if (!DT->dominates(Inst->getParent(), HoistPt))
+ return false;
+
+ return true;
+ }
+
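+ // Return whichever of I or J appears first in their common basic block.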
+ Instruction *firstOfTwo(Instruction *I, Instruction *J) const {
+ for (Instruction &I1 : *I->getParent())
+ if (&I1 == I || &I1 == J)
+ return &I1;
+ llvm_unreachable("Both I and J must be from same BB");
+ }
+
+ // Replace the use of From with To in Insn.
+ void replaceUseWith(Instruction *Insn, Value *From, Value *To) const {
+ for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE;) {
+ Use &U = *UI++;
+ if (U.getUser() == Insn) {
+ U.set(To);
+ return;
+ }
+ }
+ llvm_unreachable("should replace exactly once");
+ }
+
+ bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt) const {
+ // Check whether the GEP of a ld/st can be synthesized at HoistPt.
+ GetElementPtrInst *Gep = nullptr;
+ Instruction *Val = nullptr;
+ if (auto *Ld = dyn_cast<LoadInst>(Repl))
+ Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
+ if (auto *St = dyn_cast<StoreInst>(Repl)) {
+ Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
+ Val = dyn_cast<Instruction>(St->getValueOperand());
+ // Check that the stored value is available.
+ if (Val) {
+ if (isa<GetElementPtrInst>(Val)) {
+ // Check whether we can compute the GEP at HoistPt.
+ if (!allOperandsAvailable(Val, HoistPt))
+ return false;
+ } else if (!DT->dominates(Val->getParent(), HoistPt))
+ return false;
+ }
+ }
+
+ // Check whether we can compute the Gep at HoistPt.
+ if (!Gep || !allOperandsAvailable(Gep, HoistPt))
+ return false;
+
+ // Copy the gep before moving the ld/st.
+ Instruction *ClonedGep = Gep->clone();
+ ClonedGep->insertBefore(HoistPt->getTerminator());
+ replaceUseWith(Repl, Gep, ClonedGep);
+
+ // Also copy Val when it is a GEP.
+ if (Val && isa<GetElementPtrInst>(Val)) {
+ Instruction *ClonedVal = Val->clone();
+ ClonedVal->insertBefore(HoistPt->getTerminator());
+ replaceUseWith(Repl, Val, ClonedVal);
+ }
+
+ return true;
+ }
+
+ std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL) {
+ unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
+ for (const HoistingPointInfo &HP : HPL) {
+ // Find out whether we already have one of the instructions in HoistPt,
+ // in which case we do not have to move it.
+ BasicBlock *HoistPt = HP.first;
+ const SmallVecInsn &InstructionsToHoist = HP.second;
+ Instruction *Repl = nullptr;
+ for (Instruction *I : InstructionsToHoist)
+ if (I->getParent() == HoistPt) {
+ // If there are two instructions in HoistPt to be hoisted in place:
+ // update Repl to be the first one, such that we can rename the uses
+ // of the second based on the first.
+ Repl = !Repl ? I : firstOfTwo(Repl, I);
+ }
+
+ if (Repl) {
+ // Repl is already in HoistPt: it remains in place.
+ assert(allOperandsAvailable(Repl, HoistPt) &&
+ "instruction depends on operands that are not available");
+ } else {
+ // When we do not find Repl in HoistPt, select the first in the list
+ // and move it to HoistPt.
+ Repl = InstructionsToHoist.front();
+
+ // We can move Repl in HoistPt only when all operands are available.
+ // The order in which hoistings are done may influence the availability
+ // of operands.
+ if (!allOperandsAvailable(Repl, HoistPt) &&
+ !makeOperandsAvailable(Repl, HoistPt))
+ continue;
+ Repl->moveBefore(HoistPt->getTerminator());
+ }
+
+ if (isa<LoadInst>(Repl))
+ ++NL;
+ else if (isa<StoreInst>(Repl))
+ ++NS;
+ else if (isa<CallInst>(Repl))
+ ++NC;
+ else // Scalar
+ ++NI;
+
+ // Remove and rename all other instructions.
+ for (Instruction *I : InstructionsToHoist)
+ if (I != Repl) {
+ ++NR;
+ if (isa<LoadInst>(Repl))
+ ++NumLoadsRemoved;
+ else if (isa<StoreInst>(Repl))
+ ++NumStoresRemoved;
+ else if (isa<CallInst>(Repl))
+ ++NumCallsRemoved;
+ I->replaceAllUsesWith(Repl);
+ I->eraseFromParent();
+ }
+ }
+
+ NumHoisted += NL + NS + NC + NI;
+ NumRemoved += NR;
+ NumLoadsHoisted += NL;
+ NumStoresHoisted += NS;
+ NumCallsHoisted += NC;
+ return {NI, NL + NC + NS};
+ }
+
+ // Hoist all expressions. Returns the number of scalars hoisted
+ // and the number of non-scalars hoisted.
+ std::pair<unsigned, unsigned> hoistExpressions(Function &F) {
+ InsnInfo II;
+ LoadInfo LI;
+ StoreInfo SI;
+ CallInfo CI;
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
+ for (Instruction &I1 : *BB) {
+ if (auto *Load = dyn_cast<LoadInst>(&I1))
+ LI.insert(Load, VN);
+ else if (auto *Store = dyn_cast<StoreInst>(&I1))
+ SI.insert(Store, VN);
+ else if (auto *Call = dyn_cast<CallInst>(&I1)) {
+ if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) {
+ if (isa<DbgInfoIntrinsic>(Intr) ||
+ Intr->getIntrinsicID() == Intrinsic::assume)
+ continue;
+ }
+ if (Call->mayHaveSideEffects()) {
+ if (!OptForMinSize)
+ break;
+ // We may continue hoisting across calls which write to memory.
+ if (Call->mayThrow())
+ break;
+ }
+ CI.insert(Call, VN);
+ } else if (OptForMinSize || !isa<GetElementPtrInst>(&I1))
+ // Do not hoist scalars past calls that may write to memory because
+ // that could result in spills later. geps are handled separately.
+ // TODO: We can relax this for targets like AArch64 as they have more
+ // registers than X86.
+ II.insert(&I1, VN);
+ }
+ }
+
+ HoistingPointList HPL;
+ computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
+ computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
+ computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
+ computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
+ computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
+ computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
+ return hoist(HPL);
+ }
+
+ bool run(Function &F) {
+ VN.setDomTree(DT);
+ VN.setAliasAnalysis(AA);
+ VN.setMemDep(MD);
+ bool Res = false;
+
+ unsigned I = 0;
+ for (const BasicBlock *BB : depth_first(&F.getEntryBlock()))
+ DFSNumber.insert({BB, ++I});
+
+ // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
+ while (1) {
+ // FIXME: only compute MemorySSA once. We need to update the analysis in
+ // the same time as transforming the code.
+ MemorySSA M(F, AA, DT);
+ MSSA = &M;
+
+ auto HoistStat = hoistExpressions(F);
+ if (HoistStat.first + HoistStat.second == 0) {
+ return Res;
+ }
+ if (HoistStat.second > 0) {
+ // To address a limitation of the current GVN, we need to rerun the
+ // hoisting after we hoisted loads in order to be able to hoist all
+ // scalars dependent on the hoisted loads. Same for stores.
+ VN.clear();
+ }
+ Res = true;
+ }
+
+ return Res;
+ }
+};
+
+class GVNHoistLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ GVNHoistLegacyPass() : FunctionPass(ID) {
+ initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+
+ GVNHoist G(&DT, &AA, &MD, F.optForMinSize());
+ return G.run(F);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MemoryDependenceWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+};
+} // namespace
+
+PreservedAnalyses GVNHoistPass::run(Function &F,
+ AnalysisManager<Function> &AM) {
+ DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ AliasAnalysis &AA = AM.getResult<AAManager>(F);
+ MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+
+ GVNHoist G(&DT, &AA, &MD, F.optForMinSize());
+ if (!G.run(F))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+char GVNHoistLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
+ "Early GVN Hoisting of Expressions", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",
+ "Early GVN Hoisting of Expressions", false, false)
+
+FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); }
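As an illustrative sketch, not part of this diff, the new-PM GVNHoistPass defined above could be exercised on a single function as below; the hoistOnce helper name is an assumption, and the analysis registration uses the standard PassBuilder helpers so that the DominatorTree, AA and MemoryDependence results requested in GVNHoistPass::run() are available.

// Sketch only: run GVNHoistPass through the new pass manager.
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/GVN.h"    // declares GVNHoistPass

static void hoistOnce(llvm::Function &F) {
  llvm::PassBuilder PB;
  llvm::FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);        // DominatorTreeAnalysis, AAManager, ...
  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::GVNHoistPass());
  FPM.run(F, FAM);
}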
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
new file mode 100644
index 0000000000000..7686e65efed92
--- /dev/null
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -0,0 +1,691 @@
+//===- GuardWidening.cpp - Guard widening ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the guard widening pass. The semantics of the
+// @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails
+// more often than it did before the transform. This optimization is called
+// "widening" and can be used to hoist and common runtime checks in situations
+// like these:
+//
+// %cmp0 = 7 u< Length
+// call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
+// call @unknown_side_effects()
+// %cmp1 = 9 u< Length
+// call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
+// ...
+//
+// =>
+//
+// %cmp0 = 9 u< Length
+// call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
+// call @unknown_side_effects()
+// ...
+//
+// If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
+// generic implementation of the same function, which will have the correct
+// semantics from that point onward. It is always _legal_ to deoptimize (so
+// replacing %cmp0 with false is "correct"), though it may not always be
+// profitable to do so.
+//
+// NB! This pass is a work in progress. It hasn't been tuned to be "production
+// ready" yet. It is known to have quadratic running time and will not scale
+// to large numbers of guards.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/GuardWidening.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "guard-widening"
+
+namespace {
+
+class GuardWideningImpl {
+ DominatorTree &DT;
+ PostDominatorTree &PDT;
+ LoopInfo &LI;
+
+ /// The set of guards whose conditions have been widened into dominating
+ /// guards.
+ SmallVector<IntrinsicInst *, 16> EliminatedGuards;
+
+ /// The set of guards which have been widened to include conditions to other
+ /// guards.
+ DenseSet<IntrinsicInst *> WidenedGuards;
+
+ /// Try to eliminate guard \p Guard by widening it into an earlier dominating
+ /// guard. \p DFSI is the DFS iterator on the dominator tree that is
+ /// currently visiting the block containing \p Guard, and \p GuardsPerBlock
+ /// maps BasicBlocks to the set of guards seen in that block.
+ bool eliminateGuardViaWidening(
+ IntrinsicInst *Guard, const df_iterator<DomTreeNode *> &DFSI,
+ const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+ GuardsPerBlock);
+
+ /// Used to keep track of which widening potential is more effective.
+ enum WideningScore {
+ /// Don't widen.
+ WS_IllegalOrNegative,
+
+ /// Widening is performance neutral as far as the cycles spent in check
+ /// conditions go (but can still help, e.g., code layout, having less
+ /// deopt state).
+ WS_Neutral,
+
+ /// Widening is profitable.
+ WS_Positive,
+
+ /// Widening is very profitable. Not significantly different from \c
+ /// WS_Positive, except in the relative ordering of the scores.
+ WS_VeryPositive
+ };
+
+ static StringRef scoreTypeToString(WideningScore WS);
+
+ /// Compute the score for widening the condition in \p DominatedGuard
+ /// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in
+ /// \p DominatingGuardLoop).
+ WideningScore computeWideningScore(IntrinsicInst *DominatedGuard,
+ Loop *DominatedGuardLoop,
+ IntrinsicInst *DominatingGuard,
+ Loop *DominatingGuardLoop);
+
+ /// Helper to check if \p V can be hoisted to \p InsertPos.
+ bool isAvailableAt(Value *V, Instruction *InsertPos) {
+ SmallPtrSet<Instruction *, 8> Visited;
+ return isAvailableAt(V, InsertPos, Visited);
+ }
+
+ bool isAvailableAt(Value *V, Instruction *InsertPos,
+ SmallPtrSetImpl<Instruction *> &Visited);
+
+ /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
+ /// isAvailableAt returned true.
+ void makeAvailableAt(Value *V, Instruction *InsertPos);
+
+ /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
+ /// to generate an expression computing the logical AND of \p Cond0 and \p
+ /// Cond1. Return true if the expression computing the AND is only as
+ /// expensive as computing one of the two. If \p InsertPt is true then
+ /// actually generate the resulting expression, make it available at \p
+ /// InsertPt and return it in \p Result (else no change to the IR is made).
+ bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
+ Value *&Result);
+
+ /// Represents a range check of the form \c Base + \c Offset u< \c Length,
+ /// with the constraint that \c Length is not negative. \c CheckInst is the
+ /// pre-existing instruction in the IR that computes the result of this range
+ /// check.
+ class RangeCheck {
+ Value *Base;
+ ConstantInt *Offset;
+ Value *Length;
+ ICmpInst *CheckInst;
+
+ public:
+ explicit RangeCheck(Value *Base, ConstantInt *Offset, Value *Length,
+ ICmpInst *CheckInst)
+ : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}
+
+ void setBase(Value *NewBase) { Base = NewBase; }
+ void setOffset(ConstantInt *NewOffset) { Offset = NewOffset; }
+
+ Value *getBase() const { return Base; }
+ ConstantInt *getOffset() const { return Offset; }
+ const APInt &getOffsetValue() const { return getOffset()->getValue(); }
+ Value *getLength() const { return Length; }
+ ICmpInst *getCheckInst() const { return CheckInst; }
+
+ void print(raw_ostream &OS, bool PrintTypes = false) {
+ OS << "Base: ";
+ Base->printAsOperand(OS, PrintTypes);
+ OS << " Offset: ";
+ Offset->printAsOperand(OS, PrintTypes);
+ OS << " Length: ";
+ Length->printAsOperand(OS, PrintTypes);
+ }
+
+ LLVM_DUMP_METHOD void dump() {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+ };
+
+ /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and
+ /// append them to \p Checks. Returns true on success, may clobber \c Checks
+ /// on failure.
+ bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) {
+ SmallPtrSet<Value *, 8> Visited;
+ return parseRangeChecks(CheckCond, Checks, Visited);
+ }
+
+ bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks,
+ SmallPtrSetImpl<Value *> &Visited);
+
+ /// Combine the checks in \p Checks into a smaller set of checks and append
+ /// them into \p CombinedChecks. Return true on success (i.e. all of the checks
+ /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks
+ /// and \p CombinedChecks on success and on failure.
+ bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
+ SmallVectorImpl<RangeCheck> &CombinedChecks);
+
+ /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
+ /// computing only one of the two expressions?
+ bool isWideningCondProfitable(Value *Cond0, Value *Cond1) {
+ Value *ResultUnused;
+ return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused);
+ }
+
+ /// Widen \p ToWiden to fail if \p NewCondition is false (in addition to
+ /// whatever it is already checking).
+ void widenGuard(IntrinsicInst *ToWiden, Value *NewCondition) {
+ Value *Result;
+ widenCondCommon(ToWiden->getArgOperand(0), NewCondition, ToWiden, Result);
+ ToWiden->setArgOperand(0, Result);
+ }
+
+public:
+ explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree &PDT,
+ LoopInfo &LI)
+ : DT(DT), PDT(PDT), LI(LI) {}
+
+ /// The entry point for this pass.
+ bool run();
+};
+
+struct GuardWideningLegacyPass : public FunctionPass {
+ static char ID;
+ GuardWideningPass Impl;
+
+ GuardWideningLegacyPass() : FunctionPass(ID) {
+ initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ return GuardWideningImpl(
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(),
+ getAnalysis<LoopInfoWrapperPass>().getLoopInfo()).run();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ }
+};
+
+} // namespace
+
+bool GuardWideningImpl::run() {
+ using namespace llvm::PatternMatch;
+
+ DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> GuardsInBlock;
+ bool Changed = false;
+
+ for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode());
+ DFI != DFE; ++DFI) {
+ auto *BB = (*DFI)->getBlock();
+ auto &CurrentList = GuardsInBlock[BB];
+
+ for (auto &I : *BB)
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
+ CurrentList.push_back(cast<IntrinsicInst>(&I));
+
+ for (auto *II : CurrentList)
+ Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
+ }
+
+ for (auto *II : EliminatedGuards)
+ if (!WidenedGuards.count(II))
+ II->eraseFromParent();
+
+ return Changed;
+}
+
+bool GuardWideningImpl::eliminateGuardViaWidening(
+ IntrinsicInst *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
+ const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+ GuardsInBlock) {
+ IntrinsicInst *BestSoFar = nullptr;
+ auto BestScoreSoFar = WS_IllegalOrNegative;
+ auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent());
+
+ // In the set of dominating guards, find the one we can merge GuardInst with
+ // for the most profit.
+ for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
+ auto *CurBB = DFSI.getPath(i)->getBlock();
+ auto *CurLoop = LI.getLoopFor(CurBB);
+ assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
+ const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;
+
+ auto I = GuardsInCurBB.begin();
+ auto E = GuardsInCurBB.end();
+
+#ifndef NDEBUG
+ {
+ unsigned Index = 0;
+ for (auto &I : *CurBB) {
+ if (Index == GuardsInCurBB.size())
+ break;
+ if (GuardsInCurBB[Index] == &I)
+ Index++;
+ }
+ assert(Index == GuardsInCurBB.size() &&
+ "Guards expected to be in order!");
+ }
+#endif
+
+ assert((i == (e - 1)) == (GuardInst->getParent() == CurBB) && "Bad DFS?");
+
+ if (i == (e - 1)) {
+ // Corner case: make sure we're only looking at guards strictly dominating
+ // GuardInst when visiting GuardInst->getParent().
+ auto NewEnd = std::find(I, E, GuardInst);
+ assert(NewEnd != E && "GuardInst not in its own block?");
+ E = NewEnd;
+ }
+
+ for (auto *Candidate : make_range(I, E)) {
+ auto Score =
+ computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop);
+ DEBUG(dbgs() << "Score between " << *GuardInst->getArgOperand(0)
+ << " and " << *Candidate->getArgOperand(0) << " is "
+ << scoreTypeToString(Score) << "\n");
+ if (Score > BestScoreSoFar) {
+ BestScoreSoFar = Score;
+ BestSoFar = Candidate;
+ }
+ }
+ }
+
+ if (BestScoreSoFar == WS_IllegalOrNegative) {
+ DEBUG(dbgs() << "Did not eliminate guard " << *GuardInst << "\n");
+ return false;
+ }
+
+ assert(BestSoFar != GuardInst && "Should have never visited same guard!");
+ assert(DT.dominates(BestSoFar, GuardInst) && "Should be!");
+
+ DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar
+ << " with score " << scoreTypeToString(BestScoreSoFar) << "\n");
+ widenGuard(BestSoFar, GuardInst->getArgOperand(0));
+ GuardInst->setArgOperand(0, ConstantInt::getTrue(GuardInst->getContext()));
+ EliminatedGuards.push_back(GuardInst);
+ WidenedGuards.insert(BestSoFar);
+ return true;
+}
+
+GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
+ IntrinsicInst *DominatedGuard, Loop *DominatedGuardLoop,
+ IntrinsicInst *DominatingGuard, Loop *DominatingGuardLoop) {
+ bool HoistingOutOfLoop = false;
+
+ if (DominatingGuardLoop != DominatedGuardLoop) {
+ if (DominatingGuardLoop &&
+ !DominatingGuardLoop->contains(DominatedGuardLoop))
+ return WS_IllegalOrNegative;
+
+ HoistingOutOfLoop = true;
+ }
+
+ if (!isAvailableAt(DominatedGuard->getArgOperand(0), DominatingGuard))
+ return WS_IllegalOrNegative;
+
+ bool HoistingOutOfIf =
+ !PDT.dominates(DominatedGuard->getParent(), DominatingGuard->getParent());
+
+ if (isWideningCondProfitable(DominatedGuard->getArgOperand(0),
+ DominatingGuard->getArgOperand(0)))
+ return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
+
+ if (HoistingOutOfLoop)
+ return WS_Positive;
+
+ return HoistingOutOfIf ? WS_IllegalOrNegative : WS_Neutral;
+}
+
+bool GuardWideningImpl::isAvailableAt(Value *V, Instruction *Loc,
+ SmallPtrSetImpl<Instruction *> &Visited) {
+ auto *Inst = dyn_cast<Instruction>(V);
+ if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
+ return true;
+
+ if (!isSafeToSpeculativelyExecute(Inst, Loc, &DT) ||
+ Inst->mayReadFromMemory())
+ return false;
+
+ Visited.insert(Inst);
+
+ // We only want to go _up_ the dominance chain when recursing.
+ assert(!isa<PHINode>(Loc) &&
+ "PHIs should return false for isSafeToSpeculativelyExecute");
+ assert(DT.isReachableFromEntry(Inst->getParent()) &&
+ "We did a DFS from the block entry!");
+ return all_of(Inst->operands(),
+ [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
+}
+
+void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) {
+ auto *Inst = dyn_cast<Instruction>(V);
+ if (!Inst || DT.dominates(Inst, Loc))
+ return;
+
+ assert(isSafeToSpeculativelyExecute(Inst, Loc, &DT) &&
+ !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!");
+
+ for (Value *Op : Inst->operands())
+ makeAvailableAt(Op, Loc);
+
+ Inst->moveBefore(Loc);
+}
+
+bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
+ Instruction *InsertPt, Value *&Result) {
+ using namespace llvm::PatternMatch;
+
+ {
+ // L >u C0 && L >u C1 -> L >u max(C0, C1)
+ ConstantInt *RHS0, *RHS1;
+ Value *LHS;
+ ICmpInst::Predicate Pred0, Pred1;
+ if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
+ match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {
+
+ ConstantRange CR0 =
+ ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
+ ConstantRange CR1 =
+ ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());
+
+ // SubsetIntersect is a subset of the actual mathematical intersection of
+ // CR0 and CR1, while SupersetIntersect is a superset of the actual
+ // mathematical intersection. If these two ConstantRanges are equal, then
+ // we know we were able to represent the actual mathematical intersection
+ // of CR0 and CR1, and can use the same to generate an icmp instruction.
+ //
+ // Given what we're doing here and the semantics of guards, it would
+ // actually be correct to just use SubsetIntersect, but that may be too
+ // aggressive in cases we care about.
+ auto SubsetIntersect = CR0.inverse().unionWith(CR1.inverse()).inverse();
+ auto SupersetIntersect = CR0.intersectWith(CR1);
+
+ APInt NewRHSAP;
+ CmpInst::Predicate Pred;
+ if (SubsetIntersect == SupersetIntersect &&
+ SubsetIntersect.getEquivalentICmp(Pred, NewRHSAP)) {
+ if (InsertPt) {
+ ConstantInt *NewRHS = ConstantInt::get(Cond0->getContext(), NewRHSAP);
+ Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
+ }
+ return true;
+ }
+ }
+ }
+
+ {
+ SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
+ if (parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
+ combineRangeChecks(Checks, CombinedChecks)) {
+ if (InsertPt) {
+ Result = nullptr;
+ for (auto &RC : CombinedChecks) {
+ makeAvailableAt(RC.getCheckInst(), InsertPt);
+ if (Result)
+ Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "",
+ InsertPt);
+ else
+ Result = RC.getCheckInst();
+ }
+
+ Result->setName("wide.chk");
+ }
+ return true;
+ }
+ }
+
+ // Base case -- just logical-and the two conditions together.
+
+ if (InsertPt) {
+ makeAvailableAt(Cond0, InsertPt);
+ makeAvailableAt(Cond1, InsertPt);
+
+ Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
+ }
+
+ // We were not able to compute Cond0 AND Cond1 for the price of one.
+ return false;
+}
+
+bool GuardWideningImpl::parseRangeChecks(
+ Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
+ SmallPtrSetImpl<Value *> &Visited) {
+ if (!Visited.insert(CheckCond).second)
+ return true;
+
+ using namespace llvm::PatternMatch;
+
+ {
+ Value *AndLHS, *AndRHS;
+ if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS))))
+ return parseRangeChecks(AndLHS, Checks) &&
+ parseRangeChecks(AndRHS, Checks);
+ }
+
+ auto *IC = dyn_cast<ICmpInst>(CheckCond);
+ if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
+ (IC->getPredicate() != ICmpInst::ICMP_ULT &&
+ IC->getPredicate() != ICmpInst::ICMP_UGT))
+ return false;
+
+ Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
+ if (IC->getPredicate() == ICmpInst::ICMP_UGT)
+ std::swap(CmpLHS, CmpRHS);
+
+ auto &DL = IC->getModule()->getDataLayout();
+
+ GuardWideningImpl::RangeCheck Check(
+ CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())),
+ CmpRHS, IC);
+
+ if (!isKnownNonNegative(Check.getLength(), DL))
+ return false;
+
+ // What we have in \c Check now is a correct interpretation of \p CheckCond.
+ // Try to see if we can move some constant offsets into the \c Offset field.
+
+ bool Changed;
+ auto &Ctx = CheckCond->getContext();
+
+ do {
+ Value *OpLHS;
+ ConstantInt *OpRHS;
+ Changed = false;
+
+#ifndef NDEBUG
+ auto *BaseInst = dyn_cast<Instruction>(Check.getBase());
+ assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
+ "Unreachable instruction?");
+#endif
+
+ if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
+ Check.setBase(OpLHS);
+ APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
+ Check.setOffset(ConstantInt::get(Ctx, NewOffset));
+ Changed = true;
+ } else if (match(Check.getBase(),
+ m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
+ unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(OpLHS, KnownZero, KnownOne, DL);
+ if ((OpRHS->getValue() & KnownZero) == OpRHS->getValue()) {
+ Check.setBase(OpLHS);
+ APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
+ Check.setOffset(ConstantInt::get(Ctx, NewOffset));
+ Changed = true;
+ }
+ }
+ } while (Changed);
+
+ Checks.push_back(Check);
+ return true;
+}
+
+bool GuardWideningImpl::combineRangeChecks(
+ SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
+ SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) {
+ unsigned OldCount = Checks.size();
+ while (!Checks.empty()) {
+ // Pick all of the range checks with a specific base and length, and try to
+ // merge them.
+ Value *CurrentBase = Checks.front().getBase();
+ Value *CurrentLength = Checks.front().getLength();
+
+ SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;
+
+ auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
+ return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
+ };
+
+ std::copy_if(Checks.begin(), Checks.end(),
+ std::back_inserter(CurrentChecks), IsCurrentCheck);
+ Checks.erase(remove_if(Checks, IsCurrentCheck), Checks.end());
+
+ assert(CurrentChecks.size() != 0 && "We know we have at least one!");
+
+ if (CurrentChecks.size() < 3) {
+ RangeChecksOut.insert(RangeChecksOut.end(), CurrentChecks.begin(),
+ CurrentChecks.end());
+ continue;
+ }
+
+ // CurrentChecks.size() will typically be 3 here, but so far there has been
+ // no need to hard-code that fact.
+
+ std::sort(CurrentChecks.begin(), CurrentChecks.end(),
+ [&](const GuardWideningImpl::RangeCheck &LHS,
+ const GuardWideningImpl::RangeCheck &RHS) {
+ return LHS.getOffsetValue().slt(RHS.getOffsetValue());
+ });
+
+ // Note: std::sort should not invalidate iterators into CurrentChecks.
+
+ ConstantInt *MinOffset = CurrentChecks.front().getOffset(),
+ *MaxOffset = CurrentChecks.back().getOffset();
+
+ unsigned BitWidth = MaxOffset->getValue().getBitWidth();
+ if ((MaxOffset->getValue() - MinOffset->getValue())
+ .ugt(APInt::getSignedMinValue(BitWidth)))
+ return false;
+
+ APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
+ const APInt &HighOffset = MaxOffset->getValue();
+ auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
+ return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
+ };
+
+ if (MaxDiff.isMinValue() ||
+ !std::all_of(std::next(CurrentChecks.begin()), CurrentChecks.end(),
+ OffsetOK))
+ return false;
+
+ // We have a series of f+1 checks as:
+ //
+ // I+k_0 u< L ... Chk_0
+ // I+k_1 u< L ... Chk_1
+ // ...
+ // I+k_f u< L ... Chk_f
+ //
+ // with forall i in [1,f]: k_f-k_i u< k_f-k_0 ... Precond_0
+ // k_f-k_0 u< INT_MIN+k_f ... Precond_1
+ // k_f != k_0 ... Precond_2
+ //
+ // Claim:
+ // Chk_0 AND Chk_f implies all the other checks
+ //
+ // Informal proof sketch:
+ //
+ // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap
+ // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and
+ // thus I+k_f is the greatest unsigned value in that range.
+ //
+ // This combined with Chk_f shows that everything in that range is u< L.
+ // Via Precond_0 we know that all of the indices in Chk_0 through Chk_f
+ // lie in [I+k_0,I+k_f], thus proving our claim.
+ //
+ // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
+ // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal
+ // since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a wrapping
+ // range by definition, and the latter case is impossible:
+ //
+ // 0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
+ // xxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ //
+ // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
+ // with 'x' above) be at least >u INT_MIN.
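+ //
+ // As a concrete (hypothetical) instance with offsets k_0=2, k_1=5, k_f=9:
+ // the checks are I+2 u< L, I+5 u< L, I+9 u< L, and Precond_0 holds (4 u< 7
+ // and 0 u< 7). Keeping only Chk_0 and Chk_f is then enough: [I+2,I+9] does
+ // not wrap, I+5 lies in that range, and so I+9 u< L already implies I+5 u< L.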
+
+ RangeChecksOut.emplace_back(CurrentChecks.front());
+ RangeChecksOut.emplace_back(CurrentChecks.back());
+ }
+
+ assert(RangeChecksOut.size() <= OldCount && "We pessimized!");
+ return RangeChecksOut.size() != OldCount;
+}
+
+PreservedAnalyses GuardWideningPass::run(Function &F,
+ AnalysisManager<Function> &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+ bool Changed = GuardWideningImpl(DT, PDT, LI).run();
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
+ switch (WS) {
+ case WS_IllegalOrNegative:
+ return "IllegalOrNegative";
+ case WS_Neutral:
+ return "Neutral";
+ case WS_Positive:
+ return "Positive";
+ case WS_VeryPositive:
+ return "VeryPositive";
+ }
+
+ llvm_unreachable("Fully covered switch above!");
+}
+
+char GuardWideningLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
+ false, false)
+
+FunctionPass *llvm::createGuardWideningPass() {
+ return new GuardWideningLegacyPass();
+}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index ec5e15f0b8f83..542cf38e43bbd 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -24,13 +24,14 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -69,9 +70,6 @@ static cl::opt<bool> VerifyIndvars(
"verify-indvars", cl::Hidden,
cl::desc("Verify the ScalarEvolution result after running indvars"));
-static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
- cl::desc("Reduce live induction variables."));
-
enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };
static cl::opt<ReplaceExitVal> ReplaceExitValue(
@@ -87,42 +85,16 @@ static cl::opt<ReplaceExitVal> ReplaceExitValue(
namespace {
struct RewritePhi;
-class IndVarSimplify : public LoopPass {
- LoopInfo *LI;
- ScalarEvolution *SE;
- DominatorTree *DT;
- TargetLibraryInfo *TLI;
+class IndVarSimplify {
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ const DataLayout &DL;
+ TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
SmallVector<WeakVH, 16> DeadInsts;
- bool Changed;
-public:
-
- static char ID; // Pass identification, replacement for typeid
- IndVarSimplify()
- : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
- initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreservedID(LCSSAID);
- AU.setPreservesCFG();
- }
-
-private:
- void releaseMemory() override {
- DeadInsts.clear();
- }
+ bool Changed = false;
bool isValidRewrite(Value *FromVal, Value *ToVal);
@@ -133,6 +105,7 @@ private:
bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+ void rewriteFirstIterationLoopExitValues(Loop *L);
Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
PHINode *IndVar, SCEVExpander &Rewriter);
@@ -141,22 +114,15 @@ private:
Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
Instruction *InsertPt, Type *Ty);
-};
-}
-char IndVarSimplify::ID = 0;
-INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
- "Induction Variable Simplification", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_END(IndVarSimplify, "indvars",
- "Induction Variable Simplification", false, false)
+public:
+ IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+ const DataLayout &DL, TargetLibraryInfo *TLI,
+ TargetTransformInfo *TTI)
+ : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
-Pass *llvm::createIndVarSimplifyPass() {
- return new IndVarSimplify();
+ bool run(Loop *L);
+};
}
/// Return true if the SCEV expansion generated by the rewriter can replace the
@@ -504,10 +470,9 @@ struct RewritePhi {
unsigned Ith; // Ith incoming value.
Value *Val; // Exit value after expansion.
bool HighCost; // High Cost when expansion.
- bool SafePhi; // LCSSASafePhiForRAUW.
- RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S)
- : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {}
+ RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
+ : PN(P), Ith(I), Val(V), HighCost(H) {}
};
}
@@ -550,9 +515,7 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Find all values that are computed inside the loop, but used outside of it.
// Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
// the exit blocks of the loop to find them.
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBB = ExitBlocks[i];
-
+ for (BasicBlock *ExitBB : ExitBlocks) {
// If there are no PHI nodes in this exit block, then no values defined
// inside the loop are used on this path, skip it.
PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
@@ -560,29 +523,13 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
unsigned NumPreds = PN->getNumIncomingValues();
- // We would like to be able to RAUW single-incoming value PHI nodes. We
- // have to be certain this is safe even when this is an LCSSA PHI node.
- // While the computed exit value is no longer varying in *this* loop, the
- // exit block may be an exit block for an outer containing loop as well,
- // the exit value may be varying in the outer loop, and thus it may still
- // require an LCSSA PHI node. The safe case is when this is
- // single-predecessor PHI node (LCSSA) and the exit block containing it is
- // part of the enclosing loop, or this is the outer most loop of the nest.
- // In either case the exit value could (at most) be varying in the same
- // loop body as the phi node itself. Thus if it is in turn used outside of
- // an enclosing loop it will only be via a separate LCSSA node.
- bool LCSSASafePhiForRAUW =
- NumPreds == 1 &&
- (!L->getParentLoop() || L->getParentLoop() == LI->getLoopFor(ExitBB));
-
// Iterate over all of the PHI nodes.
BasicBlock::iterator BBI = ExitBB->begin();
while ((PN = dyn_cast<PHINode>(BBI++))) {
if (PN->use_empty())
continue; // dead use, don't replace it
- // SCEV only supports integer expressions for now.
- if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
+ if (!SE->isSCEVable(PN->getType()))
continue;
// It's necessary to tell ScalarEvolution about this explicitly so that
@@ -669,8 +616,7 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
}
// Collect all the candidate PHINodes to be rewritten.
- RewritePhiSet.push_back(
- RewritePhi(PN, i, ExitVal, HighCost, LCSSASafePhiForRAUW));
+ RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
}
}
}
@@ -699,9 +645,9 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
if (isInstructionTriviallyDead(Inst, TLI))
DeadInsts.push_back(Inst);
- // If we determined that this PHI is safe to replace even if an LCSSA
- // PHI, do so.
- if (Phi.SafePhi) {
+ // Replace PN with ExitVal if that is legal and does not break LCSSA.
+ if (PN->getNumIncomingValues() == 1 &&
+ LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
PN->replaceAllUsesWith(ExitVal);
PN->eraseFromParent();
}
@@ -712,6 +658,80 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
Rewriter.clearInsertPoint();
}
+//===---------------------------------------------------------------------===//
+// rewriteFirstIterationLoopExitValues: Rewrite loop exit values when we know
+// the corresponding exit can only be taken on the first iteration.
+//===---------------------------------------------------------------------===//
+
+/// Check to see if this loop has loop-invariant conditions that lead to loop
+/// exits. If so, we know that if an exit path is taken, it is taken on the
+/// first loop iteration. This lets us predict exit values of PHI nodes that
+/// live in the loop header.
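+///
+/// For example (an illustrative IR sketch; the value names are hypothetical):
+/// given a header PHI
+///   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
+/// and a loop-invariant branch in the header to an exit block containing
+///   %lcssa = phi i32 [ %iv, %header ]
+/// that exit can only be taken on the first iteration, so the exit PHI's
+/// incoming value can be rewritten from %iv to %start.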
+void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
+ // Verify the input to the pass is already in LCSSA form.
+ assert(L->isLCSSAForm(*DT));
+
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+ auto *LoopHeader = L->getHeader();
+ assert(LoopHeader && "Invalid loop");
+
+ for (auto *ExitBB : ExitBlocks) {
+ BasicBlock::iterator BBI = ExitBB->begin();
+ // If there are no more PHI nodes in this exit block, then no more
+ // values defined inside the loop are used on this path.
+ while (auto *PN = dyn_cast<PHINode>(BBI++)) {
+ for (unsigned IncomingValIdx = 0, E = PN->getNumIncomingValues();
+ IncomingValIdx != E; ++IncomingValIdx) {
+ auto *IncomingBB = PN->getIncomingBlock(IncomingValIdx);
+
+ // We currently only support loop exits from the loop header. If the
+ // incoming block is not the loop header, we would need to recursively
+ // check that all conditions starting from the loop header are
+ // loop-invariant. Additional support might be added in the future.
+ if (IncomingBB != LoopHeader)
+ continue;
+
+ // Get condition that leads to the exit path.
+ auto *TermInst = IncomingBB->getTerminator();
+
+ Value *Cond = nullptr;
+ if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
+ // Must be a conditional branch, otherwise the block
+ // should not be in the loop.
+ Cond = BI->getCondition();
+ } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
+ Cond = SI->getCondition();
+ else
+ continue;
+
+ if (!L->isLoopInvariant(Cond))
+ continue;
+
+ auto *ExitVal =
+ dyn_cast<PHINode>(PN->getIncomingValue(IncomingValIdx));
+
+ // Only deal with PHIs.
+ if (!ExitVal)
+ continue;
+
+ // If ExitVal is a PHI on the loop header, then we know its
+ // value along this exit because the exit can only be taken
+ // on the first iteration.
+ auto *LoopPreheader = L->getLoopPreheader();
+ assert(LoopPreheader && "Invalid loop");
+ int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
+ if (PreheaderIdx != -1) {
+ assert(ExitVal->getParent() == LoopHeader &&
+ "ExitVal must be in loop header");
+ PN->setIncomingValue(IncomingValIdx,
+ ExitVal->getIncomingValue(PreheaderIdx));
+ }
+ }
+ }
+ }
+}
+
/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
@@ -1240,6 +1260,12 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
if (UsePhi->getNumOperands() != 1)
truncateIVUse(DU, DT, LI);
else {
+ // Widening the PHI requires us to insert a trunc. The logical place
+ // for this trunc is in the same BB as the PHI. This is not possible if
+ // the BB is terminated by a catchswitch.
+ if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
+ return nullptr;
+
PHINode *WidePhi =
PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
UsePhi);
@@ -1317,8 +1343,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
Instruction *WideUse = nullptr;
- if (WideAddRec == WideIncExpr
- && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
+ if (WideAddRec == WideIncExpr && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
WideUse = cloneIVUser(DU, WideAddRec);
@@ -1355,8 +1380,7 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
if (!Widened.insert(NarrowUser).second)
continue;
- NarrowIVUsers.push_back(
- NarrowIVDefUse(NarrowDef, NarrowUser, WideDef, NeverNegative));
+ NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, NeverNegative);
}
}
@@ -1391,9 +1415,10 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
// An AddRec must have loop-invariant operands. Since this AddRec is
// materialized by a loop header phi, the expression cannot have any post-loop
// operands, so they must dominate the loop header.
- assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
- SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
- && "Loop header phi recurrence inputs do not dominate the loop");
+ assert(
+ SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
+ SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
+ "Loop header phi recurrence inputs do not dominate the loop");
// The rewriter provides a value for the desired IV expression. This may
// either find an existing phi or materialize a new one. Either way, we
@@ -1463,8 +1488,6 @@ public:
: SE(SCEV), TTI(TTI), IVPhi(IV) {
DT = DTree;
WI.NarrowIV = IVPhi;
- if (ReduceLiveIVs)
- setSplitOverflowIntrinsics();
}
// Implement the interface used by simplifyUsersOfIV.
@@ -1729,6 +1752,7 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
const SCEV *BestInit = nullptr;
BasicBlock *LatchBlock = L->getLoopLatch();
assert(LatchBlock && "needsLFTR should guarantee a loop latch");
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
PHINode *Phi = cast<PHINode>(I);
@@ -1747,8 +1771,7 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
// AR may not be a narrower type, or we may never exit.
uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
- if (PhiWidth < BCWidth ||
- !L->getHeader()->getModule()->getDataLayout().isLegalInteger(PhiWidth))
+ if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
continue;
const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
@@ -1767,8 +1790,8 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
// the loop test. In this case we assume that performing LFTR could not
// increase the number of undef users.
if (ICmpInst *Cond = getLoopTest(L)) {
- if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT)
- && Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
+ if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
+ Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
continue;
}
}
@@ -1810,9 +1833,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// finds a valid pointer IV. Sign extend BECount in order to materialize a
// GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
// the existing GEPs whenever possible.
- if (IndVar->getType()->isPointerTy()
- && !IVCount->getType()->isPointerTy()) {
-
+ if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
// IVOffset will be the new GEP offset that is interpreted by GEP as a
// signed value. IVCount on the other hand represents the loop trip count,
// which is an unsigned value. FindLoopCounter only allows induction
@@ -1833,13 +1854,13 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// We could handle pointer IVs other than i8*, but we need to compensate for
// gep index scaling. See canExpandBackedgeTakenCount comments.
assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
- cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
- && "unit stride pointer IV must be i8*");
+ cast<PointerType>(GEPBase->getType())
+ ->getElementType())->isOne() &&
+ "unit stride pointer IV must be i8*");
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
- }
- else {
+ } else {
// In any other case, convert both IVInit and IVCount to integers before
// comparing. This may result in SCEV expansion of pointers, but in practice
// SCEV will fold the pointer arithmetic away as such:
@@ -1913,8 +1934,9 @@ linearFunctionTestReplace(Loop *L,
}
Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
- assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
- && "genLoopLimit missed a cast");
+ assert(ExitCnt->getType()->isPointerTy() ==
+ IndVar->getType()->isPointerTy() &&
+ "genLoopLimit missed a cast");
// Insert a new icmp_ne or icmp_eq instruction before the branch.
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
@@ -2074,9 +2096,9 @@ void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
// IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//
-bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipOptnoneFunction(L))
- return false;
+bool IndVarSimplify::run(Loop *L) {
+ // We need (and expect!) the incoming loop to be in LCSSA.
+ assert(L->isRecursivelyLCSSAForm(*DT) && "LCSSA required to run indvars!");
// If LoopSimplify form is not available, stay out of trouble. Some notes:
// - LSR currently only supports LoopSimplify-form loops. Indvars'
@@ -2089,18 +2111,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TLI = TLIP ? &TLIP->getTLI() : nullptr;
- auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
- TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
-
- DeadInsts.clear();
- Changed = false;
-
// If there are any floating-point recurrences, attempt to
// transform them to use integer recurrences.
rewriteNonIntegerIVs(L);
@@ -2172,6 +2182,11 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// loop may be sunk below the loop to reduce register pressure.
sinkUnusedInvariants(L);
+ // rewriteFirstIterationLoopExitValues does not rely on the computation of
+ // trip count and therefore can further simplify exit values in addition to
+ // rewriteLoopExitValues.
+ rewriteFirstIterationLoopExitValues(L);
+
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
@@ -2197,3 +2212,69 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
}
+
+PreservedAnalyses IndVarSimplifyPass::run(Loop &L, AnalysisManager<Loop> &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
+ Function *F = L.getHeader()->getParent();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ auto *LI = FAM.getCachedResult<LoopAnalysis>(*F);
+ auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
+
+ assert((LI && SE && DT) &&
+ "Analyses required for indvarsimplify not available!");
+
+ // Optional analyses.
+ auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F);
+ auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(*F);
+
+ IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
+ if (!IVS.run(&L))
+ return PreservedAnalyses::all();
+
+ // FIXME: This should also 'preserve the CFG'.
+ return getLoopPassPreservedAnalyses();
+}
+
+namespace {
+struct IndVarSimplifyLegacyPass : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ IndVarSimplifyLegacyPass() : LoopPass(ID) {
+ initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L))
+ return false;
+
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
+ IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
+ return IVS.run(L);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ getLoopAnalysisUsage(AU);
+ }
+};
+}
+
+char IndVarSimplifyLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
+ "Induction Variable Simplification", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
+ "Induction Variable Simplification", false, false)
+
+Pass *llvm::createIndVarSimplifyPass() {
+ return new IndVarSimplifyLegacyPass();
+}
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index dea61f6ff3d7e..ec7f09a2d598f 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -67,7 +67,6 @@
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include <array>
using namespace llvm;
@@ -114,24 +113,22 @@ class InductiveRangeCheck {
RANGE_CHECK_UNKNOWN = (unsigned)-1
};
- static const char *rangeCheckKindToStr(RangeCheckKind);
+ static StringRef rangeCheckKindToStr(RangeCheckKind);
- const SCEV *Offset;
- const SCEV *Scale;
- Value *Length;
- BranchInst *Branch;
- RangeCheckKind Kind;
+ const SCEV *Offset = nullptr;
+ const SCEV *Scale = nullptr;
+ Value *Length = nullptr;
+ Use *CheckUse = nullptr;
+ RangeCheckKind Kind = RANGE_CHECK_UNKNOWN;
static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
ScalarEvolution &SE, Value *&Index,
Value *&Length);
- static InductiveRangeCheck::RangeCheckKind
- parseRangeCheck(Loop *L, ScalarEvolution &SE, Value *Condition,
- const SCEV *&Index, Value *&UpperLimit);
-
- InductiveRangeCheck() :
- Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { }
+ static void
+ extractRangeChecksFromCond(Loop *L, ScalarEvolution &SE, Use &ConditionUse,
+ SmallVectorImpl<InductiveRangeCheck> &Checks,
+ SmallPtrSetImpl<Value *> &Visited);
public:
const SCEV *getOffset() const { return Offset; }
@@ -150,9 +147,9 @@ public:
Length->print(OS);
else
OS << "(null)";
- OS << "\n Branch: ";
- getBranch()->print(OS);
- OS << "\n";
+ OS << "\n CheckUse: ";
+ getCheckUse()->getUser()->print(OS);
+ OS << " Operand: " << getCheckUse()->getOperandNo() << "\n";
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -161,7 +158,7 @@ public:
}
#endif
- BranchInst *getBranch() const { return Branch; }
+ Use *getCheckUse() const { return CheckUse; }
/// Represents a signed integer range [Range.getBegin(), Range.getEnd()). If
/// R.getEnd() sle R.getBegin(), then R denotes the empty range.
@@ -180,8 +177,6 @@ public:
const SCEV *getEnd() const { return End; }
};
- typedef SpecificBumpPtrAllocator<InductiveRangeCheck> AllocatorTy;
-
/// This is the value the condition of the branch needs to evaluate to for the
/// branch to take the hot successor (see (1) above).
bool getPassingDirection() { return true; }
@@ -190,19 +185,20 @@ public:
/// check is redundant and can be constant-folded away. The induction
/// variable is not required to be the canonical {0,+,1} induction variable.
Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE,
- const SCEVAddRecExpr *IndVar,
- IRBuilder<> &B) const;
-
- /// Create an inductive range check out of BI if possible, else return
- /// nullptr.
- static InductiveRangeCheck *create(AllocatorTy &Alloc, BranchInst *BI,
- Loop *L, ScalarEvolution &SE,
- BranchProbabilityInfo &BPI);
+ const SCEVAddRecExpr *IndVar) const;
+
+ /// Parse out a set of inductive range checks from \p BI and append them to \p
+ /// Checks.
+ ///
+ /// NB! There may be conditions feeding into \p BI that aren't inductive range
+ /// checks, and hence don't end up in \p Checks.
+ static void
+ extractRangeChecksFromBranch(BranchInst *BI, Loop *L, ScalarEvolution &SE,
+ BranchProbabilityInfo &BPI,
+ SmallVectorImpl<InductiveRangeCheck> &Checks);
};
class InductiveRangeCheckElimination : public LoopPass {
- InductiveRangeCheck::AllocatorTy Allocator;
-
public:
static char ID;
InductiveRangeCheckElimination() : LoopPass(ID) {
@@ -211,11 +207,8 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ getLoopAnalysisUsage(AU);
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -226,15 +219,12 @@ char InductiveRangeCheckElimination::ID = 0;
INITIALIZE_PASS_BEGIN(InductiveRangeCheckElimination, "irce",
"Inductive range check elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(InductiveRangeCheckElimination, "irce",
"Inductive range check elimination", false, false)
-const char *InductiveRangeCheck::rangeCheckKindToStr(
+StringRef InductiveRangeCheck::rangeCheckKindToStr(
InductiveRangeCheck::RangeCheckKind RCK) {
switch (RCK) {
case InductiveRangeCheck::RANGE_CHECK_UNKNOWN:
@@ -253,11 +243,9 @@ const char *InductiveRangeCheck::rangeCheckKindToStr(
llvm_unreachable("unknown range check type!");
}
-/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI`
-/// cannot
+/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI` cannot
/// be interpreted as a range check, return `RANGE_CHECK_UNKNOWN` and set
-/// `Index` and `Length` to `nullptr`. Otherwise set `Index` to the value
-/// being
+/// `Index` and `Length` to `nullptr`. Otherwise set `Index` to the value being
/// range checked, and set `Length` to the upper limit `Index` is being range
/// checked with if (and only if) the range check type is stronger or equal to
/// RANGE_CHECK_UPPER.
@@ -327,106 +315,89 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
llvm_unreachable("default clause returns!");
}
-/// Parses an arbitrary condition into a range check. `Length` is set only if
-/// the range check is recognized to be `RANGE_CHECK_UPPER` or stronger.
-InductiveRangeCheck::RangeCheckKind
-InductiveRangeCheck::parseRangeCheck(Loop *L, ScalarEvolution &SE,
- Value *Condition, const SCEV *&Index,
- Value *&Length) {
+void InductiveRangeCheck::extractRangeChecksFromCond(
+ Loop *L, ScalarEvolution &SE, Use &ConditionUse,
+ SmallVectorImpl<InductiveRangeCheck> &Checks,
+ SmallPtrSetImpl<Value *> &Visited) {
using namespace llvm::PatternMatch;
- Value *A = nullptr;
- Value *B = nullptr;
-
- if (match(Condition, m_And(m_Value(A), m_Value(B)))) {
- Value *IndexA = nullptr, *IndexB = nullptr;
- Value *LengthA = nullptr, *LengthB = nullptr;
- ICmpInst *ICmpA = dyn_cast<ICmpInst>(A), *ICmpB = dyn_cast<ICmpInst>(B);
-
- if (!ICmpA || !ICmpB)
- return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
-
- auto RCKindA = parseRangeCheckICmp(L, ICmpA, SE, IndexA, LengthA);
- auto RCKindB = parseRangeCheckICmp(L, ICmpB, SE, IndexB, LengthB);
-
- if (RCKindA == InductiveRangeCheck::RANGE_CHECK_UNKNOWN ||
- RCKindB == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
- return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
-
- if (IndexA != IndexB)
- return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
-
- if (LengthA != nullptr && LengthB != nullptr && LengthA != LengthB)
- return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
-
- Index = SE.getSCEV(IndexA);
- if (isa<SCEVCouldNotCompute>(Index))
- return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+ Value *Condition = ConditionUse.get();
+ if (!Visited.insert(Condition).second)
+ return;
- Length = LengthA == nullptr ? LengthB : LengthA;
+ if (match(Condition, m_And(m_Value(), m_Value()))) {
+ SmallVector<InductiveRangeCheck, 8> SubChecks;
+ extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(0),
+ SubChecks, Visited);
+ extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(1),
+ SubChecks, Visited);
+
+ if (SubChecks.size() == 2) {
+ // Handle a special case where we know how to merge two checks separately
+ // checking the upper and lower bounds into a full range check.
+ const auto &RChkA = SubChecks[0];
+ const auto &RChkB = SubChecks[1];
+ if ((RChkA.Length == RChkB.Length || !RChkA.Length || !RChkB.Length) &&
+ RChkA.Offset == RChkB.Offset && RChkA.Scale == RChkB.Scale) {
+
+ // If RChkA.Kind == RChkB.Kind then we just found two identical checks.
+ // But if one of them is a RANGE_CHECK_LOWER and the other is a
+ // RANGE_CHECK_UPPER (only possibility if they're different) then
+ // together they form a RANGE_CHECK_BOTH.
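+ // For instance (an illustrative sketch): a lower-bound check "0 s<= I" and
+ // an upper-bound check "I s< Len" over the same index expression merge into
+ // a single RANGE_CHECK_BOTH entry guarding 0 <= I < Len.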
+ SubChecks[0].Kind =
+ (InductiveRangeCheck::RangeCheckKind)(RChkA.Kind | RChkB.Kind);
+ SubChecks[0].Length = RChkA.Length ? RChkA.Length : RChkB.Length;
+ SubChecks[0].CheckUse = &ConditionUse;
+
+ // We updated one of the checks in place, now erase the other.
+ SubChecks.pop_back();
+ }
+ }
- return (InductiveRangeCheck::RangeCheckKind)(RCKindA | RCKindB);
+ Checks.insert(Checks.end(), SubChecks.begin(), SubChecks.end());
+ return;
}
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
- Value *IndexVal = nullptr;
-
- auto RCKind = parseRangeCheckICmp(L, ICI, SE, IndexVal, Length);
+ ICmpInst *ICI = dyn_cast<ICmpInst>(Condition);
+ if (!ICI)
+ return;
- if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
- return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+ Value *Length = nullptr, *Index;
+ auto RCKind = parseRangeCheckICmp(L, ICI, SE, Index, Length);
+ if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
+ return;
- Index = SE.getSCEV(IndexVal);
- if (isa<SCEVCouldNotCompute>(Index))
- return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+ const auto *IndexAddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Index));
+ bool IsAffineIndex =
+ IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();
- return RCKind;
- }
+ if (!IsAffineIndex)
+ return;
- return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+ InductiveRangeCheck IRC;
+ IRC.Length = Length;
+ IRC.Offset = IndexAddRec->getStart();
+ IRC.Scale = IndexAddRec->getStepRecurrence(SE);
+ IRC.CheckUse = &ConditionUse;
+ IRC.Kind = RCKind;
+ Checks.push_back(IRC);
}
-
-InductiveRangeCheck *
-InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI,
- Loop *L, ScalarEvolution &SE,
- BranchProbabilityInfo &BPI) {
+void InductiveRangeCheck::extractRangeChecksFromBranch(
+ BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo &BPI,
+ SmallVectorImpl<InductiveRangeCheck> &Checks) {
if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
- return nullptr;
+ return;
BranchProbability LikelyTaken(15, 16);
- if (BPI.getEdgeProbability(BI->getParent(), (unsigned) 0) < LikelyTaken)
- return nullptr;
-
- Value *Length = nullptr;
- const SCEV *IndexSCEV = nullptr;
-
- auto RCKind = InductiveRangeCheck::parseRangeCheck(L, SE, BI->getCondition(),
- IndexSCEV, Length);
-
- if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
- return nullptr;
-
- assert(IndexSCEV && "contract with SplitRangeCheckCondition!");
- assert((!(RCKind & InductiveRangeCheck::RANGE_CHECK_UPPER) || Length) &&
- "contract with SplitRangeCheckCondition!");
-
- const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV);
- bool IsAffineIndex =
- IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();
+ if (BPI.getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken)
+ return;
- if (!IsAffineIndex)
- return nullptr;
-
- InductiveRangeCheck *IRC = new (A.Allocate()) InductiveRangeCheck;
- IRC->Length = Length;
- IRC->Offset = IndexAddRec->getStart();
- IRC->Scale = IndexAddRec->getStepRecurrence(SE);
- IRC->Branch = BI;
- IRC->Kind = RCKind;
- return IRC;
+ SmallPtrSet<Value *, 8> Visited;
+ InductiveRangeCheck::extractRangeChecksFromCond(L, SE, BI->getOperandUse(0),
+ Checks, Visited);
}
namespace {
@@ -666,7 +637,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
return None;
}
- BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin());
+ BranchInst *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
if (!LatchBr || LatchBr->isUnconditional()) {
FailureReason = "latch terminator not conditional branch";
return None;
@@ -792,7 +763,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
return None;
}
- IRBuilder<> B(&*Preheader->rbegin());
+ IRBuilder<> B(Preheader->getTerminator());
RightValue = B.CreateAdd(RightValue, One);
}
@@ -814,7 +785,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
return None;
}
- IRBuilder<> B(&*Preheader->rbegin());
+ IRBuilder<> B(Preheader->getTerminator());
RightValue = B.CreateSub(RightValue, One);
}
}
@@ -833,7 +804,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
const DataLayout &DL = Preheader->getModule()->getDataLayout();
Value *IndVarStartV =
SCEVExpander(SE, DL, "irce")
- .expandCodeFor(IndVarStart, IndVarTy, &*Preheader->rbegin());
+ .expandCodeFor(IndVarStart, IndVarTy, Preheader->getTerminator());
IndVarStartV->setName("indvar.start");
LoopStructure Result;
@@ -947,7 +918,7 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
for (Instruction &I : *ClonedBB)
RemapInstruction(&I, Result.Map,
- RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
// Exit blocks will now have one more predecessor and their PHI nodes need
// to be edited to reflect that. No phi nodes need to be introduced because
@@ -1055,7 +1026,7 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
&*BBInsertLocation);
- BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());
+ BranchInst *PreheaderJump = cast<BranchInst>(Preheader->getTerminator());
bool Increasing = LS.IndVarIncreasing;
IRBuilder<> B(PreheaderJump);
@@ -1305,9 +1276,8 @@ bool LoopConstrainer::run() {
/// in which the range check can be safely elided. If it cannot compute such a
/// range, returns None.
Optional<InductiveRangeCheck::Range>
-InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
- const SCEVAddRecExpr *IndVar,
- IRBuilder<> &) const {
+InductiveRangeCheck::computeSafeIterationSpace(
+ ScalarEvolution &SE, const SCEVAddRecExpr *IndVar) const {
// IndVar is of the form "A + B * I" (where "I" is the canonical induction
// variable, that may or may not exist as a real llvm::Value in the loop) and
// this inductive range check is a range check on the "C + D * I" ("C" is
@@ -1375,7 +1345,7 @@ InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
static Optional<InductiveRangeCheck::Range>
IntersectRange(ScalarEvolution &SE,
const Optional<InductiveRangeCheck::Range> &R1,
- const InductiveRangeCheck::Range &R2, IRBuilder<> &B) {
+ const InductiveRangeCheck::Range &R2) {
if (!R1.hasValue())
return R2;
auto &R1Value = R1.getValue();
@@ -1392,6 +1362,9 @@ IntersectRange(ScalarEvolution &SE,
}
bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipLoop(L))
+ return false;
+
if (L->getBlocks().size() >= LoopSizeCutoff) {
DEBUG(dbgs() << "irce: giving up constraining loop, too large\n";);
return false;
@@ -1404,17 +1377,15 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
}
LLVMContext &Context = Preheader->getContext();
- InductiveRangeCheck::AllocatorTy IRCAlloc;
- SmallVector<InductiveRangeCheck *, 16> RangeChecks;
+ SmallVector<InductiveRangeCheck, 16> RangeChecks;
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
BranchProbabilityInfo &BPI =
getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
for (auto BBI : L->getBlocks())
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
- if (InductiveRangeCheck *IRC =
- InductiveRangeCheck::create(IRCAlloc, TBI, L, SE, BPI))
- RangeChecks.push_back(IRC);
+ InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
+ RangeChecks);
if (RangeChecks.empty())
return false;
@@ -1423,8 +1394,8 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
OS << "irce: looking at loop "; L->print(OS);
OS << "irce: loop has " << RangeChecks.size()
<< " inductive range checks: \n";
- for (InductiveRangeCheck *IRC : RangeChecks)
- IRC->print(OS);
+ for (InductiveRangeCheck &IRC : RangeChecks)
+ IRC.print(OS);
};
DEBUG(PrintRecognizedRangeChecks(dbgs()));
@@ -1450,14 +1421,14 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
Optional<InductiveRangeCheck::Range> SafeIterRange;
Instruction *ExprInsertPt = Preheader->getTerminator();
- SmallVector<InductiveRangeCheck *, 4> RangeChecksToEliminate;
+ SmallVector<InductiveRangeCheck, 4> RangeChecksToEliminate;
IRBuilder<> B(ExprInsertPt);
- for (InductiveRangeCheck *IRC : RangeChecks) {
- auto Result = IRC->computeSafeIterationSpace(SE, IndVar, B);
+ for (InductiveRangeCheck &IRC : RangeChecks) {
+ auto Result = IRC.computeSafeIterationSpace(SE, IndVar);
if (Result.hasValue()) {
auto MaybeSafeIterRange =
- IntersectRange(SE, SafeIterRange, Result.getValue(), B);
+ IntersectRange(SE, SafeIterRange, Result.getValue());
if (MaybeSafeIterRange.hasValue()) {
RangeChecksToEliminate.push_back(IRC);
SafeIterRange = MaybeSafeIterRange.getValue();
@@ -1487,11 +1458,11 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
// Optimize away the now-redundant range checks.
- for (InductiveRangeCheck *IRC : RangeChecksToEliminate) {
- ConstantInt *FoldedRangeCheck = IRC->getPassingDirection()
+ for (InductiveRangeCheck &IRC : RangeChecksToEliminate) {
+ ConstantInt *FoldedRangeCheck = IRC.getPassingDirection()
? ConstantInt::getTrue(Context)
: ConstantInt::getFalse(Context);
- IRC->getBranch()->setCondition(FoldedRangeCheck);
+ IRC.getCheckUse()->set(FoldedRangeCheck);
}
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index dcdcfed66e641..b9e717cf763e2 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -11,31 +11,25 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -46,6 +40,7 @@
#include <algorithm>
#include <memory>
using namespace llvm;
+using namespace jumpthreading;
#define DEBUG_TYPE "jump-threading"
@@ -66,17 +61,6 @@ ImplicationSearchThreshold(
cl::init(3), cl::Hidden);
namespace {
- // These are at global scope so static functions can use them too.
- typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
- typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy;
-
- // This is used to keep track of what kind of constant we're currently hoping
- // to find.
- enum ConstantPreference {
- WantInteger,
- WantBlockAddress
- };
-
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
/// predecessors of the block can be proven to always jump to one of the
@@ -94,89 +78,31 @@ namespace {
/// revectored to the false side of the second if.
///
class JumpThreading : public FunctionPass {
- TargetLibraryInfo *TLI;
- LazyValueInfo *LVI;
- std::unique_ptr<BlockFrequencyInfo> BFI;
- std::unique_ptr<BranchProbabilityInfo> BPI;
- bool HasProfileData;
-#ifdef NDEBUG
- SmallPtrSet<const BasicBlock *, 16> LoopHeaders;
-#else
- SmallSet<AssertingVH<const BasicBlock>, 16> LoopHeaders;
-#endif
- DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
-
- unsigned BBDupThreshold;
-
- // RAII helper for updating the recursion stack.
- struct RecursionSetRemover {
- DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
- std::pair<Value*, BasicBlock*> ThePair;
-
- RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
- std::pair<Value*, BasicBlock*> P)
- : TheSet(S), ThePair(P) { }
-
- ~RecursionSetRemover() {
- TheSet.erase(ThePair);
- }
- };
+ JumpThreadingPass Impl;
+
public:
static char ID; // Pass identification
- JumpThreading(int T = -1) : FunctionPass(ID) {
- BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
+ JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LazyValueInfo>();
- AU.addPreserved<LazyValueInfo>();
+ AU.addRequired<LazyValueInfoWrapperPass>();
+ AU.addPreserved<LazyValueInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
- void releaseMemory() override {
- BFI.reset();
- BPI.reset();
- }
-
- void FindLoopHeaders(Function &F);
- bool ProcessBlock(BasicBlock *BB);
- bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
- BasicBlock *SuccBB);
- bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock *> &PredBBs);
-
- bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
- PredValueInfo &Result,
- ConstantPreference Preference,
- Instruction *CxtI = nullptr);
- bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
- ConstantPreference Preference,
- Instruction *CxtI = nullptr);
-
- bool ProcessBranchOnPHI(PHINode *PN);
- bool ProcessBranchOnXOR(BinaryOperator *BO);
- bool ProcessImpliedCondition(BasicBlock *BB);
-
- bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
- bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
- bool TryToUnfoldSelectInCurrBB(BasicBlock *BB);
-
- private:
- BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
- const char *Suffix);
- void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
- BasicBlock *NewBB, BasicBlock *SuccBB);
+ void releaseMemory() override { Impl.releaseMemory(); }
};
}
char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
-INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
@@ -184,24 +110,72 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading",
// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass(int Threshold) { return new JumpThreading(Threshold); }
+JumpThreadingPass::JumpThreadingPass(int T) {
+ BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
+}
+
/// runOnFunction - Top level algorithm.
///
bool JumpThreading::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
+ if (skipFunction(F))
return false;
+ auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ bool HasProfileData = F.getEntryCount().hasValue();
+ if (HasProfileData) {
+ LoopInfo LI{DominatorTree(F)};
+ BPI.reset(new BranchProbabilityInfo(F, LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
+ }
+ return Impl.runImpl(F, TLI, LVI, HasProfileData, std::move(BFI),
+ std::move(BPI));
+}
+
+PreservedAnalyses JumpThreadingPass::run(Function &F,
+ AnalysisManager<Function> &AM) {
+
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &LVI = AM.getResult<LazyValueAnalysis>(F);
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ bool HasProfileData = F.getEntryCount().hasValue();
+ if (HasProfileData) {
+ LoopInfo LI{DominatorTree(F)};
+ BPI.reset(new BranchProbabilityInfo(F, LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
+ }
+ bool Changed =
+ runImpl(F, &TLI, &LVI, HasProfileData, std::move(BFI), std::move(BPI));
+
+ // FIXME: We need to invalidate LVI to avoid PR28400. Is there a better
+ // solution?
+ AM.invalidate<LazyValueAnalysis>(F);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
+bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
+ LazyValueInfo *LVI_, bool HasProfileData_,
+ std::unique_ptr<BlockFrequencyInfo> BFI_,
+ std::unique_ptr<BranchProbabilityInfo> BPI_) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- LVI = &getAnalysis<LazyValueInfo>();
+ TLI = TLI_;
+ LVI = LVI_;
BFI.reset();
BPI.reset();
// When profile data is available, we need to update edge weights after
// successful jump threading, which requires both BPI and BFI being available.
- HasProfileData = F.getEntryCount().hasValue();
+ HasProfileData = HasProfileData_;
if (HasProfileData) {
- LoopInfo LI{DominatorTree(F)};
- BPI.reset(new BranchProbabilityInfo(F, LI));
- BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
+ BPI = std::move(BPI_);
+ BFI = std::move(BFI_);
}
// Remove unreachable blocks from function as they may result in infinite
@@ -245,10 +219,13 @@ bool JumpThreading::runOnFunction(Function &F) {
// Can't thread an unconditional jump, but if the block is "almost
// empty", we can replace uses of it with uses of the successor and make
// this dead.
+ // We should not eliminate the loop header either, because eliminating
+ // a loop header might later prevent LoopSimplify from transforming nested
+ // loops into simplified form.
if (BI && BI->isUnconditional() &&
BB != &BB->getParent()->getEntryBlock() &&
// If the terminator is the only non-phi instruction, try to nuke it.
- BB->getFirstNonPHIOrDbg()->isTerminator()) {
+ BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) {
// Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
// block, we have to make sure it isn't in the LoopHeaders set. We
// reinsert afterward if needed.
@@ -361,7 +338,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
/// enough to track all of these properties and keep it up-to-date as the CFG
/// mutates, so we don't allow any of these transformations.
///
-void JumpThreading::FindLoopHeaders(Function &F) {
+void JumpThreadingPass::FindLoopHeaders(Function &F) {
SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
FindFunctionBackedges(F, Edges);
@@ -395,10 +372,9 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
///
/// This returns true if there were any known values.
///
-bool JumpThreading::
-ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
- ConstantPreference Preference,
- Instruction *CxtI) {
+bool JumpThreadingPass::ComputeValueKnownInPredecessors(
+ Value *V, BasicBlock *BB, PredValueInfo &Result,
+ ConstantPreference Preference, Instruction *CxtI) {
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
// prevent this, keep track of what (value, block) pairs we've already visited
@@ -415,7 +391,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
for (BasicBlock *Pred : predecessors(BB))
Result.push_back(std::make_pair(KC, Pred));
- return true;
+ return !Result.empty();
}
// If V is a non-instruction value, or an instruction in a different block,
@@ -465,6 +441,25 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
return !Result.empty();
}
+ // Handle Cast instructions. Only see through a Cast when the source operand
+ // is a PHI or Cmp and the source type is i1, to save compilation time.
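+ // For instance (hypothetical names): if %c is an i1 PHI known to be true in
+ // some predecessor and we see "%z = zext i1 %c to i32", then %z becomes
+ // known as i32 1 in that predecessor after applying the same cast.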
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ Value *Source = CI->getOperand(0);
+ if (!Source->getType()->isIntegerTy(1))
+ return false;
+ if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
+ return false;
+ ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
+ if (Result.empty())
+ return false;
+
+ // Convert the known values.
+ for (auto &R : Result)
+ R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());
+
+ return true;
+ }
+
PredValueInfoTy LHSVals, RHSVals;
// Handle some boolean conditions.
@@ -705,7 +700,7 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) {
/// ProcessBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
-bool JumpThreading::ProcessBlock(BasicBlock *BB) {
+bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// If the block is trivially dead, just return and let the caller nuke it.
// This simplifies other transformations.
if (pred_empty(BB) &&
@@ -889,7 +884,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
return false;
}
-bool JumpThreading::ProcessImpliedCondition(BasicBlock *BB) {
+bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
return false;
@@ -903,12 +898,17 @@ bool JumpThreading::ProcessImpliedCondition(BasicBlock *BB) {
while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
- if (!PBI || !PBI->isConditional() || PBI->getSuccessor(0) != CurrentBB)
+ if (!PBI || !PBI->isConditional())
+ return false;
+ if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
return false;
- if (isImpliedCondition(PBI->getCondition(), Cond, DL)) {
- BI->getSuccessor(1)->removePredecessor(BB);
- BranchInst::Create(BI->getSuccessor(0), BI);
+ bool FalseDest = PBI->getSuccessor(1) == CurrentBB;
+ Optional<bool> Implication =
+ isImpliedCondition(PBI->getCondition(), Cond, DL, FalseDest);
+ if (Implication) {
+ BI->getSuccessor(*Implication ? 1 : 0)->removePredecessor(BB);
+ BranchInst::Create(BI->getSuccessor(*Implication ? 0 : 1), BI);
BI->eraseFromParent();
return true;
}
@@ -923,9 +923,9 @@ bool JumpThreading::ProcessImpliedCondition(BasicBlock *BB) {
/// load instruction, eliminate it by replacing it with a PHI node. This is an
/// important optimization that encourages jump threading, and needs to be run
/// interlaced with other jump threading tasks.
-bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
- // Don't hack volatile/atomic loads.
- if (!LI->isSimple()) return false;
+bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
+ // Don't hack volatile and ordered loads.
+ if (!LI->isUnordered()) return false;
// If the load is defined in a block with exactly one predecessor, it can't be
// partially redundant.
@@ -952,10 +952,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
BasicBlock::iterator BBIt(LI);
if (Value *AvailableVal =
- FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, DefMaxInstsToScan)) {
+ FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
- //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
// If the returned value is the load itself, replace with an undef. This can
// only happen in dead loops.
@@ -994,7 +993,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
AAMDNodes ThisAATags;
- Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt,
+ Value *PredAvailable = FindAvailableLoadedValue(LI, PredBB, BBIt,
DefMaxInstsToScan,
nullptr, &ThisAATags);
if (!PredAvailable) {
@@ -1056,9 +1055,10 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
- LI->getAlignment(),
- UnavailablePred->getTerminator());
+ LoadInst *NewVal =
+ new LoadInst(LoadedPtr, LI->getName() + ".pr", false,
+ LI->getAlignment(), LI->getOrdering(), LI->getSynchScope(),
+ UnavailablePred->getTerminator());
NewVal->setDebugLoc(LI->getDebugLoc());
if (AATags)
NewVal->setAAMetadata(AATags);
@@ -1100,8 +1100,6 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
PN->addIncoming(PredV, I->first);
}
- //cerr << "PRE: " << *LI << *PN << "\n";
-
LI->replaceAllUsesWith(PN);
LI->eraseFromParent();
@@ -1171,9 +1169,9 @@ FindMostPopularDest(BasicBlock *BB,
return MostPopularDest;
}
-bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
- ConstantPreference Preference,
- Instruction *CxtI) {
+bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference,
+ Instruction *CxtI) {
// If threading this would thread across a loop header, don't even try to
// thread the edge.
if (LoopHeaders.count(BB))
@@ -1279,7 +1277,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
/// a PHI node in the current block. See if there are any simplifications we
/// can do based on inputs to the phi node.
///
-bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
+bool JumpThreadingPass::ProcessBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent();
// TODO: We could make use of this to do it once for blocks with common PHI
@@ -1309,7 +1307,7 @@ bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
///
-bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
+bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) {
BasicBlock *BB = BO->getParent();
// If either the LHS or RHS of the xor is a constant, don't do this
@@ -1437,9 +1435,9 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
/// ThreadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB. Transform the IR to reflect this change.
-bool JumpThreading::ThreadEdge(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock*> &PredBBs,
- BasicBlock *SuccBB) {
+bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs,
+ BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
@@ -1593,9 +1591,9 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
/// Create a new basic block that will be the predecessor of BB and successor of
/// all blocks in Preds. When profile data is available, update the frequency of
/// this new block.
-BasicBlock *JumpThreading::SplitBlockPreds(BasicBlock *BB,
- ArrayRef<BasicBlock *> Preds,
- const char *Suffix) {
+BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix) {
// Collect the frequencies of all predecessors of BB, which will be used to
// update the edge weight on BB->SuccBB.
BlockFrequency PredBBFreq(0);
@@ -1615,10 +1613,10 @@ BasicBlock *JumpThreading::SplitBlockPreds(BasicBlock *BB,
/// Update the block frequency of BB and branch weight and the metadata on the
/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
/// Freq(PredBB->BB) / Freq(BB->SuccBB).
-void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
- BasicBlock *BB,
- BasicBlock *NewBB,
- BasicBlock *SuccBB) {
+void JumpThreadingPass::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
+ BasicBlock *BB,
+ BasicBlock *NewBB,
+ BasicBlock *SuccBB) {
if (!HasProfileData)
return;
@@ -1679,8 +1677,8 @@ void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
/// If we can duplicate the contents of BB up into PredBB do so now, this
/// improves the odds that the branch will be on an analyzable instruction like
/// a compare.
-bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock *> &PredBBs) {
+bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
+ BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
assert(!PredBBs.empty() && "Can't handle an empty set");
// If BB is a loop header, then duplicating this block outside the loop would
@@ -1750,13 +1748,18 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// phi translation.
if (Value *IV =
SimplifyInstruction(New, BB->getModule()->getDataLayout())) {
- delete New;
ValueMapping[&*BI] = IV;
+ if (!New->mayHaveSideEffects()) {
+ delete New;
+ New = nullptr;
+ }
} else {
+ ValueMapping[&*BI] = New;
+ }
+ if (New) {
// Otherwise, insert the new instruction into the block.
New->setName(BI->getName());
PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
- ValueMapping[&*BI] = New;
}
}
@@ -1829,7 +1832,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
///
/// And expand the select into a branch structure if one of its arms allows %c
/// to be folded. This later enables threading from bb1 over bb2.
-bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
+bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
@@ -1907,7 +1910,7 @@ bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
/// select if the associated PHI has at least one constant. If the unfolded
/// select is not jump-threaded, it will be folded again in the later
/// optimizations.
-bool JumpThreading::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
+bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB))
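
A minimal sketch of the cloning step introduced in the SimplifyPartiallyRedundantLoad hunk above: the copy placed in the unavailable predecessor now carries over the original load's alignment, atomic ordering, synchronization scope, debug location, and AA metadata. Only API calls that appear in the patch itself are used; the helper name insertLoadInPred and its free-standing form are illustrative, not part of JumpThreading.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// Clone LI into UnavailablePred, preserving its atomic properties so the
// materialized load is equivalent to the original partially redundant one.
static LoadInst *insertLoadInPred(LoadInst *LI, Value *LoadedPtr,
                                  BasicBlock *UnavailablePred) {
  LoadInst *NewVal =
      new LoadInst(LoadedPtr, LI->getName() + ".pr", /*isVolatile=*/false,
                   LI->getAlignment(), LI->getOrdering(), LI->getSynchScope(),
                   UnavailablePred->getTerminator());
  NewVal->setDebugLoc(LI->getDebugLoc());   // keep the source location
  AAMDNodes AATags;
  LI->getAAMetadata(AATags);
  if (AATags)
    NewVal->setAAMetadata(AATags);          // keep alias metadata
  return NewVal;
}

As in the patch, the caller remains responsible for the invariant that UnavailablePred has a single successor, so no critical edge is split here.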
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 8923ff74253c1..2c0a70e44f574 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -30,15 +30,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -56,183 +60,173 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "licm"
-STATISTIC(NumSunk , "Number of instructions sunk out of loop");
-STATISTIC(NumHoisted , "Number of instructions hoisted out of loop");
+STATISTIC(NumSunk, "Number of instructions sunk out of loop");
+STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
-STATISTIC(NumPromoted , "Number of memory locations promoted to registers");
+STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
static cl::opt<bool>
-DisablePromotion("disable-licm-promotion", cl::Hidden,
- cl::desc("Disable memory promotion in LICM pass"));
+ DisablePromotion("disable-licm-promotion", cl::Hidden,
+ cl::desc("Disable memory promotion in LICM pass"));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
- const LICMSafetyInfo *SafetyInfo);
-static bool hoist(Instruction &I, BasicBlock *Preheader);
+ const LoopSafetyInfo *SafetyInfo);
+static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo);
static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
const Loop *CurLoop, AliasSetTracker *CurAST,
- const LICMSafetyInfo *SafetyInfo);
-static bool isGuaranteedToExecute(const Instruction &Inst,
- const DominatorTree *DT,
- const Loop *CurLoop,
- const LICMSafetyInfo *SafetyInfo);
+ const LoopSafetyInfo *SafetyInfo);
static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
const DominatorTree *DT,
- const TargetLibraryInfo *TLI,
const Loop *CurLoop,
- const LICMSafetyInfo *SafetyInfo,
+ const LoopSafetyInfo *SafetyInfo,
const Instruction *CtxI = nullptr);
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
- const AAMDNodes &AAInfo,
+ const AAMDNodes &AAInfo,
AliasSetTracker *CurAST);
static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
const LoopInfo *LI,
- const LICMSafetyInfo *SafetyInfo);
+ const LoopSafetyInfo *SafetyInfo);
static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
DominatorTree *DT, TargetLibraryInfo *TLI,
Loop *CurLoop, AliasSetTracker *CurAST,
- LICMSafetyInfo *SafetyInfo);
+ LoopSafetyInfo *SafetyInfo);
namespace {
- struct LICM : public LoopPass {
- static char ID; // Pass identification, replacement for typeid
- LICM() : LoopPass(ID) {
- initializeLICMPass(*PassRegistry::getPassRegistry());
- }
+struct LoopInvariantCodeMotion {
+ bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
+ TargetLibraryInfo *TLI, ScalarEvolution *SE, bool DeleteAST);
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreserved<SCEVAAWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
+ DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() {
+ return LoopToAliasSetMap;
+ }
+
+private:
+ DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap;
- using llvm::Pass::doFinalization;
+ AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
+ AliasAnalysis *AA);
+};
+
+struct LegacyLICMPass : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LegacyLICMPass() : LoopPass(ID) {
+ initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
+ }
- bool doFinalization() override {
- assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L))
return false;
- }
- private:
- AliasAnalysis *AA; // Current AliasAnalysis information
- LoopInfo *LI; // Current LoopInfo
- DominatorTree *DT; // Dominator Tree for the current Loop.
+ auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ return LICM.runOnLoop(L,
+ &getAnalysis<AAResultsWrapperPass>().getAAResults(),
+ &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
+ &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ SE ? &SE->getSE() : nullptr, false);
+ }
- TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ getLoopAnalysisUsage(AU);
+ }
- // State that is updated as we process loops.
- bool Changed; // Set to true when we change anything.
- BasicBlock *Preheader; // The preheader block of the current loop...
- Loop *CurLoop; // The current loop we are working on...
- AliasSetTracker *CurAST; // AliasSet information for the current loop...
- DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
+ using llvm::Pass::doFinalization;
- /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
- void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
- Loop *L) override;
+ bool doFinalization() override {
+ assert(LICM.getLoopToAliasSetMap().empty() &&
+ "Didn't free loop alias sets");
+ return false;
+ }
- /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
- /// set.
- void deleteAnalysisValue(Value *V, Loop *L) override;
+private:
+ LoopInvariantCodeMotion LICM;
- /// Simple Analysis hook. Delete loop L from alias set map.
- void deleteAnalysisLoop(Loop *L) override;
- };
+ /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+ void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
+ Loop *L) override;
+
+ /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+ /// set.
+ void deleteAnalysisValue(Value *V, Loop *L) override;
+
+ /// Simple Analysis hook. Delete loop L from alias set map.
+ void deleteAnalysisLoop(Loop *L) override;
+};
+}
+
+PreservedAnalyses LICMPass::run(Loop &L, AnalysisManager<Loop> &AM) {
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
+ Function *F = L.getHeader()->getParent();
+
+ auto *AA = FAM.getCachedResult<AAManager>(*F);
+ auto *LI = FAM.getCachedResult<LoopAnalysis>(*F);
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
+ auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(*F);
+ auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
+ assert((AA && LI && DT && TLI && SE) && "Analyses for LICM not available");
+
+ LoopInvariantCodeMotion LICM;
+
+ if (!LICM.runOnLoop(&L, AA, LI, DT, TLI, SE, true))
+ return PreservedAnalyses::all();
+
+ // FIXME: There is no setPreservesCFG in the new PM. When that becomes
+ // available, it should be used here.
+ return getLoopPassPreservedAnalyses();
}
-char LICM::ID = 0;
-INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+char LegacyLICMPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
-INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
+ false)
-Pass *llvm::createLICMPass() { return new LICM(); }
+Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
/// Hoist expressions out of the specified loop. Note, alias info for inner
/// loop is not preserved so it is not a good idea to run LICM multiple
/// times on one loop.
+/// We should delete the AST for inner loops in the new pass manager to avoid
+/// a memory leak.
///
-bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipOptnoneFunction(L))
- return false;
-
- Changed = false;
-
- // Get our Loop and Alias Analysis information...
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
+ LoopInfo *LI, DominatorTree *DT,
+ TargetLibraryInfo *TLI,
+ ScalarEvolution *SE, bool DeleteAST) {
+ bool Changed = false;
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
- CurAST = new AliasSetTracker(*AA);
- // Collect Alias info from subloops.
- for (Loop *InnerL : L->getSubLoops()) {
- AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
- assert(InnerAST && "Where is my AST?");
-
- // What if InnerLoop was modified by other passes ?
- CurAST->add(*InnerAST);
-
- // Once we've incorporated the inner loop's AST into ours, we don't need the
- // subloop's anymore.
- delete InnerAST;
- LoopToAliasSetMap.erase(InnerL);
- }
-
- CurLoop = L;
+ AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA);
// Get the preheader block to move instructions into...
- Preheader = L->getLoopPreheader();
-
- // Loop over the body of this loop, looking for calls, invokes, and stores.
- // Because subloops have already been incorporated into AST, we skip blocks in
- // subloops.
- //
- for (BasicBlock *BB : L->blocks()) {
- if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
- CurAST->add(*BB); // Incorporate the specified basic block
- }
+ BasicBlock *Preheader = L->getLoopPreheader();
// Compute loop safety information.
- LICMSafetyInfo SafetyInfo;
- computeLICMSafetyInfo(&SafetyInfo, CurLoop);
+ LoopSafetyInfo SafetyInfo;
+ computeLoopSafetyInfo(&SafetyInfo, L);
// We want to visit all of the instructions in this loop... that are not parts
// of our subloops (they have already had their invariants hoisted out of
@@ -245,11 +239,11 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// instructions, we perform another pass to hoist them out of the loop.
//
if (L->hasDedicatedExits())
- Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop,
+ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
CurAST, &SafetyInfo);
if (Preheader)
- Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI,
- CurLoop, CurAST, &SafetyInfo);
+ Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
+ CurAST, &SafetyInfo);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -260,9 +254,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Loop over all of the alias sets in the tracker object.
for (AliasSet &AS : *CurAST)
- Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts,
- PIC, LI, DT, CurLoop,
- CurAST, &SafetyInfo);
+ Changed |= promoteLoopAccessesToScalars(
+ AS, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, CurAST, &SafetyInfo);
// Once we have promoted values across the loop body we have to recursively
// reform LCSSA as any nested loop may now have values defined within the
@@ -271,8 +264,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// SSAUpdater strategy during promotion that was LCSSA aware and reformed
// it as it went.
if (Changed) {
- auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- formLCSSARecursively(*L, *DT, LI, SEWP ? &SEWP->getSE() : nullptr);
+ formLCSSARecursively(*L, *DT, LI, SE);
}
}
@@ -283,50 +275,49 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
"Parent loop not left in LCSSA form after LICM!");
- // Clear out loops state information for the next iteration
- CurLoop = nullptr;
- Preheader = nullptr;
-
// If this loop is nested inside of another one, save the alias information
// for when we process the outer loop.
- if (L->getParentLoop())
+ if (L->getParentLoop() && !DeleteAST)
LoopToAliasSetMap[L] = CurAST;
else
delete CurAST;
+
+ if (Changed && SE)
+ SE->forgetLoopDispositions(L);
return Changed;
}
/// Walk the specified region of the CFG (defined by all blocks dominated by
-/// the specified block, and that are in the current loop) in reverse depth
+/// the specified block, and that are in the current loop) in reverse depth
/// first order w.r.t the DominatorTree. This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
- AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
+ AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo) {
// Verify inputs.
- assert(N != nullptr && AA != nullptr && LI != nullptr &&
- DT != nullptr && CurLoop != nullptr && CurAST != nullptr &&
- SafetyInfo != nullptr && "Unexpected input to sinkRegion");
+ assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
+ CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
+ "Unexpected input to sinkRegion");
- // Set changed as false.
- bool Changed = false;
- // Get basic block
BasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
- if (!CurLoop->contains(BB)) return Changed;
+ if (!CurLoop->contains(BB))
+ return false;
// We are processing blocks in reverse dfo, so process children first.
- const std::vector<DomTreeNode*> &Children = N->getChildren();
+ bool Changed = false;
+ const std::vector<DomTreeNode *> &Children = N->getChildren();
for (DomTreeNode *Child : Children)
Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
- if (inSubLoop(BB,CurLoop,LI)) return Changed;
+ if (inSubLoop(BB, CurLoop, LI))
+ return Changed;
- for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
+ for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
Instruction &I = *--II;
// If the instruction is dead, we would try to sink it because it isn't used
@@ -361,21 +352,23 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
///
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
- AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
+ AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo) {
// Verify inputs.
- assert(N != nullptr && AA != nullptr && LI != nullptr &&
- DT != nullptr && CurLoop != nullptr && CurAST != nullptr &&
- SafetyInfo != nullptr && "Unexpected input to hoistRegion");
- // Set changed as false.
- bool Changed = false;
- // Get basic block
+ assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
+ CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
+ "Unexpected input to hoistRegion");
+
BasicBlock *BB = N->getBlock();
+
// If this subregion is not in the top level loop at all, exit.
- if (!CurLoop->contains(BB)) return Changed;
+ if (!CurLoop->contains(BB))
+ return false;
+
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
+ bool Changed = false;
if (!inSubLoop(BB, CurLoop, LI))
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
Instruction &I = *II++;
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
@@ -396,12 +389,13 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
//
if (CurLoop->hasLoopInvariantOperands(&I) &&
canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo) &&
- isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo,
- CurLoop->getLoopPreheader()->getTerminator()))
- Changed |= hoist(I, CurLoop->getLoopPreheader());
+ isSafeToExecuteUnconditionally(
+ I, DT, CurLoop, SafetyInfo,
+ CurLoop->getLoopPreheader()->getTerminator()))
+ Changed |= hoist(I, DT, CurLoop, SafetyInfo);
}
- const std::vector<DomTreeNode*> &Children = N->getChildren();
+ const std::vector<DomTreeNode *> &Children = N->getChildren();
for (DomTreeNode *Child : Children)
Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
return Changed;
@@ -410,7 +404,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
/// Computes loop safety information, checks loop body & header
/// for the possibility of may throw exception.
///
-void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
+void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
assert(CurLoop != nullptr && "CurLoop can't be null");
BasicBlock *Header = CurLoop->getHeader();
// Setting default safety values.
@@ -419,15 +413,17 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
// Iterate over header and compute safety info.
for (BasicBlock::iterator I = Header->begin(), E = Header->end();
(I != E) && !SafetyInfo->HeaderMayThrow; ++I)
- SafetyInfo->HeaderMayThrow |= I->mayThrow();
-
+ SafetyInfo->HeaderMayThrow |=
+ !isGuaranteedToTransferExecutionToSuccessor(&*I);
+
SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
- // Iterate over loop instructions and compute safety info.
- for (Loop::block_iterator BB = CurLoop->block_begin(),
- BBE = CurLoop->block_end(); (BB != BBE) && !SafetyInfo->MayThrow ; ++BB)
+ // Iterate over loop instructions and compute safety info.
+ for (Loop::block_iterator BB = CurLoop->block_begin(),
+ BBE = CurLoop->block_end();
+ (BB != BBE) && !SafetyInfo->MayThrow; ++BB)
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
(I != E) && !SafetyInfo->MayThrow; ++I)
- SafetyInfo->MayThrow |= I->mayThrow();
+ SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I);
// Compute funclet colors if we might sink/hoist in a function with a funclet
// personality routine.
@@ -443,11 +439,11 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
///
bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
TargetLibraryInfo *TLI, Loop *CurLoop,
- AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
+ AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo) {
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
- return false; // Don't hoist volatile/atomic loads!
+ return false; // Don't hoist volatile/atomic loads!
// Loads from constant memory are always safe to move, even if they end up
// in the same alias set as something that ends up being modified.
@@ -499,7 +495,8 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
break;
}
}
- if (!FoundMod) return true;
+ if (!FoundMod)
+ return true;
}
// FIXME: This should use mod/ref information to see if we can hoist or
@@ -518,9 +515,8 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
// TODO: Plumb the context instruction through to make hoisting and sinking
// more powerful. Hoisting of loads already works due to the special casing
- // above.
- return isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo,
- nullptr);
+ // above.
+ return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr);
}
/// Returns true if a PHINode is a trivially replaceable with an
@@ -541,7 +537,7 @@ static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
/// blocks of the loop.
///
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
- const LICMSafetyInfo *SafetyInfo) {
+ const LoopSafetyInfo *SafetyInfo) {
const auto &BlockColors = SafetyInfo->BlockColors;
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
@@ -588,7 +584,7 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
const LoopInfo *LI,
- const LICMSafetyInfo *SafetyInfo) {
+ const LoopSafetyInfo *SafetyInfo) {
Instruction *New;
if (auto *CI = dyn_cast<CallInst>(&I)) {
const auto &BlockColors = SafetyInfo->BlockColors;
@@ -621,7 +617,8 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
}
ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
- if (!I.getName().empty()) New->setName(I.getName() + ".le");
+ if (!I.getName().empty())
+ New->setName(I.getName() + ".le");
// Build LCSSA PHI nodes for any in-loop operands. Note that this is
// particularly cheap because we can rip off the PHI node that we're
@@ -652,18 +649,20 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
///
static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
const Loop *CurLoop, AliasSetTracker *CurAST,
- const LICMSafetyInfo *SafetyInfo) {
+ const LoopSafetyInfo *SafetyInfo) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
bool Changed = false;
- if (isa<LoadInst>(I)) ++NumMovedLoads;
- else if (isa<CallInst>(I)) ++NumMovedCalls;
+ if (isa<LoadInst>(I))
+ ++NumMovedLoads;
+ else if (isa<CallInst>(I))
+ ++NumMovedCalls;
++NumSunk;
Changed = true;
#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
- SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
+ SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());
#endif
@@ -717,18 +716,30 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
/// When an instruction is found to only use loop invariant operands that
/// is safe to hoist, this instruction is called to do the dirty work.
///
-static bool hoist(Instruction &I, BasicBlock *Preheader) {
- DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
- << I << "\n");
+static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo) {
+ auto *Preheader = CurLoop->getLoopPreheader();
+ DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
+ << "\n");
+
+ // Metadata can be dependent on conditions we are hoisting above.
+ // Conservatively strip all metadata on the instruction unless we were
+ // guaranteed to execute I if we entered the loop, in which case the metadata
+ // is valid in the loop preheader.
+ if (I.hasMetadataOtherThanDebugLoc() &&
+ // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
+ // time in isGuaranteedToExecute if we don't actually have anything to
+ // drop. It is a compile time optimization, not required for correctness.
+ !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo))
+ I.dropUnknownNonDebugMetadata();
+
// Move the new node to the Preheader, before its terminator.
I.moveBefore(Preheader->getTerminator());
- // Metadata can be dependent on the condition we are hoisting above.
- // Conservatively strip all metadata on the instruction.
- I.dropUnknownNonDebugMetadata();
-
- if (isa<LoadInst>(I)) ++NumMovedLoads;
- else if (isa<CallInst>(I)) ++NumMovedCalls;
+ if (isa<LoadInst>(I))
+ ++NumMovedLoads;
+ else if (isa<CallInst>(I))
+ ++NumMovedCalls;
++NumHoisted;
return true;
}
@@ -736,134 +747,91 @@ static bool hoist(Instruction &I, BasicBlock *Preheader) {
/// Only sink or hoist an instruction if it is not a trapping instruction,
/// or if the instruction is known not to trap when moved to the preheader.
/// or if it is a trapping instruction and is guaranteed to execute.
-static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
+static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
const DominatorTree *DT,
- const TargetLibraryInfo *TLI,
const Loop *CurLoop,
- const LICMSafetyInfo *SafetyInfo,
+ const LoopSafetyInfo *SafetyInfo,
const Instruction *CtxI) {
- if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
+ if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
return true;
return isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);
}
-static bool isGuaranteedToExecute(const Instruction &Inst,
- const DominatorTree *DT,
- const Loop *CurLoop,
- const LICMSafetyInfo * SafetyInfo) {
-
- // We have to check to make sure that the instruction dominates all
- // of the exit blocks. If it doesn't, then there is a path out of the loop
- // which does not execute this instruction, so we can't hoist it.
-
- // If the instruction is in the header block for the loop (which is very
- // common), it is always guaranteed to dominate the exit blocks. Since this
- // is a common case, and can save some work, check it now.
- if (Inst.getParent() == CurLoop->getHeader())
- // If there's a throw in the header block, we can't guarantee we'll reach
- // Inst.
- return !SafetyInfo->HeaderMayThrow;
-
- // Somewhere in this loop there is an instruction which may throw and make us
- // exit the loop.
- if (SafetyInfo->MayThrow)
- return false;
-
- // Get the exit blocks for the current loop.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
-
- // Verify that the block dominates each of the exit blocks of the loop.
- for (BasicBlock *ExitBlock : ExitBlocks)
- if (!DT->dominates(Inst.getParent(), ExitBlock))
- return false;
-
- // As a degenerate case, if the loop is statically infinite then we haven't
- // proven anything since there are no exit blocks.
- if (ExitBlocks.empty())
- return false;
-
- return true;
-}
-
namespace {
- class LoopPromoter : public LoadAndStorePromoter {
- Value *SomePtr; // Designated pointer to store to.
- SmallPtrSetImpl<Value*> &PointerMustAliases;
- SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
- SmallVectorImpl<Instruction*> &LoopInsertPts;
- PredIteratorCache &PredCache;
- AliasSetTracker &AST;
- LoopInfo &LI;
- DebugLoc DL;
- int Alignment;
- AAMDNodes AATags;
-
- Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (Loop *L = LI.getLoopFor(I->getParent()))
- if (!L->contains(BB)) {
- // We need to create an LCSSA PHI node for the incoming value and
- // store that.
- PHINode *PN =
- PHINode::Create(I->getType(), PredCache.size(BB),
- I->getName() + ".lcssa", &BB->front());
- for (BasicBlock *Pred : PredCache.get(BB))
- PN->addIncoming(I, Pred);
- return PN;
- }
- return V;
- }
+class LoopPromoter : public LoadAndStorePromoter {
+ Value *SomePtr; // Designated pointer to store to.
+ SmallPtrSetImpl<Value *> &PointerMustAliases;
+ SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
+ SmallVectorImpl<Instruction *> &LoopInsertPts;
+ PredIteratorCache &PredCache;
+ AliasSetTracker &AST;
+ LoopInfo &LI;
+ DebugLoc DL;
+ int Alignment;
+ AAMDNodes AATags;
- public:
- LoopPromoter(Value *SP,
- ArrayRef<const Instruction *> Insts,
- SSAUpdater &S, SmallPtrSetImpl<Value *> &PMA,
- SmallVectorImpl<BasicBlock *> &LEB,
- SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
- AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
- const AAMDNodes &AATags)
- : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
- LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
- LI(li), DL(dl), Alignment(alignment), AATags(AATags) {}
-
- bool isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction*> &) const override {
- Value *Ptr;
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- Ptr = LI->getOperand(0);
- else
- Ptr = cast<StoreInst>(I)->getPointerOperand();
- return PointerMustAliases.count(Ptr);
- }
+ Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (Loop *L = LI.getLoopFor(I->getParent()))
+ if (!L->contains(BB)) {
+ // We need to create an LCSSA PHI node for the incoming value and
+ // store that.
+ PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
+ I->getName() + ".lcssa", &BB->front());
+ for (BasicBlock *Pred : PredCache.get(BB))
+ PN->addIncoming(I, Pred);
+ return PN;
+ }
+ return V;
+ }
- void doExtraRewritesBeforeFinalDeletion() const override {
- // Insert stores after in the loop exit blocks. Each exit block gets a
- // store of the live-out values that feed them. Since we've already told
- // the SSA updater about the defs in the loop and the preheader
- // definition, it is all set and we can start using it.
- for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBlock = LoopExitBlocks[i];
- Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
- Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
- Instruction *InsertPos = LoopInsertPts[i];
- StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
- NewSI->setAlignment(Alignment);
- NewSI->setDebugLoc(DL);
- if (AATags) NewSI->setAAMetadata(AATags);
- }
- }
+public:
+ LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
+ SmallPtrSetImpl<Value *> &PMA,
+ SmallVectorImpl<BasicBlock *> &LEB,
+ SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
+ AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
+ const AAMDNodes &AATags)
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
+ LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
+ LI(li), DL(std::move(dl)), Alignment(alignment), AATags(AATags) {}
+
+ bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction *> &) const override {
+ Value *Ptr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ Ptr = LI->getOperand(0);
+ else
+ Ptr = cast<StoreInst>(I)->getPointerOperand();
+ return PointerMustAliases.count(Ptr);
+ }
- void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
- // Update alias analysis.
- AST.copyValue(LI, V);
+ void doExtraRewritesBeforeFinalDeletion() const override {
+ // Insert stores in the loop exit blocks. Each exit block gets a
+ // store of the live-out values that feed them. Since we've already told
+ // the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = LoopExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
+ Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
+ Instruction *InsertPos = LoopInsertPts[i];
+ StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
+ NewSI->setAlignment(Alignment);
+ NewSI->setDebugLoc(DL);
+ if (AATags)
+ NewSI->setAAMetadata(AATags);
}
- void instructionDeleted(Instruction *I) const override {
- AST.deleteValue(I);
- }
- };
+ }
+
+ void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
+ // Update alias analysis.
+ AST.copyValue(LI, V);
+ }
+ void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); }
+};
} // end anon namespace
/// Try to promote memory values to scalars by sinking stores out of the
@@ -871,32 +839,28 @@ namespace {
/// the stores in the loop, looking for stores to Must pointers which are
/// loop invariant.
///
-bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
- SmallVectorImpl<BasicBlock*>&ExitBlocks,
- SmallVectorImpl<Instruction*>&InsertPts,
- PredIteratorCache &PIC, LoopInfo *LI,
- DominatorTree *DT, Loop *CurLoop,
- AliasSetTracker *CurAST,
- LICMSafetyInfo * SafetyInfo) {
+bool llvm::promoteLoopAccessesToScalars(
+ AliasSet &AS, SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
+ LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
+ Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo) {
// Verify inputs.
- assert(LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && CurAST != nullptr &&
- SafetyInfo != nullptr &&
+ assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
+ CurAST != nullptr && SafetyInfo != nullptr &&
"Unexpected Input to promoteLoopAccessesToScalars");
- // Initially set Changed status to false.
- bool Changed = false;
+
// We can promote this alias set if it has a store, if it is a "Must" alias
// set, if the pointer is loop invariant, and if we are not eliminating any
// volatile loads or stores.
if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
- return Changed;
+ return false;
assert(!AS.empty() &&
"Must alias set should have at least one pointer element in it!");
Value *SomePtr = AS.begin()->getValue();
- BasicBlock * Preheader = CurLoop->getLoopPreheader();
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
// It isn't safe to promote a load/store from the loop if the load/store is
// conditional. For example, turning:
@@ -909,12 +873,27 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
//
// is not safe, because *P may only be valid to access if 'c' is true.
//
+ // The safety property divides into two parts:
+ // 1) The memory may not be dereferenceable on entry to the loop. In this
+ // case, we can't insert the required load in the preheader.
+ // 2) The memory model does not allow us to insert a store along any dynamic
+ // path which did not originally have one.
+ //
// It is safe to promote P if all uses are direct load/stores and if at
// least one is guaranteed to be executed.
bool GuaranteedToExecute = false;
- SmallVector<Instruction*, 64> LoopUses;
- SmallPtrSet<Value*, 4> PointerMustAliases;
+ // It is also safe to promote P if we can prove that speculating a load into
+ // the preheader is safe (i.e. proving dereferenceability on all
+ // paths through the loop), and that the memory can be proven thread local
+ // (so that the memory model requirement doesn't apply.) We first establish
+ // the former, and then run a capture analysis below to establish the later.
+ // We can use any access within the alias set to prove dereferenceability
+ // since they're all must alias.
+ bool CanSpeculateLoad = false;
+
+ SmallVector<Instruction *, 64> LoopUses;
+ SmallPtrSet<Value *, 4> PointerMustAliases;
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
@@ -922,11 +901,32 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
AAMDNodes AATags;
bool HasDedicatedExits = CurLoop->hasDedicatedExits();
+ // Don't sink stores from loops without dedicated block exits. Exits
+ // containing indirect branches are not transformed by loop simplify,
+ // make sure we catch that. An additional load may be generated in the
+ // preheader for SSA updater, so also avoid sinking when no preheader
+ // is available.
+ if (!HasDedicatedExits || !Preheader)
+ return false;
+
+ const DataLayout &MDL = Preheader->getModule()->getDataLayout();
+
+ if (SafetyInfo->MayThrow) {
+ // If a loop can throw, we have to insert a store along each unwind edge.
+ // That said, we can't actually make the unwind edge explicit. Therefore,
+ // we have to prove that the store is dead along the unwind edge.
+ //
+ // Currently, this code just special-cases alloca instructions.
+ if (!isa<AllocaInst>(GetUnderlyingObject(SomePtr, MDL)))
+ return false;
+ }
+
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes. While we are at it, collect alignment and AA info.
- for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
- Value *ASIV = ASI->getValue();
+ bool Changed = false;
+ for (const auto &ASI : AS) {
+ Value *ASIV = ASI.getValue();
PointerMustAliases.insert(ASIV);
// Check that all of the pointers in the alias set have the same type. We
@@ -947,6 +947,10 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
assert(!Load->isVolatile() && "AST broken");
if (!Load->isSimple())
return Changed;
+
+ if (!GuaranteedToExecute && !CanSpeculateLoad)
+ CanSpeculateLoad = isSafeToExecuteUnconditionally(
+ *Load, DT, CurLoop, SafetyInfo, Preheader->getTerminator());
} else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
@@ -955,13 +959,6 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
assert(!Store->isVolatile() && "AST broken");
if (!Store->isSimple())
return Changed;
- // Don't sink stores from loops without dedicated block exits. Exits
- // containing indirect branches are not transformed by loop simplify,
- // make sure we catch that. An additional load may be generated in the
- // preheader for SSA updater, so also avoid sinking when no preheader
- // is available.
- if (!HasDedicatedExits || !Preheader)
- return Changed;
// Note that we only check GuaranteedToExecute inside the store case
// so that we do not introduce stores where they did not exist before
@@ -972,16 +969,22 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
// instruction will be executed, update the alignment.
// Larger is better, with the exception of 0 being the best alignment.
unsigned InstAlignment = Store->getAlignment();
- if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
+ if ((InstAlignment > Alignment || InstAlignment == 0) &&
+ Alignment != 0) {
if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
GuaranteedToExecute = true;
Alignment = InstAlignment;
}
+ } else if (!GuaranteedToExecute) {
+ GuaranteedToExecute =
+ isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo);
+ }
- if (!GuaranteedToExecute)
- GuaranteedToExecute = isGuaranteedToExecute(*UI, DT,
- CurLoop, SafetyInfo);
-
+ if (!GuaranteedToExecute && !CanSpeculateLoad) {
+ CanSpeculateLoad = isDereferenceableAndAlignedPointer(
+ Store->getPointerOperand(), Store->getAlignment(), MDL,
+ Preheader->getTerminator(), DT);
+ }
} else
return Changed; // Not a load or store.
@@ -997,8 +1000,17 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
}
}
- // If there isn't a guaranteed-to-execute instruction, we can't promote.
- if (!GuaranteedToExecute)
+ // Check legality per comment above. Otherwise, we can't promote.
+ bool PromotionIsLegal = GuaranteedToExecute;
+ if (!PromotionIsLegal && CanSpeculateLoad) {
+ // If this is a thread local location, then we can insert stores along
+ // paths which originally didn't have them without violating the memory
+ // model.
+ Value *Object = GetUnderlyingObject(SomePtr, MDL);
+ PromotionIsLegal =
+ isAllocLikeFn(Object, TLI) && !PointerMayBeCaptured(Object, true, true);
+ }
+ if (!PromotionIsLegal)
return Changed;
// Figure out the loop exits and their insertion points, if this is the
@@ -1017,7 +1029,8 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
return Changed;
// Otherwise, this is safe to promote, lets do it!
- DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
+ << '\n');
Changed = true;
++NumPromoted;
@@ -1028,20 +1041,19 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
DebugLoc DL = LoopUses[0]->getDebugLoc();
// We use the SSAUpdater interface to insert phi nodes as required.
- SmallVector<PHINode*, 16> NewPHIs;
+ SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
- LoopPromoter Promoter(SomePtr, LoopUses, SSA,
- PointerMustAliases, ExitBlocks,
+ LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
- LoadInst *PreheaderLoad =
- new LoadInst(SomePtr, SomePtr->getName()+".promoted",
- Preheader->getTerminator());
+ LoadInst *PreheaderLoad = new LoadInst(
+ SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
PreheaderLoad->setAlignment(Alignment);
PreheaderLoad->setDebugLoc(DL);
- if (AATags) PreheaderLoad->setAAMetadata(AATags);
+ if (AATags)
+ PreheaderLoad->setAAMetadata(AATags);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
// Rewrite all the loads in the loop and remember all the definitions from
@@ -1055,10 +1067,67 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
return Changed;
}
+/// Returns an owning pointer to an alias set which incorporates aliasing info
+/// from L and all subloops of L.
+/// FIXME: In the new pass manager there are no helper functions to handle loop
+/// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
+/// from scratch for every loop. Hook up with the helper functions when
+/// available in the new pass manager to avoid redundant computation.
+AliasSetTracker *
+LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
+ AliasAnalysis *AA) {
+ AliasSetTracker *CurAST = nullptr;
+ SmallVector<Loop *, 4> RecomputeLoops;
+ for (Loop *InnerL : L->getSubLoops()) {
+ auto MapI = LoopToAliasSetMap.find(InnerL);
+ // If the AST for this inner loop is missing it may have been merged into
+ // some other loop's AST and then that loop unrolled, and so we need to
+ // recompute it.
+ if (MapI == LoopToAliasSetMap.end()) {
+ RecomputeLoops.push_back(InnerL);
+ continue;
+ }
+ AliasSetTracker *InnerAST = MapI->second;
+
+ if (CurAST != nullptr) {
+ // What if InnerLoop was modified by other passes ?
+ CurAST->add(*InnerAST);
+
+ // Once we've incorporated the inner loop's AST into ours, we don't need
+ // the subloop's anymore.
+ delete InnerAST;
+ } else {
+ CurAST = InnerAST;
+ }
+ LoopToAliasSetMap.erase(MapI);
+ }
+ if (CurAST == nullptr)
+ CurAST = new AliasSetTracker(*AA);
+
+ auto mergeLoop = [&](Loop *L) {
+ // Loop over the body of this loop, looking for calls, invokes, and stores.
+ // Because subloops have already been incorporated into AST, we skip blocks
+ // in subloops.
+ for (BasicBlock *BB : L->blocks())
+ if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
+ CurAST->add(*BB); // Incorporate the specified basic block
+ };
+
+ // Add everything from the sub loops that are no longer directly available.
+ for (Loop *InnerL : RecomputeLoops)
+ mergeLoop(InnerL);
+
+ // And merge in this loop.
+ mergeLoop(L);
+
+ return CurAST;
+}
+
/// Simple analysis hook. Clone alias set info.
///
-void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
- AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
+ Loop *L) {
+ AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
if (!AST)
return;
@@ -1067,8 +1136,8 @@ void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
/// Simple Analysis hook. Delete value V from alias set
///
-void LICM::deleteAnalysisValue(Value *V, Loop *L) {
- AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
+ AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
if (!AST)
return;
@@ -1077,21 +1146,20 @@ void LICM::deleteAnalysisValue(Value *V, Loop *L) {
/// Simple Analysis hook. Delete value L from alias set map.
///
-void LICM::deleteAnalysisLoop(Loop *L) {
- AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
+ AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
if (!AST)
return;
delete AST;
- LoopToAliasSetMap.erase(L);
+ LICM.getLoopToAliasSetMap().erase(L);
}
-
/// Return true if the body of this loop may store into the memory
/// location pointed to by V.
///
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
- const AAMDNodes &AAInfo,
+ const AAMDNodes &AAInfo,
AliasSetTracker *CurAST) {
// Check to see if any of the basic blocks in CurLoop invalidate *V.
return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
@@ -1104,4 +1172,3 @@ static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
return LI->getLoopFor(BB) != CurLoop;
}
-
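
The promotion hunks above add a second way to prove legality: even without a guaranteed-to-execute access, a location may be promoted when the preheader load is speculable and the underlying object is provably thread-local. Below is a condensed sketch of that decision, assuming the same helpers the patch relies on (GetUnderlyingObject, isAllocLikeFn, PointerMayBeCaptured); the wrapper promotionIsLegal is illustrative and not a function in LICM.

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

static bool promotionIsLegal(bool GuaranteedToExecute, bool CanSpeculateLoad,
                             Value *SomePtr, const DataLayout &DL,
                             const TargetLibraryInfo *TLI) {
  // An access guaranteed to execute covers both halves of the safety argument
  // spelled out in the hunks above.
  if (GuaranteedToExecute)
    return true;
  // Otherwise the preheader load must at least be safe to speculate.
  if (!CanSpeculateLoad)
    return false;
  // And the object must be thread-local, so stores inserted on paths that
  // previously had none cannot be observed by another thread.
  Value *Object = GetUnderlyingObject(SomePtr, DL);
  return isAllocLikeFn(Object, TLI) &&
         !PointerMayBeCaptured(Object, /*ReturnCaptures=*/true,
                               /*StoreCaptures=*/true);
}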
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 1648878b06286..dfe51a4ce44c5 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -35,10 +35,12 @@ using namespace llvm;
STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining");
STATISTIC(NumLoadsCombined, "Number of loads combined");
+#define LDCOMBINE_NAME "Combine Adjacent Loads"
+
namespace {
struct PointerOffsetPair {
Value *Pointer;
- uint64_t Offset;
+ APInt Offset;
};
struct LoadPOPPair {
@@ -63,12 +65,16 @@ public:
using llvm::Pass::doInitialization;
bool doInitialization(Function &) override;
bool runOnBasicBlock(BasicBlock &BB) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
- const char *getPassName() const override { return "LoadCombine"; }
+ const char *getPassName() const override { return LDCOMBINE_NAME; }
static char ID;
- typedef IRBuilder<true, TargetFolder> BuilderTy;
+ typedef IRBuilder<TargetFolder> BuilderTy;
private:
BuilderTy *Builder;
@@ -87,22 +93,25 @@ bool LoadCombine::doInitialization(Function &F) {
}
PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
+ auto &DL = LI.getModule()->getDataLayout();
+
PointerOffsetPair POP;
POP.Pointer = LI.getPointerOperand();
- POP.Offset = 0;
+ unsigned BitWidth = DL.getPointerSizeInBits(LI.getPointerAddressSpace());
+ POP.Offset = APInt(BitWidth, 0);
+
while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
- auto &DL = LI.getModule()->getDataLayout();
- unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
- APInt Offset(BitWidth, 0);
- if (GEP->accumulateConstantOffset(DL, Offset))
- POP.Offset += Offset.getZExtValue();
- else
+ APInt LastOffset = POP.Offset;
+ if (!GEP->accumulateConstantOffset(DL, POP.Offset)) {
// Can't handle GEPs with variable indices.
+ POP.Offset = LastOffset;
return POP;
+ }
POP.Pointer = GEP->getPointerOperand();
- } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer))
+ } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer)) {
POP.Pointer = BC->getOperand(0);
+ }
}
return POP;
}
@@ -115,8 +124,8 @@ bool LoadCombine::combineLoads(
continue;
std::sort(Loads.second.begin(), Loads.second.end(),
[](const LoadPOPPair &A, const LoadPOPPair &B) {
- return A.POP.Offset < B.POP.Offset;
- });
+ return A.POP.Offset.slt(B.POP.Offset);
+ });
if (aggregateLoads(Loads.second))
Combined = true;
}
@@ -132,28 +141,31 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
LoadInst *BaseLoad = nullptr;
SmallVector<LoadPOPPair, 8> AggregateLoads;
bool Combined = false;
- uint64_t PrevOffset = -1ull;
+ bool ValidPrevOffset = false;
+ APInt PrevOffset;
uint64_t PrevSize = 0;
for (auto &L : Loads) {
- if (PrevOffset == -1ull) {
+ if (ValidPrevOffset == false) {
BaseLoad = L.Load;
PrevOffset = L.POP.Offset;
PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
L.Load->getType());
AggregateLoads.push_back(L);
+ ValidPrevOffset = true;
continue;
}
if (L.Load->getAlignment() > BaseLoad->getAlignment())
continue;
- if (L.POP.Offset > PrevOffset + PrevSize) {
+ APInt PrevEnd = PrevOffset + PrevSize;
+ if (L.POP.Offset.sgt(PrevEnd)) {
// No other load will be combinable
if (combineLoads(AggregateLoads))
Combined = true;
AggregateLoads.clear();
- PrevOffset = -1;
+ ValidPrevOffset = false;
continue;
}
- if (L.POP.Offset != PrevOffset + PrevSize)
+ if (L.POP.Offset != PrevEnd)
// This load is offset less than the size of the last load.
// FIXME: We may want to handle this case.
continue;
@@ -199,7 +211,7 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
Value *Ptr = Builder->CreateConstGEP1_64(
Builder->CreatePointerCast(Loads[0].POP.Pointer,
Builder->getInt8PtrTy(AddressSpace)),
- Loads[0].POP.Offset);
+ Loads[0].POP.Offset.getSExtValue());
LoadInst *NewLoad = new LoadInst(
Builder->CreatePointerCast(
Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
@@ -212,7 +224,7 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
Value *V = Builder->CreateExtractInteger(
L.Load->getModule()->getDataLayout(), NewLoad,
cast<IntegerType>(L.Load->getType()),
- L.POP.Offset - Loads[0].POP.Offset, "combine.extract");
+ (L.POP.Offset - Loads[0].POP.Offset).getZExtValue(), "combine.extract");
L.Load->replaceAllUsesWith(V);
}
@@ -221,12 +233,12 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
}
bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
- if (skipOptnoneFunction(BB))
+ if (skipBasicBlock(BB))
return false;
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- IRBuilder<true, TargetFolder> TheBuilder(
+ IRBuilder<TargetFolder> TheBuilder(
BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
Builder = &TheBuilder;
@@ -260,23 +272,12 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
return Combined;
}
-void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
-
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
-}
-
char LoadCombine::ID = 0;
BasicBlockPass *llvm::createLoadCombinePass() {
return new LoadCombine();
}
-INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads",
- false, false)
+INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads",
- false, false)
-
+INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
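
The LoadCombine hunks above switch the per-load offset from uint64_t to APInt and sort with signed predicates. A small self-contained illustration of why, with made-up offsets and assuming 64-bit pointers: a negative constant GEP offset wraps to a huge unsigned value and would sort after every positive offset, while the signed APInt comparison orders it correctly.

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  // Offsets accumulated from constant GEPs over the same base pointer.
  APInt OffA(64, -8, /*isSigned=*/true); // e.g. getelementptr i8, i8* %p, i64 -8
  APInt OffB(64, 16);                    // e.g. getelementptr i8, i8* %p, i64 16
  // Interpreted as uint64_t, -8 becomes 0xFFFFFFFFFFFFFFF8 and sorts last.
  assert(OffA.getZExtValue() > OffB.getZExtValue());
  // The signed comparison used by the patched sort puts it first, as intended.
  assert(OffA.slt(OffB));
  return 0;
}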
diff --git a/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/lib/Transforms/Scalar/LoopDataPrefetch.cpp
new file mode 100644
index 0000000000000..66b59d27dfdeb
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -0,0 +1,304 @@
+//===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Loop Data Prefetching Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-data-prefetch"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+// By default, we limit this to creating 16 PHIs (which is a little over half
+// of the allocatable register set).
+static cl::opt<bool>
+PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
+ cl::desc("Prefetch write addresses"));
+
+static cl::opt<unsigned>
+ PrefetchDistance("prefetch-distance",
+ cl::desc("Number of instructions to prefetch ahead"),
+ cl::Hidden);
+
+static cl::opt<unsigned>
+ MinPrefetchStride("min-prefetch-stride",
+ cl::desc("Min stride to add prefetches"), cl::Hidden);
+
+static cl::opt<unsigned> MaxPrefetchIterationsAhead(
+ "max-prefetch-iters-ahead",
+ cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
+
+STATISTIC(NumPrefetches, "Number of prefetches inserted");
+
+namespace llvm {
+ void initializeLoopDataPrefetchPass(PassRegistry&);
+}
+
+namespace {
+
+ class LoopDataPrefetch : public FunctionPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopDataPrefetch() : FunctionPass(ID) {
+ initializeLoopDataPrefetchPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ // FIXME: For some reason, preserving SE here breaks LSR (even if
+ // this pass changes nothing).
+ // AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ private:
+ bool runOnLoop(Loop *L);
+
+    /// \brief Check if the stride of the accesses is large enough to
+ /// warrant a prefetch.
+ bool isStrideLargeEnough(const SCEVAddRecExpr *AR);
+
+ unsigned getMinPrefetchStride() {
+ if (MinPrefetchStride.getNumOccurrences() > 0)
+ return MinPrefetchStride;
+ return TTI->getMinPrefetchStride();
+ }
+
+ unsigned getPrefetchDistance() {
+ if (PrefetchDistance.getNumOccurrences() > 0)
+ return PrefetchDistance;
+ return TTI->getPrefetchDistance();
+ }
+
+ unsigned getMaxPrefetchIterationsAhead() {
+ if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
+ return MaxPrefetchIterationsAhead;
+ return TTI->getMaxPrefetchIterationsAhead();
+ }
+
+ AssumptionCache *AC;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ const TargetTransformInfo *TTI;
+ const DataLayout *DL;
+ };
+}
+
+char LoopDataPrefetch::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopDataPrefetch, "loop-data-prefetch",
+ "Loop Data Prefetch", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopDataPrefetch, "loop-data-prefetch",
+ "Loop Data Prefetch", false, false)
+
+FunctionPass *llvm::createLoopDataPrefetchPass() { return new LoopDataPrefetch(); }
+
+bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) {
+ unsigned TargetMinStride = getMinPrefetchStride();
+  // If the target accepts any stride, there is no need to check.
+ if (TargetMinStride <= 1)
+ return true;
+
+ const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ // If MinStride is set, don't prefetch unless we can ensure that stride is
+ // larger.
+ if (!ConstStride)
+ return false;
+
+ unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
+ return TargetMinStride <= AbsStride;
+}
+
+bool LoopDataPrefetch::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ DL = &F.getParent()->getDataLayout();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ // If PrefetchDistance is not set, don't run the pass. This gives an
+ // opportunity for targets to run this pass for selected subtargets only
+ // (whose TTI sets PrefetchDistance).
+ if (getPrefetchDistance() == 0)
+ return false;
+ assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
+
+ bool MadeChange = false;
+
+ for (Loop *I : *LI)
+ for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
+
+ return MadeChange;
+}
+
+bool LoopDataPrefetch::runOnLoop(Loop *L) {
+ bool MadeChange = false;
+
+ // Only prefetch in the inner-most loop
+ if (!L->empty())
+ return MadeChange;
+
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AC, EphValues);
+
+ // Calculate the number of iterations ahead to prefetch
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+
+ // If the loop already has prefetches, then assume that the user knows
+ // what they are doing and don't add any more.
+ for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+ J != JE; ++J)
+ if (CallInst *CI = dyn_cast<CallInst>(J))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::prefetch)
+ return MadeChange;
+
+ Metrics.analyzeBasicBlock(*I, *TTI, EphValues);
+ }
+ unsigned LoopSize = Metrics.NumInsts;
+ if (!LoopSize)
+ LoopSize = 1;
+
+ unsigned ItersAhead = getPrefetchDistance() / LoopSize;
+ if (!ItersAhead)
+ ItersAhead = 1;
+
+ if (ItersAhead > getMaxPrefetchIterationsAhead())
+ return MadeChange;
+
+ Function *F = L->getHeader()->getParent();
+ DEBUG(dbgs() << "Prefetching " << ItersAhead
+ << " iterations ahead (loop size: " << LoopSize << ") in "
+ << F->getName() << ": " << *L);
+
+ SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads;
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+ for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+ J != JE; ++J) {
+ Value *PtrValue;
+ Instruction *MemI;
+
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
+ MemI = LMemI;
+ PtrValue = LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
+ if (!PrefetchWrites) continue;
+ MemI = SMemI;
+ PtrValue = SMemI->getPointerOperand();
+ } else continue;
+
+ unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
+ if (PtrAddrSpace)
+ continue;
+
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE->getSCEV(PtrValue);
+ const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LSCEVAddRec)
+ continue;
+
+      // Check if the stride of the accesses is large enough to warrant a
+ // prefetch.
+ if (!isStrideLargeEnough(LSCEVAddRec))
+ continue;
+
+ // We don't want to double prefetch individual cache lines. If this load
+ // is known to be within one cache line of some other load that has
+ // already been prefetched, then don't prefetch this one as well.
+ bool DupPref = false;
+ for (const auto &PrefLoad : PrefLoads) {
+ const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, PrefLoad.second);
+ if (const SCEVConstant *ConstPtrDiff =
+ dyn_cast<SCEVConstant>(PtrDiff)) {
+ int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
+ if (PD < (int64_t) TTI->getCacheLineSize()) {
+ DupPref = true;
+ break;
+ }
+ }
+ }
+ if (DupPref)
+ continue;
+
+ const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(
+ SE->getConstant(LSCEVAddRec->getType(), ItersAhead),
+ LSCEVAddRec->getStepRecurrence(*SE)));
+ if (!isSafeToExpand(NextLSCEV, *SE))
+ continue;
+
+ PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
+
+ Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace);
+ SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr");
+ Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
+
+ IRBuilder<> Builder(MemI);
+ Module *M = (*I)->getParent()->getParent();
+ Type *I32 = Type::getInt32Ty((*I)->getContext());
+ Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
+ Builder.CreateCall(
+ PrefetchFunc,
+ {PrefPtrValue,
+ ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1),
+ ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
+ ++NumPrefetches;
+ DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV
+ << "\n");
+ emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F,
+ MemI->getDebugLoc(), "prefetched memory access");
+
+
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
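(Editorial aside, not part of the patch.) The core heuristic of the new pass is the small calculation in runOnLoop above: the target's prefetch distance, expressed in instructions, is divided by the loop size to decide how many iterations ahead to prefetch, clamped to at least one and abandoned if it exceeds the target's maximum. A self-contained sketch with invented numbers:

#include <algorithm>
#include <cstdio>

// Mirrors the iterations-ahead computation; the inputs would normally come
// from TargetTransformInfo hooks, here they are just example values.
unsigned itersAhead(unsigned PrefetchDistance, unsigned LoopSizeInInsts) {
  unsigned LoopSize = std::max(LoopSizeInInsts, 1u); // guard the division
  return std::max(PrefetchDistance / LoopSize, 1u);  // always at least one iteration
}

int main() {
  // A target asking for ~300 instructions of prefetch distance, 40-inst loop body:
  std::printf("prefetch %u iterations ahead\n", itersAhead(300, 40)); // prints 7
}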
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 7b1940b48c31b..19b2f89555c2b 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -14,75 +14,28 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
#define DEBUG_TYPE "loop-delete"
STATISTIC(NumDeleted, "Number of loops deleted");
-namespace {
- class LoopDeletion : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopDeletion() : LoopPass(ID) {
- initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
- }
-
- // Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop *L, LPPassManager &) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
-
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreservedID(LCSSAID);
- }
-
- private:
- bool isLoopDead(Loop *L, SmallVectorImpl<BasicBlock *> &exitingBlocks,
- SmallVectorImpl<BasicBlock *> &exitBlocks,
- bool &Changed, BasicBlock *Preheader);
-
- };
-}
-
-char LoopDeletion::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
- "Delete dead loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
- "Delete dead loops", false, false)
-
-Pass *llvm::createLoopDeletionPass() {
- return new LoopDeletion();
-}
-
/// isLoopDead - Determine if a loop is dead. This assumes that we've already
/// checked for unique exit and exiting blocks, and that the code is in LCSSA
/// form.
-bool LoopDeletion::isLoopDead(Loop *L,
- SmallVectorImpl<BasicBlock *> &exitingBlocks,
- SmallVectorImpl<BasicBlock *> &exitBlocks,
- bool &Changed, BasicBlock *Preheader) {
+bool LoopDeletionPass::isLoopDead(Loop *L, ScalarEvolution &SE,
+ SmallVectorImpl<BasicBlock *> &exitingBlocks,
+ SmallVectorImpl<BasicBlock *> &exitBlocks,
+ bool &Changed, BasicBlock *Preheader) {
BasicBlock *exitBlock = exitBlocks[0];
// Make sure that all PHI entries coming from the loop are loop invariant.
@@ -91,6 +44,8 @@ bool LoopDeletion::isLoopDead(Loop *L,
// sufficient to guarantee that no loop-variant values are used outside
// of the loop.
BasicBlock::iterator BI = exitBlock->begin();
+ bool AllEntriesInvariant = true;
+ bool AllOutgoingValuesSame = true;
while (PHINode *P = dyn_cast<PHINode>(BI)) {
Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
@@ -98,27 +53,37 @@ bool LoopDeletion::isLoopDead(Loop *L,
// block. If there are different incoming values for different exiting
// blocks, then it is impossible to statically determine which value should
// be used.
- for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) {
- if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
- return false;
- }
+ AllOutgoingValuesSame =
+ all_of(makeArrayRef(exitingBlocks).slice(1), [&](BasicBlock *BB) {
+ return incoming == P->getIncomingValueForBlock(BB);
+ });
+
+ if (!AllOutgoingValuesSame)
+ break;
if (Instruction *I = dyn_cast<Instruction>(incoming))
- if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
- return false;
+ if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) {
+ AllEntriesInvariant = false;
+ break;
+ }
++BI;
}
+ if (Changed)
+ SE.forgetLoopDispositions(L);
+
+ if (!AllEntriesInvariant || !AllOutgoingValuesSame)
+ return false;
+
// Make sure that no instructions in the block have potential side-effects.
// This includes instructions that could write to memory, and loads that are
// marked volatile. This could be made more aggressive by using aliasing
// information to identify readonly and readnone calls.
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
- for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
- BI != BE; ++BI) {
- if (BI->mayHaveSideEffects())
+ for (Instruction &I : **LI) {
+ if (I.mayHaveSideEffects())
return false;
}
}
@@ -126,15 +91,15 @@ bool LoopDeletion::isLoopDead(Loop *L,
return true;
}
-/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the
-/// observable behavior of the program other than finite running time. Note
-/// we do ensure that this never remove a loop that might be infinite, as doing
-/// so could change the halting/non-halting nature of a program.
-/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
-/// in order to make various safety checks work.
-bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
- if (skipOptnoneFunction(L))
- return false;
+/// Remove dead loops, by which we mean loops that do not impact the observable
+/// behavior of the program other than finite running time. Note we do ensure
+/// that this never removes a loop that might be infinite, as doing so could
+/// change the halting/non-halting nature of a program. NOTE: This entire
+/// process relies pretty heavily on LoopSimplify and LCSSA in order to make
+/// various safety checks work.
+bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &loopInfo) {
+ assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
// We can only remove the loop if there is a preheader that we can
// branch from after removing it.
@@ -151,10 +116,10 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
if (L->begin() != L->end())
return false;
- SmallVector<BasicBlock*, 4> exitingBlocks;
+ SmallVector<BasicBlock *, 4> exitingBlocks;
L->getExitingBlocks(exitingBlocks);
- SmallVector<BasicBlock*, 4> exitBlocks;
+ SmallVector<BasicBlock *, 4> exitBlocks;
L->getUniqueExitBlocks(exitBlocks);
// We require that the loop only have a single exit block. Otherwise, we'd
@@ -166,12 +131,11 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
// Finally, we have to check that the loop really is dead.
bool Changed = false;
- if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+ if (!isLoopDead(L, SE, exitingBlocks, exitBlocks, Changed, preheader))
return Changed;
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
- ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
@@ -208,16 +172,14 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
// Move all of the block's children to be children of the preheader, which
// allows us to remove the domtree entry for the block.
ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
- for (SmallVectorImpl<DomTreeNode *>::iterator DI = ChildNodes.begin(),
- DE = ChildNodes.end(); DI != DE; ++DI) {
- DT.changeImmediateDominator(*DI, DT[preheader]);
+ for (DomTreeNode *ChildNode : ChildNodes) {
+ DT.changeImmediateDominator(ChildNode, DT[preheader]);
}
ChildNodes.clear();
@@ -238,8 +200,8 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
// Finally, the blocks from loopinfo. This has to happen late because
// otherwise our loop iterators won't work.
- LoopInfo &loopInfo = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SmallPtrSet<BasicBlock*, 8> blocks;
+
+ SmallPtrSet<BasicBlock *, 8> blocks;
blocks.insert(L->block_begin(), L->block_end());
for (BasicBlock *BB : blocks)
loopInfo.removeBlock(BB);
@@ -252,3 +214,56 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
return Changed;
}
+
+PreservedAnalyses LoopDeletionPass::run(Loop &L, AnalysisManager<Loop> &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
+ Function *F = L.getHeader()->getParent();
+
+ auto &DT = *FAM.getCachedResult<DominatorTreeAnalysis>(*F);
+ auto &SE = *FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
+ auto &LI = *FAM.getCachedResult<LoopAnalysis>(*F);
+
+ bool Changed = runImpl(&L, DT, SE, LI);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}
+
+namespace {
+class LoopDeletionLegacyPass : public LoopPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopDeletionLegacyPass() : LoopPass(ID) {
+ initializeLoopDeletionLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Possibly eliminate loop L if it is dead.
+ bool runOnLoop(Loop *L, LPPassManager &) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ getLoopAnalysisUsage(AU);
+ }
+};
+}
+
+char LoopDeletionLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopDeletionLegacyPass, "loop-deletion",
+ "Delete dead loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_END(LoopDeletionLegacyPass, "loop-deletion",
+ "Delete dead loops", false, false)
+
+Pass *llvm::createLoopDeletionPass() { return new LoopDeletionLegacyPass(); }
+
+bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) {
+ if (skipLoop(L))
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LoopInfo &loopInfo = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ LoopDeletionPass Impl;
+ return Impl.runImpl(L, DT, SE, loopInfo);
+}
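(Editorial aside, not part of the patch.) For readers unfamiliar with the pass, the kind of loop it deletes looks roughly like the hypothetical C++ below: a single-exit loop with a computable trip count whose only effect is to produce values that are never observed, so removing it cannot change program behavior.

// Hypothetical example of a deletable loop (names invented for illustration).
int unobservedWork(const int *A, int N) {
  int Sum = 0;
  for (int I = 0; I < N; ++I)
    Sum += A[I] * 2; // no stores, no calls, no volatile accesses
  return 42;         // Sum never reaches anything observable
}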
diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp
index 3d3cf3e2890b1..7eca28ed2bb73 100644
--- a/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -22,12 +22,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -60,6 +65,19 @@ static cl::opt<unsigned> DistributeSCEVCheckThreshold(
cl::desc("The maximum number of SCEV checks allowed for Loop "
"Distribution"));
+static cl::opt<unsigned> PragmaDistributeSCEVCheckThreshold(
+ "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
+ cl::Hidden,
+ cl::desc(
+ "The maximum number of SCEV checks allowed for Loop "
+ "Distribution for loop marked with #pragma loop distribute(enable)"));
+
+// Note that the initial value for this depends on whether the pass is invoked
+// directly or from the optimization pipeline.
+static cl::opt<bool> EnableLoopDistribute(
+ "enable-loop-distribute", cl::Hidden,
+ cl::desc("Enable the new, experimental LoopDistribution Pass"));
+
STATISTIC(NumLoopsDistributed, "Number of loops distributed");
namespace {
@@ -170,7 +188,7 @@ public:
// Delete the instructions backwards, as it has a reduced likelihood of
// having to update as many def-use and use-def chains.
- for (auto *Inst : make_range(Unused.rbegin(), Unused.rend())) {
+ for (auto *Inst : reverse(Unused)) {
if (!Inst->use_empty())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
Inst->eraseFromParent();
@@ -571,121 +589,39 @@ private:
AccessesType Accesses;
};
-/// \brief The pass class.
-class LoopDistribute : public FunctionPass {
+/// \brief The actual class performing the per-loop work.
+class LoopDistributeForLoop {
public:
- LoopDistribute() : FunctionPass(ID) {
- initializeLoopDistributePass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LAA = &getAnalysis<LoopAccessAnalysis>();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-
- // Build up a worklist of inner-loops to vectorize. This is necessary as the
- // act of distributing a loop creates new loops and can invalidate iterators
- // across the loops.
- SmallVector<Loop *, 8> Worklist;
-
- for (Loop *TopLevelLoop : *LI)
- for (Loop *L : depth_first(TopLevelLoop))
- // We only handle inner-most loops.
- if (L->empty())
- Worklist.push_back(L);
-
- // Now walk the identified inner loops.
- bool Changed = false;
- for (Loop *L : Worklist)
- Changed |= processLoop(L);
-
- // Process each loop nest in the function.
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<LoopAccessAnalysis>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- }
-
- static char ID;
-
-private:
- /// \brief Filter out checks between pointers from the same partition.
- ///
- /// \p PtrToPartition contains the partition number for pointers. Partition
- /// number -1 means that the pointer is used in multiple partitions. In this
- /// case we can't safely omit the check.
- SmallVector<RuntimePointerChecking::PointerCheck, 4>
- includeOnlyCrossPartitionChecks(
- const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &AllChecks,
- const SmallVectorImpl<int> &PtrToPartition,
- const RuntimePointerChecking *RtPtrChecking) {
- SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
-
- std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
- [&](const RuntimePointerChecking::PointerCheck &Check) {
- for (unsigned PtrIdx1 : Check.first->Members)
- for (unsigned PtrIdx2 : Check.second->Members)
- // Only include this check if there is a pair of pointers
- // that require checking and the pointers fall into
- // separate partitions.
- //
- // (Note that we already know at this point that the two
- // pointer groups need checking but it doesn't follow
- // that each pair of pointers within the two groups need
- // checking as well.
- //
- // In other words we don't want to include a check just
- // because there is a pair of pointers between the two
- // pointer groups that require checks and a different
- // pair whose pointers fall into different partitions.)
- if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
- !RuntimePointerChecking::arePointersInSamePartition(
- PtrToPartition, PtrIdx1, PtrIdx2))
- return true;
- return false;
- });
-
- return Checks;
+ LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
+ ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
+ : L(L), F(F), LI(LI), LAI(nullptr), DT(DT), SE(SE), ORE(ORE) {
+ setForced();
}
/// \brief Try to distribute an inner-most loop.
- bool processLoop(Loop *L) {
+ bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
assert(L->empty() && "Only process inner loops.");
DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName()
<< "\" checking " << *L << "\n");
BasicBlock *PH = L->getLoopPreheader();
- if (!PH) {
- DEBUG(dbgs() << "Skipping; no preheader");
- return false;
- }
- if (!L->getExitBlock()) {
- DEBUG(dbgs() << "Skipping; multiple exit blocks");
- return false;
- }
- // LAA will check that we only have a single exiting block.
+ if (!PH)
+ return fail("no preheader");
+ if (!L->getExitBlock())
+ return fail("multiple exit blocks");
- const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
+ // LAA will check that we only have a single exiting block.
+ LAI = &GetLAA(*L);
// Currently, we only distribute to isolate the part of the loop with
// dependence cycles to enable partial vectorization.
- if (LAI.canVectorizeMemory()) {
- DEBUG(dbgs() << "Skipping; memory operations are safe for vectorization");
- return false;
- }
- auto *Dependences = LAI.getDepChecker().getDependences();
- if (!Dependences || Dependences->empty()) {
- DEBUG(dbgs() << "Skipping; No unsafe dependences to isolate");
- return false;
- }
+ if (LAI->canVectorizeMemory())
+ return fail("memory operations are safe for vectorization");
+
+ auto *Dependences = LAI->getDepChecker().getDependences();
+ if (!Dependences || Dependences->empty())
+ return fail("no unsafe dependences to isolate");
InstPartitionContainer Partitions(L, LI, DT);
@@ -708,7 +644,7 @@ private:
// NumUnsafeDependencesActive > 0 indicates this situation and in this case
// we just keep assigning to the same cyclic partition until
// NumUnsafeDependencesActive reaches 0.
- const MemoryDepChecker &DepChecker = LAI.getDepChecker();
+ const MemoryDepChecker &DepChecker = LAI->getDepChecker();
MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
*Dependences);
@@ -738,14 +674,14 @@ private:
DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
- return false;
+ return fail("cannot isolate unsafe dependencies");
// Run the merge heuristics: Merge non-cyclic adjacent partitions since we
// should be able to vectorize these together.
Partitions.mergeBeforePopulating();
DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
- return false;
+ return fail("cannot isolate unsafe dependencies");
// Now, populate the partitions with non-memory operations.
Partitions.populateUsedSet();
@@ -757,15 +693,15 @@ private:
DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
<< Partitions);
if (Partitions.getSize() < 2)
- return false;
+ return fail("cannot isolate unsafe dependencies");
}
// Don't distribute the loop if we need too many SCEV run-time checks.
- const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate();
- if (Pred.getComplexity() > DistributeSCEVCheckThreshold) {
- DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
- return false;
- }
+ const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
+ if (Pred.getComplexity() > (IsForced.getValueOr(false)
+ ? PragmaDistributeSCEVCheckThreshold
+ : DistributeSCEVCheckThreshold))
+    return fail("too many SCEV run-time checks needed");
DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
// We're done forming the partitions set up the reverse mapping from
@@ -779,19 +715,20 @@ private:
SplitBlock(PH, PH->getTerminator(), DT, LI);
// If we need run-time checks, version the loop now.
- auto PtrToPartition = Partitions.computePartitionSetForPointers(LAI);
- const auto *RtPtrChecking = LAI.getRuntimePointerChecking();
+ auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
+ const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
const auto &AllChecks = RtPtrChecking->getChecks();
auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
RtPtrChecking);
if (!Pred.isAlwaysTrue() || !Checks.empty()) {
DEBUG(dbgs() << "\nPointers:\n");
- DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
- LoopVersioning LVer(LAI, L, LI, DT, SE, false);
+ DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
+ LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
LVer.setAliasChecks(std::move(Checks));
- LVer.setSCEVChecks(LAI.PSE.getUnionPredicate());
+ LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
LVer.versionLoop(DefsUsedOutside);
+ LVer.annotateLoopWithNoAlias();
}
// Create identical copies of the original loop for each partition and hook
@@ -810,27 +747,244 @@ private:
}
++NumLoopsDistributed;
+ // Report the success.
+ emitOptimizationRemark(F->getContext(), LDIST_NAME, *F, L->getStartLoc(),
+ "distributed loop");
return true;
}
+  /// \brief Provide diagnostics, then \return false.
+ bool fail(llvm::StringRef Message) {
+ LLVMContext &Ctx = F->getContext();
+ bool Forced = isForced().getValueOr(false);
+
+ DEBUG(dbgs() << "Skipping; " << Message << "\n");
+
+ // With Rpass-missed report that distribution failed.
+ ORE->emitOptimizationRemarkMissed(
+ LDIST_NAME, L,
+ "loop not distributed: use -Rpass-analysis=loop-distribute for more "
+ "info");
+
+ // With Rpass-analysis report why. This is on by default if distribution
+ // was requested explicitly.
+ emitOptimizationRemarkAnalysis(
+ Ctx, Forced ? DiagnosticInfoOptimizationRemarkAnalysis::AlwaysPrint
+ : LDIST_NAME,
+ *F, L->getStartLoc(), Twine("loop not distributed: ") + Message);
+
+ // Also issue a warning if distribution was requested explicitly but it
+ // failed.
+ if (Forced)
+ Ctx.diagnose(DiagnosticInfoOptimizationFailure(
+ *F, L->getStartLoc(), "loop not distributed: failed "
+ "explicitly specified loop distribution"));
+
+ return false;
+ }
+
+  /// \brief Return whether distribution is forced to be enabled/disabled for the loop.
+ ///
+ /// If the optional has a value, it indicates whether distribution was forced
+ /// to be enabled (true) or disabled (false). If the optional has no value
+ /// distribution was not forced either way.
+ const Optional<bool> &isForced() const { return IsForced; }
+
+private:
+ /// \brief Filter out checks between pointers from the same partition.
+ ///
+ /// \p PtrToPartition contains the partition number for pointers. Partition
+ /// number -1 means that the pointer is used in multiple partitions. In this
+ /// case we can't safely omit the check.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4>
+ includeOnlyCrossPartitionChecks(
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &AllChecks,
+ const SmallVectorImpl<int> &PtrToPartition,
+ const RuntimePointerChecking *RtPtrChecking) {
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
+
+ std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (unsigned PtrIdx1 : Check.first->Members)
+ for (unsigned PtrIdx2 : Check.second->Members)
+ // Only include this check if there is a pair of pointers
+ // that require checking and the pointers fall into
+ // separate partitions.
+ //
+ // (Note that we already know at this point that the two
+ // pointer groups need checking but it doesn't follow
+              // that each pair of pointers within the two groups needs
+ // checking as well.
+ //
+ // In other words we don't want to include a check just
+ // because there is a pair of pointers between the two
+ // pointer groups that require checks and a different
+ // pair whose pointers fall into different partitions.)
+ if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
+ !RuntimePointerChecking::arePointersInSamePartition(
+ PtrToPartition, PtrIdx1, PtrIdx2))
+ return true;
+ return false;
+ });
+
+ return Checks;
+ }
+
+ /// \brief Check whether the loop metadata is forcing distribution to be
+ /// enabled/disabled.
+ void setForced() {
+ Optional<const MDOperand *> Value =
+ findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
+ if (!Value)
+ return;
+
+ const MDOperand *Op = *Value;
+ assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
+ IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
+ }
+
+ Loop *L;
+ Function *F;
+
// Analyses used.
LoopInfo *LI;
- LoopAccessAnalysis *LAA;
+ const LoopAccessInfo *LAI;
DominatorTree *DT;
ScalarEvolution *SE;
+ OptimizationRemarkEmitter *ORE;
+
+ /// \brief Indicates whether distribution is forced to be enabled/disabled for
+ /// the loop.
+ ///
+ /// If the optional has a value, it indicates whether distribution was forced
+ /// to be enabled (true) or disabled (false). If the optional has no value
+ /// distribution was not forced either way.
+ Optional<bool> IsForced;
+};
+
+/// Shared implementation between new and old PMs.
+static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
+ ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
+ std::function<const LoopAccessInfo &(Loop &)> &GetLAA,
+ bool ProcessAllLoops) {
+  // Build up a worklist of inner loops to distribute. This is necessary as the
+ // act of distributing a loop creates new loops and can invalidate iterators
+ // across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
+
+ // If distribution was forced for the specific loop to be
+ // enabled/disabled, follow that. Otherwise use the global flag.
+ if (LDL.isForced().getValueOr(ProcessAllLoops))
+ Changed |= LDL.processLoop(GetLAA);
+ }
+
+ // Process each loop nest in the function.
+ return Changed;
+}
+
+/// \brief The pass class.
+class LoopDistributeLegacy : public FunctionPass {
+public:
+ /// \p ProcessAllLoopsByDefault specifies whether loop distribution should be
+ /// performed by default. Pass -enable-loop-distribute={0,1} overrides this
+ /// default. We use this to keep LoopDistribution off by default when invoked
+ /// from the optimization pipeline but on when invoked explicitly from opt.
+ LoopDistributeLegacy(bool ProcessAllLoopsByDefault = true)
+ : FunctionPass(ID), ProcessAllLoops(ProcessAllLoopsByDefault) {
+ // The default is set by the caller.
+ if (EnableLoopDistribute.getNumOccurrences() > 0)
+ ProcessAllLoops = EnableLoopDistribute;
+ initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ std::function<const LoopAccessInfo &(Loop &)> GetLAA =
+ [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
+
+ return runImpl(F, LI, DT, SE, ORE, GetLAA, ProcessAllLoops);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<LoopAccessLegacyAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ }
+
+ static char ID;
+
+private:
+ /// \brief Whether distribution should be on in this function. The per-loop
+ /// pragma can override this.
+ bool ProcessAllLoops;
};
} // anonymous namespace
-char LoopDistribute::ID;
+PreservedAnalyses LoopDistributePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ // FIXME: This does not currently match the behavior from the old PM.
+ // ProcessAllLoops with the old PM defaults to true when invoked from opt and
+ // false when invoked from the optimization pipeline.
+ bool ProcessAllLoops = false;
+ if (EnableLoopDistribute.getNumOccurrences() > 0)
+ ProcessAllLoops = EnableLoopDistribute;
+
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+ auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
+ std::function<const LoopAccessInfo &(Loop &)> GetLAA =
+ [&](Loop &L) -> const LoopAccessInfo & {
+ return LAM.getResult<LoopAccessAnalysis>(L);
+ };
+
+ bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA, ProcessAllLoops);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+char LoopDistributeLegacy::ID;
static const char ldist_name[] = "Loop Distribution";
-INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false)
+INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
+ false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
namespace llvm {
-FunctionPass *createLoopDistributePass() { return new LoopDistribute(); }
+FunctionPass *createLoopDistributePass(bool ProcessAllLoopsByDefault) {
+ return new LoopDistributeLegacy(ProcessAllLoopsByDefault);
+}
}
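(Editorial aside, not part of the patch.) The per-loop metadata read by setForced() above, llvm.loop.distribute.enable, is the hook a source-level hint is expected to lower to, which is how a loop can opt in even while the pass is off by default in the pipeline. A hypothetical C++ kernel in the spirit of the pass's motivating example; the pragma spelling assumes Clang's loop-distribute hint:

// The A[] recurrence carries a loop-borne dependence and stays in its own loop;
// the C[] statement is independent and can be vectorized once split out.
void kernel(int *A, int *B, int *C, int *D, int *E, int N) {
#pragma clang loop distribute(enable)
  for (int I = 0; I < N; ++I) {
    A[I + 1] = A[I] + B[I]; // dependence cycle: kept scalar
    C[I]     = D[I] * E[I]; // independent: vectorizable after distribution
  }
}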
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 4521640e3947e..1468676a35437 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -26,22 +26,21 @@
// i64 and larger types when i64 is legal and the value has few bits set. It
// would be good to enhance isel to emit a loop for ctpop in this case.
//
-// We should enhance the memset/memcpy recognition to handle multiple stores in
-// the loop. This would handle things like:
-// void foo(_Complex float *P)
-// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
-//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -55,7 +54,10 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
#define DEBUG_TYPE "loop-idiom"
@@ -65,7 +67,7 @@ STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
namespace {
-class LoopIdiomRecognize : public LoopPass {
+class LoopIdiomRecognize {
Loop *CurLoop;
AliasAnalysis *AA;
DominatorTree *DT;
@@ -76,39 +78,21 @@ class LoopIdiomRecognize : public LoopPass {
const DataLayout *DL;
public:
- static char ID;
- explicit LoopIdiomRecognize() : LoopPass(ID) {
- initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- }
+ explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
+ LoopInfo *LI, ScalarEvolution *SE,
+ TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL)
+ : CurLoop(nullptr), AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI),
+ DL(DL) {}
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG.
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreserved<SCEVAAWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
+ bool runOnLoop(Loop *L);
private:
typedef SmallVector<StoreInst *, 8> StoreList;
- StoreList StoreRefsForMemset;
+ typedef MapVector<Value *, StoreList> StoreListMap;
+ StoreListMap StoreRefsForMemset;
+ StoreListMap StoreRefsForMemsetPattern;
StoreList StoreRefsForMemcpy;
bool HasMemset;
bool HasMemsetPattern;
@@ -122,14 +106,18 @@ private:
SmallVectorImpl<BasicBlock *> &ExitBlocks);
void collectStores(BasicBlock *BB);
- bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy);
- bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern,
+ bool &ForMemcpy);
+ bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
+ bool ForMemset);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
unsigned StoreAlignment, Value *StoredVal,
- Instruction *TheStore, const SCEVAddRecExpr *Ev,
- const SCEV *BECount, bool NegStride);
+ Instruction *TheStore,
+ SmallPtrSetImpl<Instruction *> &Stores,
+ const SCEVAddRecExpr *Ev, const SCEV *BECount,
+ bool NegStride);
bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
/// @}
@@ -145,38 +133,82 @@ private:
/// @}
};
+class LoopIdiomRecognizeLegacyPass : public LoopPass {
+public:
+ static char ID;
+ explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
+ initializeLoopIdiomRecognizeLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L))
+ return false;
+
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const TargetTransformInfo *TTI =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent());
+ const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
+
+ LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
+ return LIR.runOnLoop(L);
+ }
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ getLoopAnalysisUsage(AU);
+ }
+};
} // End anonymous namespace.
-char LoopIdiomRecognize::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L,
+ AnalysisManager<Loop> &AM) {
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
+ Function *F = L.getHeader()->getParent();
+
+  // Use getCachedResult because a loop pass cannot trigger a function analysis.
+ auto *AA = FAM.getCachedResult<AAManager>(*F);
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
+ auto *LI = FAM.getCachedResult<LoopAnalysis>(*F);
+ auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
+ auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(*F);
+ const auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F);
+ const auto *DL = &L.getHeader()->getModule()->getDataLayout();
+ assert((AA && DT && LI && SE && TLI && TTI && DL) &&
+ "Analyses for Loop Idiom Recognition not available");
+
+ LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
+ if (!LIR.runOnLoop(&L))
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}
+
+char LoopIdiomRecognizeLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
+ "Recognize loop idioms", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
- false, false)
+INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
+ "Recognize loop idioms", false, false)
-Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
+Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
-/// deleteDeadInstruction - Delete this instruction. Before we do, go through
-/// and zero out all the operands of this instruction. If any of them become
-/// dead, delete them and the computation tree that feeds them.
-///
-static void deleteDeadInstruction(Instruction *I,
- const TargetLibraryInfo *TLI) {
- SmallVector<Value *, 16> Operands(I->value_op_begin(), I->value_op_end());
+static void deleteDeadInstruction(Instruction *I) {
I->replaceAllUsesWith(UndefValue::get(I->getType()));
I->eraseFromParent();
- for (Value *Op : Operands)
- RecursivelyDeleteTriviallyDeadInstructions(Op, TLI);
}
//===----------------------------------------------------------------------===//
@@ -185,10 +217,7 @@ static void deleteDeadInstruction(Instruction *I,
//
//===----------------------------------------------------------------------===//
-bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipOptnoneFunction(L))
- return false;
-
+bool LoopIdiomRecognize::runOnLoop(Loop *L) {
CurLoop = L;
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
@@ -200,15 +229,6 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
if (Name == "memset" || Name == "memcpy")
return false;
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *CurLoop->getHeader()->getParent());
- DL = &CurLoop->getHeader()->getModule()->getDataLayout();
-
HasMemset = TLI->has(LibFunc::memset);
HasMemsetPattern = TLI->has(LibFunc::memset_pattern16);
HasMemcpy = TLI->has(LibFunc::memcpy);
@@ -240,6 +260,14 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
<< CurLoop->getHeader()->getName() << "\n");
bool MadeChange = false;
+
+ // The following transforms hoist stores/memsets into the loop pre-header.
+  // Give up if the loop has instructions that may throw.
+ LoopSafetyInfo SafetyInfo;
+ computeLoopSafetyInfo(&SafetyInfo, CurLoop);
+ if (SafetyInfo.MayThrow)
+ return MadeChange;
+
// Scan all the blocks in the loop that are not in subloops.
for (auto *BB : CurLoop->getBlocks()) {
// Ignore blocks in subloops.
@@ -258,9 +286,9 @@ static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) {
return (unsigned)SizeInBits >> 3;
}
-static unsigned getStoreStride(const SCEVAddRecExpr *StoreEv) {
+static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
- return ConstStride->getAPInt().getZExtValue();
+ return ConstStride->getAPInt();
}
/// getMemSetPatternValue - If a strided store of the specified value is safe to
@@ -305,11 +333,15 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
}
bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
- bool &ForMemcpy) {
+ bool &ForMemsetPattern, bool &ForMemcpy) {
// Don't touch volatile stores.
if (!SI->isSimple())
return false;
+ // Avoid merging nontemporal stores.
+ if (SI->getMetadata(LLVMContext::MD_nontemporal))
+ return false;
+
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
@@ -353,7 +385,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
StorePtr->getType()->getPointerAddressSpace() == 0 &&
(PatternValue = getMemSetPatternValue(StoredVal, DL))) {
// It looks like we can use PatternValue!
- ForMemset = true;
+ ForMemsetPattern = true;
return true;
}
@@ -361,7 +393,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
if (HasMemcpy) {
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
- unsigned Stride = getStoreStride(StoreEv);
+ APInt Stride = getStoreStride(StoreEv);
unsigned StoreSize = getStoreSizeInBytes(SI, DL);
if (StoreSize != Stride && StoreSize != -Stride)
return false;
@@ -393,6 +425,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
StoreRefsForMemset.clear();
+ StoreRefsForMemsetPattern.clear();
StoreRefsForMemcpy.clear();
for (Instruction &I : *BB) {
StoreInst *SI = dyn_cast<StoreInst>(&I);
@@ -400,15 +433,22 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
continue;
bool ForMemset = false;
+ bool ForMemsetPattern = false;
bool ForMemcpy = false;
// Make sure this is a strided store with a constant stride.
- if (!isLegalStore(SI, ForMemset, ForMemcpy))
+ if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy))
continue;
// Save the store locations.
- if (ForMemset)
- StoreRefsForMemset.push_back(SI);
- else if (ForMemcpy)
+ if (ForMemset) {
+ // Find the base pointer.
+ Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
+ StoreRefsForMemset[Ptr].push_back(SI);
+ } else if (ForMemsetPattern) {
+ // Find the base pointer.
+ Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
+ StoreRefsForMemsetPattern[Ptr].push_back(SI);
+ } else if (ForMemcpy)
StoreRefsForMemcpy.push_back(SI);
}
}
@@ -430,9 +470,14 @@ bool LoopIdiomRecognize::runOnLoopBlock(
// Look for store instructions, which may be optimized to memset/memcpy.
collectStores(BB);
- // Look for a single store which can be optimized into a memset.
- for (auto &SI : StoreRefsForMemset)
- MadeChange |= processLoopStore(SI, BECount);
+ // Look for a single store or sets of stores with a common base, which can be
+ // optimized into a memset (memset_pattern). The latter most commonly happens
+  // with structs and hand-unrolled loops.
+ for (auto &SL : StoreRefsForMemset)
+ MadeChange |= processLoopStores(SL.second, BECount, true);
+
+ for (auto &SL : StoreRefsForMemsetPattern)
+ MadeChange |= processLoopStores(SL.second, BECount, false);
  // Optimize the store into a memcpy, if it feeds a similarly strided load.
for (auto &SI : StoreRefsForMemcpy)
@@ -458,26 +503,144 @@ bool LoopIdiomRecognize::runOnLoopBlock(
return MadeChange;
}
-/// processLoopStore - See if this store can be promoted to a memset.
-bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
- assert(SI->isSimple() && "Expected only non-volatile stores.");
+/// processLoopStores - See if the given store(s) can be promoted to a memset.
+bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
+ const SCEV *BECount,
+ bool ForMemset) {
+ // Try to find consecutive stores that can be transformed into memsets.
+ SetVector<StoreInst *> Heads, Tails;
+ SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
+
+ // Do a quadratic search on all of the given stores and find
+ // all of the pairs of stores that follow each other.
+ SmallVector<unsigned, 16> IndexQueue;
+ for (unsigned i = 0, e = SL.size(); i < e; ++i) {
+ assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
+
+ Value *FirstStoredVal = SL[i]->getValueOperand();
+ Value *FirstStorePtr = SL[i]->getPointerOperand();
+ const SCEVAddRecExpr *FirstStoreEv =
+ cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
+ APInt FirstStride = getStoreStride(FirstStoreEv);
+ unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL);
+
+ // See if we can optimize just this store in isolation.
+ if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
+ Heads.insert(SL[i]);
+ continue;
+ }
- Value *StoredVal = SI->getValueOperand();
- Value *StorePtr = SI->getPointerOperand();
+ Value *FirstSplatValue = nullptr;
+ Constant *FirstPatternValue = nullptr;
- // Check to see if the stride matches the size of the store. If so, then we
- // know that every byte is touched in the loop.
- const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- unsigned Stride = getStoreStride(StoreEv);
- unsigned StoreSize = getStoreSizeInBytes(SI, DL);
- if (StoreSize != Stride && StoreSize != -Stride)
- return false;
+ if (ForMemset)
+ FirstSplatValue = isBytewiseValue(FirstStoredVal);
+ else
+ FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
+
+ assert((FirstSplatValue || FirstPatternValue) &&
+ "Expected either splat value or pattern value.");
+
+ IndexQueue.clear();
+    // If a store has multiple consecutive store candidates, search the Stores
+    // array according to the sequence: from i+1 to e, then from i-1 to 0.
+    // This is because pairing with the immediately succeeding or preceding
+    // candidate usually creates the best chance to find a memset opportunity.
+ unsigned j = 0;
+ for (j = i + 1; j < e; ++j)
+ IndexQueue.push_back(j);
+ for (j = i; j > 0; --j)
+ IndexQueue.push_back(j - 1);
+
+ for (auto &k : IndexQueue) {
+ assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
+ Value *SecondStorePtr = SL[k]->getPointerOperand();
+ const SCEVAddRecExpr *SecondStoreEv =
+ cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
+ APInt SecondStride = getStoreStride(SecondStoreEv);
+
+ if (FirstStride != SecondStride)
+ continue;
- bool NegStride = StoreSize == -Stride;
+ Value *SecondStoredVal = SL[k]->getValueOperand();
+ Value *SecondSplatValue = nullptr;
+ Constant *SecondPatternValue = nullptr;
+
+ if (ForMemset)
+ SecondSplatValue = isBytewiseValue(SecondStoredVal);
+ else
+ SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
+
+ assert((SecondSplatValue || SecondPatternValue) &&
+ "Expected either splat value or pattern value.");
+
+ if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
+ if (ForMemset) {
+ if (FirstSplatValue != SecondSplatValue)
+ continue;
+ } else {
+ if (FirstPatternValue != SecondPatternValue)
+ continue;
+ }
+ Tails.insert(SL[k]);
+ Heads.insert(SL[i]);
+ ConsecutiveChain[SL[i]] = SL[k];
+ break;
+ }
+ }
+ }
+
+ // We may run into multiple chains that merge into a single chain. We mark the
+ // stores that we transformed so that we don't visit the same store twice.
+ SmallPtrSet<Value *, 16> TransformedStores;
+ bool Changed = false;
+
+ // For stores that start but don't end a link in the chain:
+ for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
+ it != e; ++it) {
+ if (Tails.count(*it))
+ continue;
+
+ // We found a store instruction that starts a chain. Now follow the chain
+ // and try to transform it.
+ SmallPtrSet<Instruction *, 8> AdjacentStores;
+ StoreInst *I = *it;
+
+ StoreInst *HeadStore = I;
+ unsigned StoreSize = 0;
+
+ // Collect the chain into a list.
+ while (Tails.count(I) || Heads.count(I)) {
+ if (TransformedStores.count(I))
+ break;
+ AdjacentStores.insert(I);
+
+ StoreSize += getStoreSizeInBytes(I, DL);
+ // Move to the next value in the chain.
+ I = ConsecutiveChain[I];
+ }
- // See if we can optimize just this store in isolation.
- return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
- StoredVal, SI, StoreEv, BECount, NegStride);
+ Value *StoredVal = HeadStore->getValueOperand();
+ Value *StorePtr = HeadStore->getPointerOperand();
+ const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ APInt Stride = getStoreStride(StoreEv);
+
+ // Check to see if the stride matches the size of the stores. If so, then
+ // we know that every byte is touched in the loop.
+ if (StoreSize != Stride && StoreSize != -Stride)
+ continue;
+
+ bool NegStride = StoreSize == -Stride;
+
+ if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
+ StoredVal, HeadStore, AdjacentStores, StoreEv,
+ BECount, NegStride)) {
+ TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
+ Changed = true;
+ }
+ }
+
+ return Changed;
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
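A minimal source-level sketch of the case the new processLoopStores path above targets: several per-iteration stores that are consecutive in memory get chained up (Heads, Tails, ConsecutiveChain) and handed to processLoopStridedStore as one unit. The function and array names below are illustrative only, not taken from the patch.

    // Two adjacent 32-bit stores per iteration; the combined store size (8 bytes)
    // matches the stride of the address recurrence, so every byte is touched.
    void zero_pairs(int *a, int n) {
      for (int i = 0; i < n; ++i) {
        a[2 * i] = 0;     // head of a consecutive chain
        a[2 * i + 1] = 0; // tail; ConsecutiveChain[head] == tail
      }
    }
    // Once recognized (assuming n > 0), the whole loop is equivalent to:
    //   memset(a, 0, sizeof(int) * 2 * (size_t)n);
    // A negative stride (walking the array downward) is handled by rebasing the
    // start pointer before the memset is emitted.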
@@ -488,7 +651,7 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
return false;
// If we're not allowed to hack on memset, we fail.
- if (!TLI->has(LibFunc::memset))
+ if (!HasMemset)
return false;
Value *Pointer = MSI->getDest();
@@ -507,11 +670,12 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
// Check to see if the stride matches the size of the memset. If so, then we
// know that every byte is touched in the loop.
- const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+ const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+ if (!ConstStride)
+ return false;
- // TODO: Could also handle negative stride here someday, that will require the
- // validity check in mayLoopAccessLocation to be updated though.
- if (!Stride || MSI->getLength() != Stride->getValue())
+ APInt Stride = ConstStride->getAPInt();
+ if (SizeInBytes != Stride && SizeInBytes != -Stride)
return false;
// Verify that the memset value is loop invariant. If not, we can't promote
@@ -520,18 +684,22 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
return false;
+ SmallPtrSet<Instruction *, 1> MSIs;
+ MSIs.insert(MSI);
+ bool NegStride = SizeInBytes == -Stride;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getAlignment(), SplatValue, MSI, Ev,
- BECount, /*NegStride=*/false);
+ MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
+ BECount, NegStride);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
-static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
- const SCEV *BECount, unsigned StoreSize,
- AliasAnalysis &AA,
- Instruction *IgnoredStore) {
+static bool
+mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+ const SCEV *BECount, unsigned StoreSize,
+ AliasAnalysis &AA,
+ SmallPtrSetImpl<Instruction *> &IgnoredStores) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
@@ -550,8 +718,9 @@ static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
++BI)
- for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
- if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access))
+ for (Instruction &I : **BI)
+ if (IgnoredStores.count(&I) == 0 &&
+ (AA.getModRefInfo(&I, StoreLoc) & Access))
return true;
return false;
@@ -574,7 +743,8 @@ static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
- Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ Value *StoredVal, Instruction *TheStore,
+ SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride) {
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
@@ -609,7 +779,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *BasePtr =
Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
- *AA, TheStore)) {
+ *AA, Stores)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
@@ -644,13 +814,14 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *MSP =
M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(),
Int8PtrTy, Int8PtrTy, IntPtr, (void *)nullptr);
+ inferLibFuncAttributes(*M->getFunction("memset_pattern16"), *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// a constant array of 16 bytes. Plop the value into a mergeable global.
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
GlobalValue::PrivateLinkage,
PatternValue, ".memset_pattern");
- GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
GV->setAlignment(16);
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
@@ -662,7 +833,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(TheStore, TLI);
+ for (auto *I : Stores)
+ deleteDeadInstruction(I);
++NumMemSet;
return true;
}
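Since the pattern path now also covers negative strides and store chains, it may help to spell out what the emitted memset_pattern16 call is expected to do. The helper below is a hypothetical reference model written for this note, not the Darwin libc routine itself.

    #include <cstddef>
    #include <cstring>

    // Reference model (assumption): fill 'len' bytes at 'dst' by tiling a
    // 16-byte pattern, with a truncated copy of the pattern for trailing bytes.
    static void memset_pattern16_ref(void *dst, const void *pattern16,
                                     std::size_t len) {
      unsigned char *out = static_cast<unsigned char *>(dst);
      while (len >= 16) {
        std::memcpy(out, pattern16, 16);
        out += 16;
        len -= 16;
      }
      if (len)
        std::memcpy(out, pattern16, len); // partial final pattern
    }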
@@ -676,7 +848,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *StorePtr = SI->getPointerOperand();
const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- unsigned Stride = getStoreStride(StoreEv);
+ APInt Stride = getStoreStride(StoreEv);
unsigned StoreSize = getStoreSizeInBytes(SI, DL);
bool NegStride = StoreSize == -Stride;
@@ -714,8 +886,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *StoreBasePtr = Expander.expandCodeFor(
StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
+ SmallPtrSet<Instruction *, 1> Stores;
+ Stores.insert(SI);
if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
- StoreSize, *AA, SI)) {
+ StoreSize, *AA, Stores)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
@@ -735,7 +909,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
- *AA, SI)) {
+ *AA, Stores)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
@@ -769,7 +943,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(SI, TLI);
+ deleteDeadInstruction(SI);
++NumMemCpy;
return true;
}
@@ -993,7 +1167,7 @@ bool LoopIdiomRecognize::recognizePopcount() {
}
static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
- DebugLoc DL) {
+ const DebugLoc &DL) {
Value *Ops[] = {Val};
Type *Tys[] = {Val->getType()};
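The last hunk above only changes createPopcntIntrinsic to take its DebugLoc by const reference; the idiom recognizePopcount rewrites is untouched. For orientation, a sketch of that idiom (illustrative, not from the patch):

    // Classic population-count idiom: strip the lowest set bit until x is zero.
    // LoopIdiomRecognize can replace the entire loop with a single ctpop
    // intrinsic, i.e. the equivalent of __builtin_popcount(x).
    unsigned popcount_loop(unsigned x) {
      unsigned count = 0;
      while (x) {
        x &= x - 1; // clears the lowest set bit
        ++count;
      }
      return count;
    }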
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index b4102fe9ba340..629cb87d7a916 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -11,88 +11,43 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
#define DEBUG_TYPE "loop-instsimplify"
STATISTIC(NumSimplified, "Number of redundant instructions simplified");
-namespace {
- class LoopInstSimplify : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopInstSimplify() : LoopPass(ID) {
- initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop*, LPPassManager&) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreservedID(LCSSAID);
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
- };
-}
-
-char LoopInstSimplify::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
- "Simplify instructions in loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
- "Simplify instructions in loops", false, false)
-
-Pass *llvm::createLoopInstSimplifyPass() {
- return new LoopInstSimplify();
-}
-
-bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipOptnoneFunction(L))
- return false;
-
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *L->getHeader()->getParent());
-
- SmallVector<BasicBlock*, 8> ExitBlocks;
+static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ AssumptionCache *AC,
+ const TargetLibraryInfo *TLI) {
+ SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());
- SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+ SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
// The bit we are stealing from the pointer represents whether this basic
// block is the header of a subloop, in which case we only process its phis.
- typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
+ typedef PointerIntPair<BasicBlock *, 1> WorklistItem;
SmallVector<WorklistItem, 16> VisitStack;
- SmallPtrSet<BasicBlock*, 32> Visited;
+ SmallPtrSet<BasicBlock *, 32> Visited;
bool Changed = false;
bool LocalChanged;
@@ -122,7 +77,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, DL, TLI, DT, &AC);
+ Value *V = SimplifyInstruction(I, DL, TLI, DT, AC);
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
@@ -133,14 +88,13 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
++NumSimplified;
}
}
- bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
- if (res) {
- // RecursivelyDeleteTriviallyDeadInstruction can remove
- // more than one instruction, so simply incrementing the
- // iterator does not work. When instructions get deleted
- // re-iterate instead.
- BI = BB->begin(); BE = BB->end();
- LocalChanged |= res;
+ if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) {
+ // RecursivelyDeleteTriviallyDeadInstruction can remove more than one
+ // instruction, so simply incrementing the iterator does not work.
+ // When instructions get deleted re-iterate instead.
+ BI = BB->begin();
+ BE = BB->end();
+ LocalChanged = true;
}
if (IsSubloopHeader && !isa<PHINode>(I))
@@ -148,8 +102,10 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Add all successors to the worklist, except for loop exit blocks and the
- // bodies of subloops. We visit the headers of loops so that we can process
- // their phis, but we contract the rest of the subloop body and only follow
+ // bodies of subloops. We visit the headers of loops so that we can process
+ // their phis, but we contract the rest of the subloop body and only follow
// edges leading back to the original loop.
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
++SI) {
@@ -158,11 +114,11 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
continue;
const Loop *SuccLoop = LI->getLoopFor(SuccBB);
- if (SuccLoop && SuccLoop->getHeader() == SuccBB
- && L->contains(SuccLoop)) {
+ if (SuccLoop && SuccLoop->getHeader() == SuccBB &&
+ L->contains(SuccLoop)) {
VisitStack.push_back(WorklistItem(SuccBB, true));
- SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
+ SmallVector<BasicBlock *, 8> SubLoopExitBlocks;
SuccLoop->getExitBlocks(SubLoopExitBlocks);
for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
@@ -174,8 +130,8 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
continue;
}
- bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
- ExitBlocks.end(), SuccBB);
+ bool IsExitBlock =
+ std::binary_search(ExitBlocks.begin(), ExitBlocks.end(), SuccBB);
if (IsExitBlock)
continue;
@@ -193,3 +149,68 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
}
+
+namespace {
+class LoopInstSimplifyLegacyPass : public LoopPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopInstSimplifyLegacyPass() : LoopPass(ID) {
+ initializeLoopInstSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L))
+ return false;
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+ *L->getHeader()->getParent());
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+ return SimplifyLoopInst(L, DT, LI, AC, TLI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.setPreservesCFG();
+ getLoopAnalysisUsage(AU);
+ }
+};
+}
+
+PreservedAnalyses LoopInstSimplifyPass::run(Loop &L,
+ AnalysisManager<Loop> &AM) {
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
+ Function *F = L.getHeader()->getParent();
+
+ // Use getCachedResult because Loop pass cannot trigger a function analysis.
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
+ auto *LI = FAM.getCachedResult<LoopAnalysis>(*F);
+ auto *AC = FAM.getCachedResult<AssumptionAnalysis>(*F);
+ const auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(*F);
+ assert((LI && AC && TLI) && "Analyses for Loop Inst Simplify not available");
+
+ if (!SimplifyLoopInst(&L, DT, LI, AC, TLI))
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}
+
+char LoopInstSimplifyLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInstSimplifyLegacyPass, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(LoopInstSimplifyLegacyPass, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+
+Pass *llvm::createLoopInstSimplifyPass() {
+ return new LoopInstSimplifyLegacyPass();
+}
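The shape of the port above — a free worker function (SimplifyLoopInst) shared by a legacy-PM wrapper and a new-PM pass class — is the usual pattern for these migrations. A toy sketch of that structure, with entirely invented names and no LLVM APIs, just to show the division of labour:

    #include <algorithm>
    #include <vector>

    // The analysis-independent worker; both drivers delegate to it. Removing
    // zeros stands in here for "simplify instructions".
    static bool simplifyWorker(std::vector<int> &insts) {
      std::size_t before = insts.size();
      insts.erase(std::remove(insts.begin(), insts.end(), 0), insts.end());
      return insts.size() != before;
    }

    // Legacy-style driver: would fetch its inputs via getAnalysis<>, then delegate.
    struct LegacyDriver {
      bool run(std::vector<int> &insts) { return simplifyWorker(insts); }
    };

    // New-PM-style driver: would pull cached analyses from the manager, then
    // delegate to the very same worker, so the logic exists only once.
    struct NewPMDriver {
      bool run(std::vector<int> &insts) { return simplifyWorker(insts); }
    };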
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index 4295235a3f364..9241ec3652773 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -72,7 +71,7 @@ void printDepMatrix(CharMatrix &DepMatrix) {
#endif
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
- Loop *L, DependenceAnalysis *DA) {
+ Loop *L, DependenceInfo *DI) {
typedef SmallVector<Value *, 16> ValueVector;
ValueVector MemInstr;
@@ -117,7 +116,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
continue;
if (isa<LoadInst>(Src) && isa<LoadInst>(Des))
continue;
- if (auto D = DA->depends(Src, Des, true)) {
+ if (auto D = DI->depends(Src, Des, true)) {
DEBUG(dbgs() << "Found Dependency between Src=" << Src << " Des=" << Des
<< "\n");
if (D->isFlow()) {
@@ -404,12 +403,9 @@ public:
private:
void splitInnerLoopLatch(Instruction *);
- void splitOuterLoopLatch();
void splitInnerLoopHeader();
bool adjustLoopLinks();
void adjustLoopPreheaders();
- void adjustOuterLoopPreheader();
- void adjustInnerLoopPreheader();
bool adjustLoopBranches();
void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
BasicBlock *NewPred);
@@ -430,11 +426,11 @@ struct LoopInterchange : public FunctionPass {
static char ID;
ScalarEvolution *SE;
LoopInfo *LI;
- DependenceAnalysis *DA;
+ DependenceInfo *DI;
DominatorTree *DT;
bool PreserveLCSSA;
LoopInterchange()
- : FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) {
+ : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) {
initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
}
@@ -443,15 +439,18 @@ struct LoopInterchange : public FunctionPass {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<DependenceAnalysis>();
+ AU.addRequired<DependenceAnalysisWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
}
bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DA = &getAnalysis<DependenceAnalysis>();
+ DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
@@ -472,8 +471,7 @@ struct LoopInterchange : public FunctionPass {
}
bool isComputableLoopNest(LoopVector LoopList) {
- for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) {
- Loop *L = *I;
+ for (Loop *L : LoopList) {
const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
if (ExitCountOuter == SE->getCouldNotCompute()) {
DEBUG(dbgs() << "Couldn't compute Backedge count\n");
@@ -491,7 +489,7 @@ struct LoopInterchange : public FunctionPass {
return true;
}
- unsigned selectLoopForInterchange(LoopVector LoopList) {
+ unsigned selectLoopForInterchange(const LoopVector &LoopList) {
// TODO: Add a better heuristic to select the loop to be interchanged based
// on the dependence matrix. Currently we select the innermost loop.
return LoopList.size() - 1;
@@ -515,7 +513,7 @@ struct LoopInterchange : public FunctionPass {
<< "\n");
if (!populateDependencyMatrix(DependencyMatrix, LoopList.size(),
- OuterMostLoop, DA)) {
+ OuterMostLoop, DI)) {
DEBUG(dbgs() << "Populating Dependency matrix failed\n");
return false;
}
@@ -813,7 +811,6 @@ bool LoopInterchangeLegality::currentLimitations() {
// A[j+1][i+2] = A[j][i]+k;
// }
// }
- bool FoundInduction = false;
Instruction *InnerIndexVarInc = nullptr;
if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
InnerIndexVarInc =
@@ -829,17 +826,17 @@ bool LoopInterchangeLegality::currentLimitations() {
// we do not have any instruction between the induction variable and branch
// instruction.
- for (auto I = InnerLoopLatch->rbegin(), E = InnerLoopLatch->rend();
- I != E && !FoundInduction; ++I) {
- if (isa<BranchInst>(*I) || isa<CmpInst>(*I) || isa<TruncInst>(*I))
+ bool FoundInduction = false;
+ for (const Instruction &I : reverse(*InnerLoopLatch)) {
+ if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I))
continue;
- const Instruction &Ins = *I;
// We found an instruction. If this is not the induction variable, then it is
// not safe to split this loop latch.
- if (!Ins.isIdenticalTo(InnerIndexVarInc))
+ if (!I.isIdenticalTo(InnerIndexVarInc))
return true;
- else
- FoundInduction = true;
+
+ FoundInduction = true;
+ break;
}
// The loop latch ended and we didn't find the induction variable; return as a
// current limitation.
@@ -903,8 +900,7 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
BadOrder = GoodOrder = 0;
for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end();
BI != BE; ++BI) {
- for (auto I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) {
- const Instruction &Ins = *I;
+ for (Instruction &Ins : **BI) {
if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
unsigned NumOp = GEP->getNumOperands();
bool FoundInnerInduction = false;
@@ -1073,13 +1069,6 @@ void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
}
-void LoopInterchangeTransform::splitOuterLoopLatch() {
- BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
- BasicBlock *OuterLatchLcssaPhiBlock = OuterLoopLatch;
- OuterLoopLatch = SplitBlock(OuterLatchLcssaPhiBlock,
- OuterLoopLatch->getFirstNonPHI(), DT, LI);
-}
-
void LoopInterchangeTransform::splitInnerLoopHeader() {
// Split the inner loop header out. Here make sure that the reduction PHI's
@@ -1102,8 +1091,7 @@ void LoopInterchangeTransform::splitInnerLoopHeader() {
PHI->replaceAllUsesWith(V);
PHIVec.push_back((PHI));
}
- for (auto I = PHIVec.begin(), E = PHIVec.end(); I != E; ++I) {
- PHINode *P = *I;
+ for (PHINode *P : PHIVec) {
P->eraseFromParent();
}
} else {
@@ -1124,20 +1112,6 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
FromBB->getTerminator()->getIterator());
}
-void LoopInterchangeTransform::adjustOuterLoopPreheader() {
- BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
- BasicBlock *InnerPreHeader = InnerLoop->getLoopPreheader();
-
- moveBBContents(OuterLoopPreHeader, InnerPreHeader->getTerminator());
-}
-
-void LoopInterchangeTransform::adjustInnerLoopPreheader() {
- BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
- BasicBlock *OuterHeader = OuterLoop->getHeader();
-
- moveBBContents(InnerLoopPreHeader, OuterHeader->getTerminator());
-}
-
void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock,
BasicBlock *OldPred,
BasicBlock *NewPred) {
@@ -1234,8 +1208,7 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
PHINode *LcssaPhi = cast<PHINode>(I);
LcssaVec.push_back(LcssaPhi);
}
- for (auto I = LcssaVec.begin(), E = LcssaVec.end(); I != E; ++I) {
- PHINode *P = *I;
+ for (PHINode *P : LcssaVec) {
Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch);
P->replaceAllUsesWith(Incoming);
P->eraseFromParent();
@@ -1294,11 +1267,11 @@ char LoopInterchange::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
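The LoopInterchange changes above are mostly the DependenceAnalysis-to-DependenceInfo migration plus cleanups, but for readers new to the pass, the transformation itself in source terms (array shape and names are illustrative):

    // Column-major traversal of a row-major array: each inner iteration strides
    // 256 doubles, so cache reuse is poor.
    void before(double a[256][256]) {
      for (int j = 0; j < 256; ++j)
        for (int i = 0; i < 256; ++i)
          a[i][j] += 1.0;
    }

    // After interchange (legal here because iterations are independent), the
    // inner loop walks contiguous memory with unit stride.
    void after(double a[256][256]) {
      for (int i = 0; i < 256; ++i)
        for (int j = 0; j < 256; ++j)
          a[i][j] += 1.0;
    }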
diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 1064d088514d5..f29228c7659e2 100644
--- a/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include <forward_list>
@@ -61,7 +62,8 @@ struct StoreToLoadForwardingCandidate {
/// \brief Return true if the dependence from the store to the load has a
/// distance of one. E.g. A[i+1] = A[i]
- bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE) const {
+ bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
+ Loop *L) const {
Value *LoadPtr = Load->getPointerOperand();
Value *StorePtr = Store->getPointerOperand();
Type *LoadPtrType = LoadPtr->getType();
@@ -72,6 +74,13 @@ struct StoreToLoadForwardingCandidate {
LoadType == StorePtr->getType()->getPointerElementType() &&
"Should be a known dependence");
+ // Currently we only support accesses with unit stride. FIXME: we should be
+ // able to handle non-unit stride as well, as long as the stride is equal to
+ // the dependence distance.
+ if (getPtrStride(PSE, LoadPtr, L) != 1 ||
+ getPtrStride(PSE, StorePtr, L) != 1)
+ return false;
+
auto &DL = Load->getParent()->getModule()->getDataLayout();
unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
@@ -83,7 +92,7 @@ struct StoreToLoadForwardingCandidate {
auto *Dist = cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
const APInt &Val = Dist->getAPInt();
- return Val.abs() == TypeByteSize;
+ return Val == TypeByteSize;
}
Value *getLoadPtr() const { return Load->getPointerOperand(); }
@@ -110,12 +119,17 @@ bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
});
}
+/// \brief Return true if the load is not executed on all paths in the loop.
+static bool isLoadConditional(LoadInst *Load, Loop *L) {
+ return Load->getParent() != L->getHeader();
+}
+
/// \brief The per-loop class that does most of the work.
class LoadEliminationForLoop {
public:
LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
DominatorTree *DT)
- : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.PSE) {}
+ : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
/// \brief Look through the loop-carried and loop-independent dependences in
/// this loop and find store->load dependences.
@@ -162,6 +176,12 @@ public:
auto *Load = dyn_cast<LoadInst>(Destination);
if (!Load)
continue;
+
+ // Only propagate the value if the store and the load are of the same type.
+ if (Store->getPointerOperand()->getType() !=
+ Load->getPointerOperand()->getType())
+ continue;
+
Candidates.emplace_front(Load, Store);
}
@@ -219,12 +239,12 @@ public:
if (OtherCand == nullptr)
continue;
- // Handle the very basic of case when the two stores are in the same
- // block so deciding which one forwards is easy. The later one forwards
- // as long as they both have a dependence distance of one to the load.
+ // Handle the very basic case when the two stores are in the same block
+ // so deciding which one forwards is easy. The later one forwards as
+ // long as they both have a dependence distance of one to the load.
if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
- Cand.isDependenceDistanceOfOne(PSE) &&
- OtherCand->isDependenceDistanceOfOne(PSE)) {
+ Cand.isDependenceDistanceOfOne(PSE, L) &&
+ OtherCand->isDependenceDistanceOfOne(PSE, L)) {
// They are in the same block, the later one will forward to the load.
if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
OtherCand = &Cand;
@@ -429,14 +449,21 @@ public:
unsigned NumForwarding = 0;
for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
DEBUG(dbgs() << "Candidate " << Cand);
+
// Make sure that the stored value is available everywhere in the loop in
// the next iteration.
if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
continue;
+ // If the load is conditional we can't hoist its 0-iteration instance to
+ // the preheader because that would make it unconditional. Thus we would
+ // access a memory location that the original loop did not access.
+ if (isLoadConditional(Cand.Load, L))
+ continue;
+
// Check whether the SCEV difference is the same as the induction step,
// thus we load the value in the next iteration.
- if (!Cand.isDependenceDistanceOfOne(PSE))
+ if (!Cand.isDependenceDistanceOfOne(PSE, L))
continue;
++NumForwarding;
@@ -459,18 +486,25 @@ public:
return false;
}
- if (LAI.PSE.getUnionPredicate().getComplexity() >
+ if (LAI.getPSE().getUnionPredicate().getComplexity() >
LoadElimSCEVCheckThreshold) {
DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
return false;
}
- // Point of no-return, start the transformation. First, version the loop if
- // necessary.
- if (!Checks.empty() || !LAI.PSE.getUnionPredicate().isAlwaysTrue()) {
+ if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
+ if (L->getHeader()->getParent()->optForSize()) {
+ DEBUG(dbgs() << "Versioning is needed but not allowed when optimizing "
+ "for size.\n");
+ return false;
+ }
+
+ // Point of no-return, start the transformation. First, version the loop
+ // if necessary.
+
LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
LV.setAliasChecks(std::move(Checks));
- LV.setSCEVChecks(LAI.PSE.getUnionPredicate());
+ LV.setSCEVChecks(LAI.getPSE().getUnionPredicate());
LV.versionLoop();
}
@@ -508,8 +542,11 @@ public:
}
bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *LAA = &getAnalysis<LoopAccessAnalysis>();
+ auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
// Build up a worklist of inner-loops to vectorize. This is necessary as the
@@ -526,7 +563,7 @@ public:
// Now walk the identified inner loops.
bool Changed = false;
for (Loop *L : Worklist) {
- const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
+ const LoopAccessInfo &LAI = LAA->getInfo(L);
// The actual work is performed by LoadEliminationForLoop.
LoadEliminationForLoop LEL(L, LI, LAI, DT);
Changed |= LEL.processLoop();
@@ -537,9 +574,10 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(LoopSimplifyID);
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<LoopAccessAnalysis>();
+ AU.addRequired<LoopAccessLegacyAnalysis>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -554,9 +592,10 @@ static const char LLE_name[] = "Loop Load Elimination";
INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
namespace llvm {
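Taken together, the checks added above (unit stride, an exact distance of one rather than |distance| == size, and the new isLoadConditional guard) shape what the pass is allowed to do. A hand-written sketch of the forwarding itself, with illustrative names:

    // Store-to-load dependence of distance one: the value stored to A[i+1] is
    // exactly the value loaded as A[i] on the next iteration.
    void before(int *A, int k, int n) {
      for (int i = 0; i < n; ++i)
        A[i + 1] = A[i] + k;
    }

    // Equivalent after forwarding: the load is replaced by a scalar carried
    // across iterations, with the 0-iteration load done once up front.
    void after(int *A, int k, int n) {
      if (n <= 0)
        return;           // keep the hoisted load conditional on entering the loop
      int carried = A[0]; // "preheader" load of the first iteration's value
      for (int i = 0; i < n; ++i) {
        int next = carried + k;
        A[i + 1] = next;
        carried = next;   // forwarded store value; A[i] is never reloaded
      }
    }
    // If the original load were guarded by a condition inside the loop body, the
    // hoisted A[0] access could touch memory the original never did, which is
    // what the new isLoadConditional() check rejects.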
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 27c2d8824df06..d2f1b66076a6c 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -14,7 +14,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -128,9 +128,8 @@ NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
namespace {
enum IterationLimits {
- /// The maximum number of iterations that we'll try and reroll. This
- /// has to be less than 25 in order to fit into a SmallBitVector.
- IL_MaxRerollIterations = 16,
+ /// The maximum number of iterations that we'll try and reroll.
+ IL_MaxRerollIterations = 32,
/// The bitvector index used by loop induction variables and other
/// instructions that belong to all iterations.
IL_All,
@@ -147,13 +146,8 @@ namespace {
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ getLoopAnalysisUsage(AU);
}
protected:
@@ -169,6 +163,9 @@ namespace {
// Map between induction variable and its increment
DenseMap<Instruction *, int64_t> IVToIncMap;
+ // For a loop with multiple induction variables, remember the one used only
+ // to control the loop.
+ Instruction *LoopControlIV;
// A chain of isomorphic instructions, identified by a single-use PHI
// representing a reduction. Only the last value may be used outside the
@@ -356,9 +353,11 @@ namespace {
ScalarEvolution *SE, AliasAnalysis *AA,
TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA,
- DenseMap<Instruction *, int64_t> &IncrMap)
+ DenseMap<Instruction *, int64_t> &IncrMap,
+ Instruction *LoopCtrlIV)
: Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
- PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {}
+ PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
+ LoopControlIV(LoopCtrlIV) {}
/// Stage 1: Find all the DAG roots for the induction variable.
bool findRoots();
@@ -370,7 +369,7 @@ namespace {
void replace(const SCEV *IterCount);
protected:
- typedef MapVector<Instruction*, SmallBitVector> UsesTy;
+ typedef MapVector<Instruction*, BitVector> UsesTy;
bool findRootsRecursive(Instruction *IVU,
SmallInstructionSet SubsumedInsts);
@@ -396,6 +395,8 @@ namespace {
bool instrDependsOn(Instruction *I,
UsesTy::iterator Start,
UsesTy::iterator End);
+ void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
+ void updateNonLoopCtrlIncr();
LoopReroll *Parent;
@@ -426,8 +427,18 @@ namespace {
UsesTy Uses;
// Map between induction variable and its increment
DenseMap<Instruction *, int64_t> &IVToIncMap;
+ Instruction *LoopControlIV;
};
+ // Check if it is a compare-like instruction whose user is a branch
+ bool isCompareUsedByBranch(Instruction *I) {
+ auto *TI = I->getParent()->getTerminator();
+ if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
+ return false;
+ return I->hasOneUse() && TI->getOperand(0) == I;
+ };
+
+ bool isLoopControlIV(Loop *L, Instruction *IV);
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
void collectPossibleReductions(Loop *L,
ReductionTracker &Reductions);
@@ -438,10 +449,7 @@ namespace {
char LoopReroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
@@ -460,6 +468,110 @@ static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
return false;
}
+static const SCEVConstant *getIncrmentFactorSCEV(ScalarEvolution *SE,
+ const SCEV *SCEVExpr,
+ Instruction &IV) {
+ const SCEVMulExpr *MulSCEV = dyn_cast<SCEVMulExpr>(SCEVExpr);
+
+ // If StepRecurrence of a SCEVExpr is a constant (c1 * c2, c2 = sizeof(ptr)),
+ // Return c1.
+ if (!MulSCEV && IV.getType()->isPointerTy())
+ if (const SCEVConstant *IncSCEV = dyn_cast<SCEVConstant>(SCEVExpr)) {
+ const PointerType *PTy = cast<PointerType>(IV.getType());
+ Type *ElTy = PTy->getElementType();
+ const SCEV *SizeOfExpr =
+ SE->getSizeOfExpr(SE->getEffectiveSCEVType(IV.getType()), ElTy);
+ if (IncSCEV->getValue()->getValue().isNegative()) {
+ const SCEV *NewSCEV =
+ SE->getUDivExpr(SE->getNegativeSCEV(SCEVExpr), SizeOfExpr);
+ return dyn_cast<SCEVConstant>(SE->getNegativeSCEV(NewSCEV));
+ } else {
+ return dyn_cast<SCEVConstant>(SE->getUDivExpr(SCEVExpr, SizeOfExpr));
+ }
+ }
+
+ if (!MulSCEV)
+ return nullptr;
+
+ // If StepRecurrence of a SCEVExpr is a c * sizeof(x), where c is constant,
+ // Return c.
+ const SCEVConstant *CIncSCEV = nullptr;
+ for (const SCEV *Operand : MulSCEV->operands()) {
+ if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Operand)) {
+ CIncSCEV = Constant;
+ } else if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Operand)) {
+ Type *AllocTy;
+ if (!Unknown->isSizeOf(AllocTy))
+ break;
+ } else {
+ return nullptr;
+ }
+ }
+ return CIncSCEV;
+}
+
+// Check if an IV is only used to control the loop. There are two cases:
+// 1. It has only one use, which is the loop increment; the increment is only
+// used by the comparison and the PHI (possibly with a sext with nsw in
+// between), and the comparison is only used by the branch.
+// 2. It is used by the loop increment and the comparison; the loop increment
+// is only used by the PHI, and the comparison is used only by the branch.
+bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
+ unsigned IVUses = IV->getNumUses();
+ if (IVUses != 2 && IVUses != 1)
+ return false;
+
+ for (auto *User : IV->users()) {
+ int32_t IncOrCmpUses = User->getNumUses();
+ bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
+
+ // User can only have one or two uses.
+ if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
+ return false;
+
+ // Case 1
+ if (IVUses == 1) {
+ // The only user must be the loop increment.
+ // The loop increment must have two uses.
+ if (IsCompInst || IncOrCmpUses != 2)
+ return false;
+ }
+
+ // Case 2
+ if (IVUses == 2 && IncOrCmpUses != 1)
+ return false;
+
+ // The users of the IV must be a binary operation or a comparison
+ if (auto *BO = dyn_cast<BinaryOperator>(User)) {
+ if (BO->getOpcode() == Instruction::Add) {
+ // Loop Increment
+ // User of Loop Increment should be either PHI or CMP
+ for (auto *UU : User->users()) {
+ if (PHINode *PN = dyn_cast<PHINode>(UU)) {
+ if (PN != IV)
+ return false;
+ }
+ // Must be a CMP or an ext (of a value with nsw) then CMP
+ else {
+ Instruction *UUser = dyn_cast<Instruction>(UU);
+ // Skip SExt if we are extending an nsw value
+ // TODO: Allow ZExt too
+ if (BO->hasNoSignedWrap() && UUser && UUser->getNumUses() == 1 &&
+ isa<SExtInst>(UUser))
+ UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
+ if (!isCompareUsedByBranch(UUser))
+ return false;
+ }
+ }
+ } else
+ return false;
+ // Compare : can only have one use, and must be branch
+ } else if (!IsCompInst)
+ return false;
+ }
+ return true;
+}
+
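A source-level shape that isLoopControlIV above is meant to match, with illustrative names: the pointer IV drives the rerollable body, while the integer IV exists only to feed the exit compare and branch.

    void scale(float *p, int n, float f) {
      // 'p' is the IV the body uses; 'i' is a loop-control-only IV: its
      // increment feeds just the PHI and the exit compare, and the compare
      // feeds only the back-branch.
      for (int i = 0; i < n; ++i) {
        *p *= f;
        ++p;
      }
    }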
// Collect the list of loop induction variables with respect to which it might
// be possible to reroll the loop.
void LoopReroll::collectPossibleIVs(Loop *L,
@@ -469,7 +581,7 @@ void LoopReroll::collectPossibleIVs(Loop *L,
IE = Header->getFirstInsertionPt(); I != IE; ++I) {
if (!isa<PHINode>(I))
continue;
- if (!I->getType()->isIntegerTy())
+ if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy())
continue;
if (const SCEVAddRecExpr *PHISCEV =
@@ -478,15 +590,27 @@ void LoopReroll::collectPossibleIVs(Loop *L,
continue;
if (!PHISCEV->isAffine())
continue;
- if (const SCEVConstant *IncSCEV =
- dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
- const APInt &AInt = IncSCEV->getAPInt().abs();
+ const SCEVConstant *IncSCEV = nullptr;
+ if (I->getType()->isPointerTy())
+ IncSCEV =
+ getIncrmentFactorSCEV(SE, PHISCEV->getStepRecurrence(*SE), *I);
+ else
+ IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
+ if (IncSCEV) {
+ const APInt &AInt = IncSCEV->getValue()->getValue().abs();
if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc))
continue;
IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
<< "\n");
- PossibleIVs.push_back(&*I);
+
+ if (isLoopControlIV(L, &*I)) {
+ assert(!LoopControlIV && "Found two loop control only IV");
+ LoopControlIV = &(*I);
+ DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I << " = "
+ << *PHISCEV << "\n");
+ } else
+ PossibleIVs.push_back(&*I);
}
}
}
@@ -611,9 +735,8 @@ void LoopReroll::DAGRootTracker::collectInLoopUserSet(
const SmallInstructionSet &Exclude,
const SmallInstructionSet &Final,
DenseSet<Instruction *> &Users) {
- for (SmallInstructionVector::const_iterator I = Roots.begin(),
- IE = Roots.end(); I != IE; ++I)
- collectInLoopUserSet(*I, Exclude, Final, Users);
+ for (Instruction *Root : Roots)
+ collectInLoopUserSet(Root, Exclude, Final, Users);
}
static bool isSimpleLoadStore(Instruction *I) {
@@ -651,10 +774,12 @@ static bool isSimpleArithmeticOp(User *IVU) {
static bool isLoopIncrement(User *U, Instruction *IV) {
BinaryOperator *BO = dyn_cast<BinaryOperator>(U);
- if (!BO || BO->getOpcode() != Instruction::Add)
+
+ if ((BO && BO->getOpcode() != Instruction::Add) ||
+ (!BO && !isa<GetElementPtrInst>(U)))
return false;
- for (auto *UU : BO->users()) {
+ for (auto *UU : U->users()) {
PHINode *PN = dyn_cast<PHINode>(UU);
if (PN && PN == IV)
return true;
@@ -1031,6 +1156,33 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
Uses[I].set(IL_All);
}
+ // Make sure we mark loop-control-only PHIs as used in all iterations. See
+ // comment above LoopReroll::isLoopControlIV for more information.
+ BasicBlock *Header = L->getHeader();
+ if (LoopControlIV && LoopControlIV != IV) {
+ for (auto *U : LoopControlIV->users()) {
+ Instruction *IVUser = dyn_cast<Instruction>(U);
+ // IVUser could be loop increment or compare
+ Uses[IVUser].set(IL_All);
+ for (auto *UU : IVUser->users()) {
+ Instruction *UUser = dyn_cast<Instruction>(UU);
+ // UUser could be compare, PHI or branch
+ Uses[UUser].set(IL_All);
+ // Skip SExt
+ if (isa<SExtInst>(UUser)) {
+ UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
+ Uses[UUser].set(IL_All);
+ }
+ // Is UUser a compare instruction?
+ if (UU->hasOneUse()) {
+ Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
+ if (BI == cast<BranchInst>(Header->getTerminator()))
+ Uses[BI].set(IL_All);
+ }
+ }
+ }
+ }
+
// Make sure all instructions in the loop are in one and only one
// set.
for (auto &KV : Uses) {
@@ -1272,61 +1424,136 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
++J;
}
- bool Negative = IVToIncMap[IV] < 0;
- const DataLayout &DL = Header->getModule()->getDataLayout();
- // We need to create a new induction variable for each different BaseInst.
- for (auto &DRS : RootSets) {
- // Insert the new induction variable.
- const SCEVAddRecExpr *RealIVSCEV =
- cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
- const SCEV *Start = RealIVSCEV->getStart();
- const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>(SE->getAddRecExpr(
- Start, SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1), L,
- SCEV::FlagAnyWrap));
- { // Limit the lifetime of SCEVExpander.
- SCEVExpander Expander(*SE, DL, "reroll");
- Value *NewIV = Expander.expandCodeFor(H, IV->getType(), &Header->front());
-
- for (auto &KV : Uses) {
- if (KV.second.find_first() == 0)
- KV.first->replaceUsesOfWith(DRS.BaseInst, NewIV);
- }
+ bool HasTwoIVs = LoopControlIV && LoopControlIV != IV;
+
+ if (HasTwoIVs) {
+ updateNonLoopCtrlIncr();
+ replaceIV(LoopControlIV, LoopControlIV, IterCount);
+ } else
+ // We need to create a new induction variable for each different BaseInst.
+ for (auto &DRS : RootSets)
+ // Insert the new induction variable.
+ replaceIV(DRS.BaseInst, IV, IterCount);
- if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
- // FIXME: Why do we need this check?
- if (Uses[BI].find_first() == IL_All) {
- const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
+ SimplifyInstructionsInBlock(Header, TLI);
+ DeleteDeadPHIs(Header, TLI);
+}
- // Iteration count SCEV minus 1
- const SCEV *ICMinus1SCEV = SE->getMinusSCEV(
- ICSCEV, SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1));
+// For non-loop-control IVs, we only need to update the last increment
+// with the right amount; then we are done.
+void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() {
+ const SCEV *NewInc = nullptr;
+ for (auto *LoopInc : LoopIncs) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LoopInc);
+ const SCEVConstant *COp = nullptr;
+ if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) {
+ COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
+ } else {
+ COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(0)));
+ if (!COp)
+ COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
+ }
- Value *ICMinus1; // Iteration count minus 1
- if (isa<SCEVConstant>(ICMinus1SCEV)) {
- ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
- } else {
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader)
- Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+ assert(COp && "Didn't find constant operand of LoopInc!\n");
- ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
- Preheader->getTerminator());
- }
+ const APInt &AInt = COp->getValue()->getValue();
+ const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale);
+ if (AInt.isNegative()) {
+ NewInc = SE->getNegativeSCEV(COp);
+ NewInc = SE->getUDivExpr(NewInc, ScaleSCEV);
+ NewInc = SE->getNegativeSCEV(NewInc);
+ } else
+ NewInc = SE->getUDivExpr(COp, ScaleSCEV);
+
+ LoopInc->setOperand(1, dyn_cast<SCEVConstant>(NewInc)->getValue());
+ }
+}
- Value *Cond =
- new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond");
- BI->setCondition(Cond);
+void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
+ Instruction *InstIV,
+ const SCEV *IterCount) {
+ BasicBlock *Header = L->getHeader();
+ int64_t Inc = IVToIncMap[InstIV];
+ bool NeedNewIV = InstIV == LoopControlIV;
+ bool Negative = !NeedNewIV && Inc < 0;
+
+ const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(Inst));
+ const SCEV *Start = RealIVSCEV->getStart();
+
+ if (NeedNewIV)
+ Start = SE->getConstant(Start->getType(), 0);
+
+ const SCEV *SizeOfExpr = nullptr;
+ const SCEV *IncrExpr =
+ SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1);
+ if (auto *PTy = dyn_cast<PointerType>(Inst->getType())) {
+ Type *ElTy = PTy->getElementType();
+ SizeOfExpr =
+ SE->getSizeOfExpr(SE->getEffectiveSCEVType(Inst->getType()), ElTy);
+ IncrExpr = SE->getMulExpr(IncrExpr, SizeOfExpr);
+ }
+ const SCEV *NewIVSCEV =
+ SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
+
+ { // Limit the lifetime of SCEVExpander.
+ const DataLayout &DL = Header->getModule()->getDataLayout();
+ SCEVExpander Expander(*SE, DL, "reroll");
+ Value *NewIV =
+ Expander.expandCodeFor(NewIVSCEV, InstIV->getType(), &Header->front());
+
+ for (auto &KV : Uses)
+ if (KV.second.find_first() == 0)
+ KV.first->replaceUsesOfWith(Inst, NewIV);
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
+ // FIXME: Why do we need this check?
+ if (Uses[BI].find_first() == IL_All) {
+ const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
+
+ if (NeedNewIV)
+ ICSCEV = SE->getMulExpr(IterCount,
+ SE->getConstant(IterCount->getType(), Scale));
+
+ // Iteration count SCEV minus or plus 1
+ const SCEV *MinusPlus1SCEV =
+ SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1);
+ if (Inst->getType()->isPointerTy()) {
+ assert(SizeOfExpr && "SizeOfExpr is not initialized");
+ MinusPlus1SCEV = SE->getMulExpr(MinusPlus1SCEV, SizeOfExpr);
+ }
- if (BI->getSuccessor(1) != Header)
- BI->swapSuccessors();
+ const SCEV *ICMinusPlus1SCEV = SE->getMinusSCEV(ICSCEV, MinusPlus1SCEV);
+ // Iteration count minus 1
+ Instruction *InsertPtr = nullptr;
+ if (isa<SCEVConstant>(ICMinusPlus1SCEV)) {
+ InsertPtr = BI;
+ } else {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+ InsertPtr = Preheader->getTerminator();
}
+
+ if (!isa<PointerType>(NewIV->getType()) && NeedNewIV &&
+ (SE->getTypeSizeInBits(NewIV->getType()) <
+ SE->getTypeSizeInBits(ICMinusPlus1SCEV->getType()))) {
+ IRBuilder<> Builder(BI);
+ Builder.SetCurrentDebugLocation(BI->getDebugLoc());
+ NewIV = Builder.CreateSExt(NewIV, ICMinusPlus1SCEV->getType());
+ }
+ Value *ICMinusPlus1 = Expander.expandCodeFor(
+ ICMinusPlus1SCEV, NewIV->getType(), InsertPtr);
+
+ Value *Cond =
+ new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinusPlus1, "exitcond");
+ BI->setCondition(Cond);
+
+ if (BI->getSuccessor(1) != Header)
+ BI->swapSuccessors();
}
}
}
-
- SimplifyInstructionsInBlock(Header, TLI);
- DeleteDeadPHIs(Header, TLI);
}
// Validate the selected reductions. All iterations must have an isomorphic
@@ -1334,9 +1561,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
// entries must appear in order.
bool LoopReroll::ReductionTracker::validateSelected() {
// For a non-associative reduction, the chain entries must appear in order.
- for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
- RI != RIE; ++RI) {
- int i = *RI;
+ for (int i : Reds) {
int PrevIter = 0, BaseCount = 0, Count = 0;
for (Instruction *J : PossibleReds[i]) {
// Note that all instructions in the chain must have been found because
@@ -1380,9 +1605,7 @@ bool LoopReroll::ReductionTracker::validateSelected() {
void LoopReroll::ReductionTracker::replaceSelected() {
// Fixup reductions to refer to the last instruction associated with the
// first iteration (not the last).
- for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
- RI != RIE; ++RI) {
- int i = *RI;
+ for (int i : Reds) {
int j = 0;
for (int e = PossibleReds[i].size(); j != e; ++j)
if (PossibleRedIter[PossibleReds[i][j]] != 0) {
@@ -1396,9 +1619,8 @@ void LoopReroll::ReductionTracker::replaceSelected() {
Users.push_back(cast<Instruction>(U));
}
- for (SmallInstructionVector::iterator J = Users.begin(),
- JE = Users.end(); J != JE; ++J)
- (*J)->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
+ for (Instruction *User : Users)
+ User->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
PossibleReds[i][j]);
}
}
@@ -1450,7 +1672,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
const SCEV *IterCount,
ReductionTracker &Reductions) {
DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
- IVToIncMap);
+ IVToIncMap, LoopControlIV);
if (!DAGRoots.findRoots())
return false;
@@ -1472,7 +1694,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
}
bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipOptnoneFunction(L))
+ if (skipLoop(L))
return false;
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -1487,41 +1709,46 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
"] Loop %" << Header->getName() << " (" <<
L->getNumBlocks() << " block(s))\n");
- bool Changed = false;
-
// For now, we'll handle only single BB loops.
if (L->getNumBlocks() > 1)
- return Changed;
+ return false;
if (!SE->hasLoopInvariantBackedgeTakenCount(L))
- return Changed;
+ return false;
const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType()));
+ DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
// First, we need to find the induction variable with respect to which we can
// reroll (there may be several possible options).
SmallInstructionVector PossibleIVs;
IVToIncMap.clear();
+ LoopControlIV = nullptr;
collectPossibleIVs(L, PossibleIVs);
if (PossibleIVs.empty()) {
DEBUG(dbgs() << "LRR: No possible IVs found\n");
- return Changed;
+ return false;
}
ReductionTracker Reductions;
collectPossibleReductions(L, Reductions);
+ bool Changed = false;
// For each possible IV, collect the associated possible set of 'root' nodes
// (i+1, i+2, etc.).
- for (SmallInstructionVector::iterator I = PossibleIVs.begin(),
- IE = PossibleIVs.end(); I != IE; ++I)
- if (reroll(*I, L, Header, IterCount, Reductions)) {
+ for (Instruction *PossibleIV : PossibleIVs)
+ if (reroll(PossibleIV, L, Header, IterCount, Reductions)) {
Changed = true;
break;
}
+ DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
+
+ // Trip count of L has changed so SE must be re-evaluated.
+ if (Changed)
+ SE->forgetLoop(L);
return Changed;
}
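For completeness, the kind of loop the pass rerolls: a body that was manually unrolled into several isomorphic copies. Names and the unroll factor below are illustrative.

    // Unrolled-by-4 body: four isomorphic copies rooted at i, i+1, i+2, i+3.
    // (n is assumed to be a non-negative multiple of 4.)
    void before(int *a, const int *b, int n) {
      for (int i = 0; i < n; i += 4) {
        a[i]     = b[i]     * 2;
        a[i + 1] = b[i + 1] * 2;
        a[i + 2] = b[i + 2] * 2;
        a[i + 3] = b[i + 3] * 2;
      }
    }

    // What rerolling produces: one copy of the body, with the scale factor
    // folded into the trip count and induction step.
    void after(int *a, const int *b, int n) {
      for (int i = 0; i < n; ++i)
        a[i] = b[i] * 2;
    }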
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 5e6c2da08cc32..7a06a25a7073e 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopRotation.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -20,6 +20,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -32,20 +33,46 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
#define DEBUG_TYPE "loop-rotate"
-static cl::opt<unsigned>
-DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
- cl::desc("The default maximum header size for automatic loop rotation"));
+static cl::opt<unsigned> DefaultRotationThreshold(
+ "rotation-max-header-size", cl::init(16), cl::Hidden,
+ cl::desc("The default maximum header size for automatic loop rotation"));
STATISTIC(NumRotated, "Number of loops rotated");
+namespace {
+/// A simple loop rotation transformation.
+class LoopRotate {
+ const unsigned MaxHeaderSize;
+ LoopInfo *LI;
+ const TargetTransformInfo *TTI;
+ AssumptionCache *AC;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+
+public:
+ LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
+ DominatorTree *DT, ScalarEvolution *SE)
+ : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE) {
+ }
+ bool processLoop(Loop *L);
+
+private:
+ bool rotateLoop(Loop *L, bool SimplifiedLatch);
+ bool simplifyLoopLatch(Loop *L);
+};
+} // end anonymous namespace
+
/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
/// old header into the preheader. If there were uses of the values produced by
/// these instructions that were outside of the loop, we have to insert PHI nodes
@@ -69,7 +96,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
if (OrigHeaderVal->use_empty())
continue;
- Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
+ Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
  // The value now exists in two versions: the initial value in the preheader
// and the loop "next" value in the original header.
@@ -79,7 +106,8 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
// Visit each use of the OrigHeader instruction.
for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
- UE = OrigHeaderVal->use_end(); UI != UE; ) {
+ UE = OrigHeaderVal->use_end();
+ UI != UE;) {
// Grab the use before incrementing the iterator.
Use &U = *UI;
@@ -108,6 +136,41 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
// Anything else can be handled by SSAUpdater.
SSA.RewriteUse(U);
}
+
+ // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
+ // intrinsics.
+ LLVMContext &C = OrigHeader->getContext();
+ if (auto *VAM = ValueAsMetadata::getIfExists(OrigHeaderVal)) {
+ if (auto *MAV = MetadataAsValue::getIfExists(C, VAM)) {
+ for (auto UI = MAV->use_begin(), E = MAV->use_end(); UI != E;) {
+ // Grab the use before incrementing the iterator. Otherwise, altering
+ // the Use will invalidate the iterator.
+ Use &U = *UI++;
+ DbgInfoIntrinsic *UserInst = dyn_cast<DbgInfoIntrinsic>(U.getUser());
+ if (!UserInst)
+ continue;
+
+ // The original users in the OrigHeader are already using the original
+ // definitions.
+ BasicBlock *UserBB = UserInst->getParent();
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped, and anything else can be handled
+ // by the SSAUpdater. To avoid adding PHINodes, check whether the
+ // value is available in UserBB; if not, substitute undef.
+ Value *NewVal;
+ if (UserBB == OrigPreheader)
+ NewVal = OrigPreHeaderVal;
+ else if (SSA.HasValueForBlock(UserBB))
+ NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
+ else
+ NewVal = UndefValue::get(OrigHeaderVal->getType());
+ U = MetadataAsValue::get(C, ValueAsMetadata::get(NewVal));
+ }
+ }
+ }
}
}
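
The block above exists because debug intrinsics never use an SSA value directly; they reach it through uniqued metadata wrappers (MetadataAsValue over ValueAsMetadata), so rewriting the value's ordinary use list leaves every dbg.value still pointing at the old definition. A rough, LLVM-free model of that indirection; the Wrapper and DebugUse types are invented for illustration and do not mirror LLVM's API:

#include <cstdio>
#include <map>
#include <vector>

struct Value { const char *Name; };

// Uniqued wrapper, analogous to MetadataAsValue(ValueAsMetadata(V)):
// one wrapper object per wrapped value, shared by all debug users.
struct Wrapper { Value *Wrapped; };
static std::map<Value *, Wrapper> WrapperPool;
static Wrapper *getWrapper(Value *V) {
  return &WrapperPool.try_emplace(V, Wrapper{V}).first->second;
}

// A debug user holds the wrapper, not the value itself.
struct DebugUse { Wrapper *W; };

int main() {
  Value Old{"old.def"}, New{"new.def"};
  std::vector<DebugUse> DbgUses{{getWrapper(&Old)}, {getWrapper(&Old)}};

  // Rewriting ordinary uses of Old would not touch DbgUses: they still go
  // through the wrapper for Old. Hence the patch repoints each debug use at
  // the wrapper for the replacement value by hand.
  for (DebugUse &U : DbgUses)
    if (U.W->Wrapped == &Old)
      U.W = getWrapper(&New);

  for (const DebugUse &U : DbgUses)
    std::printf("debug use now refers to %s\n", U.W->Wrapped->Name);
}
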
@@ -121,10 +184,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
/// rotation. LoopRotate should be repeatable and converge to a canonical
/// form. This property is satisfied because simplifying the loop latch can only
/// happen once across multiple invocations of the LoopRotate pass.
-static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
- const TargetTransformInfo *TTI, AssumptionCache *AC,
- DominatorTree *DT, ScalarEvolution *SE,
- bool SimplifiedLatch) {
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
@@ -162,7 +222,14 @@ static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
if (Metrics.notDuplicatable) {
DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
- << " instructions: "; L->dump());
+ << " instructions: ";
+ L->dump());
+ return false;
+ }
+ if (Metrics.convergent) {
+ DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
+ "instructions: ";
+ L->dump());
return false;
}
if (Metrics.NumInsts > MaxHeaderSize)
@@ -225,10 +292,9 @@ static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
// executing in each iteration of the loop. This means it is safe to hoist
// something that might trap, but isn't safe to hoist something that reads
// memory (without proving that the loop doesn't write).
- if (L->hasLoopInvariantOperands(Inst) &&
- !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
- !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst) &&
- !isa<AllocaInst>(Inst)) {
+ if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
+ !Inst->mayWriteToMemory() && !isa<TerminatorInst>(Inst) &&
+ !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
Inst->moveBefore(LoopEntryBranch);
continue;
}
@@ -238,7 +304,7 @@ static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
// Eagerly remap the operands of the instruction.
RemapInstruction(C, ValueMap,
- RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
// With the operands remapped, see if the instruction constant folds or is
// otherwise simplifyable. This commonly occurs because the entry from PHI
@@ -248,13 +314,18 @@ static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
if (V && LI->replacementPreservesLCSSAForm(C, V)) {
// If so, then delete the temporary instruction and stick the folded value
// in the map.
- delete C;
ValueMap[Inst] = V;
+ if (!C->mayHaveSideEffects()) {
+ delete C;
+ C = nullptr;
+ }
} else {
+ ValueMap[Inst] = C;
+ }
+ if (C) {
// Otherwise, stick the new instruction into the new block!
C->setName(Inst->getName());
C->insertBefore(LoopEntryBranch);
- ValueMap[Inst] = C;
}
}
@@ -280,7 +351,6 @@ static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
L->moveToHeader(NewHeader);
assert(L->getHeader() == NewHeader && "Latch block is our new header");
-
// At this point, we've finished our major CFG changes. As part of cloning
// the loop into the preheader we've simplified instructions and the
// duplicated conditional branch may now be branching on a constant. If it is
@@ -291,8 +361,8 @@ static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
assert(PHBI->isConditional() && "Should be clone of BI condbr!");
if (!isa<ConstantInt>(PHBI->getCondition()) ||
- PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
- != NewHeader) {
+ PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
+ NewHeader) {
// The conditional branch can't be folded, handle the general case.
// Update DominatorTree to reflect the CFG change we just made. Then split
// edges as necessary to preserve LoopSimplify form.
@@ -329,18 +399,17 @@ static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
// be split.
SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
bool SplitLatchEdge = false;
- for (SmallVectorImpl<BasicBlock *>::iterator PI = ExitPreds.begin(),
- PE = ExitPreds.end();
- PI != PE; ++PI) {
+ for (BasicBlock *ExitPred : ExitPreds) {
// We only need to split loop exit edges.
- Loop *PredLoop = LI->getLoopFor(*PI);
+ Loop *PredLoop = LI->getLoopFor(ExitPred);
if (!PredLoop || PredLoop->contains(Exit))
continue;
- if (isa<IndirectBrInst>((*PI)->getTerminator()))
+ if (isa<IndirectBrInst>(ExitPred->getTerminator()))
continue;
- SplitLatchEdge |= L->getLoopLatch() == *PI;
+ SplitLatchEdge |= L->getLoopLatch() == ExitPred;
BasicBlock *ExitSplit = SplitCriticalEdge(
- *PI, Exit, CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
+ ExitPred, Exit,
+ CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
ExitSplit->moveBefore(Exit);
}
assert(SplitLatchEdge &&
@@ -384,8 +453,8 @@ static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
}
}
- // If the dominator changed, this may have an effect on other
- // predecessors, continue until we reach a fixpoint.
+        // If the dominator changed, this may have an effect on other
+        // predecessors; continue until we reach a fixpoint.
} while (Changed);
}
}
@@ -432,7 +501,7 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
// GEPs are cheap if all indices are constant.
if (!cast<GEPOperator>(I)->hasAllConstantIndices())
return false;
- // fall-thru to increment case
+ // fall-thru to increment case
case Instruction::Add:
case Instruction::Sub:
case Instruction::And:
@@ -441,11 +510,10 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr: {
- Value *IVOpnd = !isa<Constant>(I->getOperand(0))
- ? I->getOperand(0)
- : !isa<Constant>(I->getOperand(1))
- ? I->getOperand(1)
- : nullptr;
+ Value *IVOpnd =
+ !isa<Constant>(I->getOperand(0))
+ ? I->getOperand(0)
+ : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr;
if (!IVOpnd)
return false;
@@ -482,7 +550,7 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
/// canonical form so downstream passes can handle it.
///
/// I don't believe this invalidates SCEV.
-static bool simplifyLoopLatch(Loop *L, LoopInfo *LI, DominatorTree *DT) {
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
BasicBlock *Latch = L->getLoopLatch();
if (!Latch || Latch->hasAddressTaken())
return false;
@@ -503,7 +571,7 @@ static bool simplifyLoopLatch(Loop *L, LoopInfo *LI, DominatorTree *DT) {
return false;
DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
- << LastExit->getName() << "\n");
+ << LastExit->getName() << "\n");
// Hoist the instructions from Latch into LastExit.
LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
@@ -527,26 +595,19 @@ static bool simplifyLoopLatch(Loop *L, LoopInfo *LI, DominatorTree *DT) {
return true;
}
-/// Rotate \c L as many times as possible. Return true if the loop is rotated
-/// at least once.
-static bool iterativelyRotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
- const TargetTransformInfo *TTI,
- AssumptionCache *AC, DominatorTree *DT,
- ScalarEvolution *SE) {
+/// Rotate \c L, and return true if any modification was made.
+bool LoopRotate::processLoop(Loop *L) {
// Save the loop metadata.
MDNode *LoopMD = L->getLoopID();
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
// loop exit.
- bool SimplifiedLatch = simplifyLoopLatch(L, LI, DT);
+ bool SimplifiedLatch = simplifyLoopLatch(L);
- // One loop can be rotated multiple times.
- bool MadeChange = false;
- while (rotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE, SimplifiedLatch)) {
- MadeChange = true;
- SimplifiedLatch = false;
- }
+ bool MadeChange = rotateLoop(L, SimplifiedLatch);
+ assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) &&
+ "Loop latch should be exiting after loop-rotate.");
// Restore the loop metadata.
// NB! We presume LoopRotation DOESN'T ADD its own metadata.
@@ -556,15 +617,37 @@ static bool iterativelyRotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
return MadeChange;
}
+LoopRotatePass::LoopRotatePass() {}
+
+PreservedAnalyses LoopRotatePass::run(Loop &L, AnalysisManager<Loop> &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
+ Function *F = L.getHeader()->getParent();
+
+ auto *LI = FAM.getCachedResult<LoopAnalysis>(*F);
+ const auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F);
+ auto *AC = FAM.getCachedResult<AssumptionAnalysis>(*F);
+ assert((LI && TTI && AC) && "Analyses for loop rotation not available");
+
+ // Optional analyses.
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
+ auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
+ LoopRotate LR(DefaultRotationThreshold, LI, TTI, AC, DT, SE);
+
+ bool Changed = LR.processLoop(&L);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ return getLoopPassPreservedAnalyses();
+}
+
namespace {
-class LoopRotate : public LoopPass {
+class LoopRotateLegacyPass : public LoopPass {
unsigned MaxHeaderSize;
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
- initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
+ initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry());
if (SpecifiedMaxHeaderSize == -1)
MaxHeaderSize = DefaultRotationThreshold;
else
@@ -573,24 +656,13 @@ public:
// LCSSA form makes instruction renaming easier.
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<AAResultsWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreserved<SCEVAAWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
+ getLoopAnalysisUsage(AU);
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipOptnoneFunction(L))
+ if (skipLoop(L))
return false;
Function &F = *L->getHeader()->getParent();
@@ -601,24 +673,21 @@ public:
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
-
- return iterativelyRotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE);
+ LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE);
+ return LR.processLoop(L);
}
};
}
-char LoopRotate::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+char LoopRotateLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
+ false)
Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
- return new LoopRotate(MaxHeaderSize);
+ return new LoopRotateLegacyPass(MaxHeaderSize);
}
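
The shape of the rewrite above is the standard one for the pass-manager transition: the transform proper becomes a plain class (LoopRotate) that only borrows its analyses, and both LoopRotatePass::run and LoopRotateLegacyPass::runOnLoop shrink to adapters that gather those analyses and call processLoop. A compact, LLVM-free sketch of that "one implementation, two thin pass shims" shape, with invented names:

#include <cstdio>

// Stand-in for the analyses the transform depends on (LoopInfo, TTI, ...).
struct LoopInfoish { int Loops = 1; };

// The transform proper: no pass-manager knowledge, just borrowed analyses.
class RotateImpl {
  unsigned MaxHeaderSize;
  LoopInfoish *LI;
public:
  RotateImpl(unsigned MaxHeaderSize, LoopInfoish *LI)
      : MaxHeaderSize(MaxHeaderSize), LI(LI) {}
  bool processLoop() {
    std::printf("rotating with header limit %u over %d loop(s)\n",
                MaxHeaderSize, LI->Loops);
    return true;
  }
};

// New-PM style adapter: pulls analyses from its manager, then runs the impl.
bool runNewPM(LoopInfoish &LI) { return RotateImpl(16, &LI).processLoop(); }

// Legacy-PM style adapter: same impl, different plumbing around it.
bool runLegacy(LoopInfoish &LI) { return RotateImpl(16, &LI).processLoop(); }

int main() {
  LoopInfoish LI;
  return runNewPM(LI) && runLegacy(LI) ? 0 : 1;
}
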
diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
new file mode 100644
index 0000000000000..ec227932c09e5
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -0,0 +1,114 @@
+//===--------- LoopSimplifyCFG.cpp - Loop CFG Simplification Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Loop SimplifyCFG Pass. This pass is responsible for
+// basic loop CFG cleanup, primarily to assist other loop passes. If you
+// encounter a noncanonical CFG construct that causes another loop pass to
+// perform suboptimally, this is the place to fix it up.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-simplifycfg"
+
+static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) {
+ bool Changed = false;
+ // Copy blocks into a temporary array to avoid iterator invalidation issues
+ // as we remove them.
+ SmallVector<WeakVH, 16> Blocks(L.blocks());
+
+ for (auto &Block : Blocks) {
+ // Attempt to merge blocks in the trivial case. Don't modify blocks which
+ // belong to other loops.
+ BasicBlock *Succ = cast_or_null<BasicBlock>(Block);
+ if (!Succ)
+ continue;
+
+ BasicBlock *Pred = Succ->getSinglePredecessor();
+ if (!Pred || !Pred->getSingleSuccessor() || LI.getLoopFor(Pred) != &L)
+ continue;
+
+ // Pred is going to disappear, so we need to update the loop info.
+ if (L.getHeader() == Pred)
+ L.moveToHeader(Succ);
+ LI.removeBlock(Pred);
+ MergeBasicBlockIntoOnlyPred(Succ, &DT);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, AnalysisManager<Loop> &AM) {
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
+ Function *F = L.getHeader()->getParent();
+
+ auto *LI = FAM.getCachedResult<LoopAnalysis>(*F);
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
+ assert((LI && DT) && "Analyses for LoopSimplifyCFG not available");
+
+ if (!simplifyLoopCFG(L, *DT, *LI))
+ return PreservedAnalyses::all();
+ return getLoopPassPreservedAnalyses();
+}
+
+namespace {
+class LoopSimplifyCFGLegacyPass : public LoopPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopSimplifyCFGLegacyPass() : LoopPass(ID) {
+ initializeLoopSimplifyCFGLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &) override {
+ if (skipLoop(L))
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ return simplifyLoopCFG(*L, DT, LI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DependenceAnalysisWrapperPass>();
+ getLoopAnalysisUsage(AU);
+ }
+};
+}
+
+char LoopSimplifyCFGLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSimplifyCFGLegacyPass, "loop-simplifycfg",
+ "Simplify loop CFG", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_END(LoopSimplifyCFGLegacyPass, "loop-simplifycfg",
+ "Simplify loop CFG", false, false)
+
+Pass *llvm::createLoopSimplifyCFGPass() {
+ return new LoopSimplifyCFGLegacyPass();
+}
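
simplifyLoopCFG above boils down to a single rule: if a block's only predecessor has that block as its only successor and belongs to the same loop, the predecessor can be folded away (moving the loop header first when the predecessor was the header). A toy reproduction of that criterion on a hand-built CFG; the Block type and the block names are invented, and the real pass defers the actual merge to MergeBasicBlockIntoOnlyPred:

#include <cstdio>
#include <string>
#include <vector>

struct Block {
  std::string Name;
  std::vector<int> Preds, Succs;  // indices into the block table
  bool InLoop = true;
  bool Dead = false;
};

// The patch's criterion: a block with exactly one predecessor, where that
// predecessor (a) has this block as its only successor and (b) lives in the
// same loop, absorbs the predecessor.
static bool tryMerge(std::vector<Block> &BBs, int SuccIdx) {
  Block &Succ = BBs[SuccIdx];
  if (Succ.Dead || Succ.Preds.size() != 1)
    return false;
  int PredIdx = Succ.Preds[0];
  Block &Pred = BBs[PredIdx];
  if (Pred.Dead || !Pred.InLoop || Pred.Succs.size() != 1)
    return false;
  std::printf("merging %s into %s\n", Pred.Name.c_str(), Succ.Name.c_str());
  for (int P : Pred.Preds)            // edges into Pred now target Succ
    for (int &T : BBs[P].Succs)
      if (T == PredIdx)
        T = SuccIdx;
  Succ.Preds = Pred.Preds;
  Pred.Dead = true;                   // the predecessor disappears
  return true;
}

int main() {
  std::vector<Block> BBs = {
      {"preheader", {}, {1}, /*InLoop=*/false},
      {"header", {0, 3}, {2}},
      {"body", {1}, {3}},
      {"latch", {2}, {1}}};
  bool Changed = false;
  for (int I = 1, E = (int)BBs.size(); I != E; ++I)  // loop blocks only
    Changed |= tryMerge(BBs, I);
  std::printf("changed: %s\n", Changed ? "yes" : "no");
}
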
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index acfdec43d21ae..77c77eb7d798c 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -684,10 +684,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::prefetch:
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
if (II->getArgOperand(0) == OperandVal)
isAddress = true;
break;
@@ -704,18 +700,6 @@ static MemAccessTy getAccessType(const Instruction *Inst) {
AccessTy.AddrSpace = SI->getPointerAddressSpace();
} else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
AccessTy.AddrSpace = LI->getPointerAddressSpace();
- } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // Addressing modes can also be folded into prefetches and a variety
- // of intrinsics.
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
- AccessTy.MemTy = II->getArgOperand(0)->getType();
- break;
- }
}
// All pointers have the same requirements, so canonicalize them to an
@@ -963,8 +947,8 @@ void Cost::RateRegister(const SCEV *Reg,
isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
++SetupCost;
- NumIVMuls += isa<SCEVMulExpr>(Reg) &&
- SE.hasComputableLoopEvolution(Reg, L);
+ NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+ SE.hasComputableLoopEvolution(Reg, L);
}
/// Record this register in the set. If we haven't seen it before, rate
@@ -2752,34 +2736,31 @@ void LSRInstance::CollectChains() {
LatchPath.push_back(LoopHeader);
// Walk the instruction stream from the loop header to the loop latch.
- for (SmallVectorImpl<BasicBlock *>::reverse_iterator
- BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
- BBIter != BBEnd; ++BBIter) {
- for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
- I != E; ++I) {
+ for (BasicBlock *BB : reverse(LatchPath)) {
+ for (Instruction &I : *BB) {
// Skip instructions that weren't seen by IVUsers analysis.
- if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&*I))
+ if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
continue;
// Ignore users that are part of a SCEV expression. This way we only
// consider leaf IV Users. This effectively rediscovers a portion of
// IVUsers analysis but in program order this time.
- if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(&*I)))
+ if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
continue;
// Remove this instruction from any NearUsers set it may be in.
for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
ChainIdx < NChains; ++ChainIdx) {
- ChainUsersVec[ChainIdx].NearUsers.erase(&*I);
+ ChainUsersVec[ChainIdx].NearUsers.erase(&I);
}
// Search for operands that can be chained.
SmallPtrSet<Instruction*, 4> UniqueOperands;
- User::op_iterator IVOpEnd = I->op_end();
- User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
+ User::op_iterator IVOpEnd = I.op_end();
+ User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
while (IVOpIter != IVOpEnd) {
Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
if (UniqueOperands.insert(IVOpInst).second)
- ChainInstruction(&*I, IVOpInst, ChainUsersVec);
+ ChainInstruction(&I, IVOpInst, ChainUsersVec);
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
} // Continue walking down the instructions.
@@ -4331,28 +4312,15 @@ BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs)
const {
+ Instruction *Tentative = &*IP;
for (;;) {
- const Loop *IPLoop = LI.getLoopFor(IP->getParent());
- unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
-
- BasicBlock *IDom;
- for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
- if (!Rung) return IP;
- Rung = Rung->getIDom();
- if (!Rung) return IP;
- IDom = Rung->getBlock();
-
- // Don't climb into a loop though.
- const Loop *IDomLoop = LI.getLoopFor(IDom);
- unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
- if (IDomDepth <= IPLoopDepth &&
- (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
- break;
- }
-
bool AllDominate = true;
Instruction *BetterPos = nullptr;
- Instruction *Tentative = IDom->getTerminator();
+    // Don't bother attempting to insert before a catchswitch; its basic block
+    // cannot have any other non-PHI instructions.
+ if (isa<CatchSwitchInst>(Tentative))
+ return IP;
+
for (Instruction *Inst : Inputs) {
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
AllDominate = false;
@@ -4360,7 +4328,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
}
// Attempt to find an insert position in the middle of the block,
// instead of at the end, so that it can be used for other expansions.
- if (IDom == Inst->getParent() &&
+ if (Tentative->getParent() == Inst->getParent() &&
(!BetterPos || !DT.dominates(Inst, BetterPos)))
BetterPos = &*std::next(BasicBlock::iterator(Inst));
}
@@ -4370,6 +4338,26 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
IP = BetterPos->getIterator();
else
IP = Tentative->getIterator();
+
+ const Loop *IPLoop = LI.getLoopFor(IP->getParent());
+ unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
+
+ BasicBlock *IDom;
+ for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
+ if (!Rung) return IP;
+ Rung = Rung->getIDom();
+ if (!Rung) return IP;
+ IDom = Rung->getBlock();
+
+ // Don't climb into a loop though.
+ const Loop *IDomLoop = LI.getLoopFor(IDom);
+ unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
+ if (IDomDepth <= IPLoopDepth &&
+ (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
+ break;
+ }
+
+ Tentative = IDom->getTerminator();
}
return IP;
@@ -4426,7 +4414,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
while (isa<PHINode>(IP)) ++IP;
// Ignore landingpad instructions.
- while (!isa<TerminatorInst>(IP) && IP->isEHPad()) ++IP;
+ while (IP->isEHPad()) ++IP;
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
@@ -4961,7 +4949,7 @@ INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
@@ -4991,16 +4979,16 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// Requiring LoopSimplify a second time here prevents IVUsers from running
// twice, since LoopSimplify was invalidated by running ScalarEvolution.
AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<IVUsers>();
- AU.addPreserved<IVUsers>();
+ AU.addRequired<IVUsersWrapperPass>();
+ AU.addPreserved<IVUsersWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
- if (skipOptnoneFunction(L))
+ if (skipLoop(L))
return false;
- auto &IU = getAnalysis<IVUsers>();
+ auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
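
The change from skipping only non-terminator EH pads to skipping every EH pad (while (IP->isEHPad()) ++IP;) broadens the expander's existing skip list: PHIs first, then EH pads, then debug intrinsics. A small stand-alone illustration of that "advance past un-insertable instructions" idiom; the Kind enum is invented and only models instruction categories:

#include <cstdio>
#include <vector>

enum class Kind { Phi, EHPad, DbgIntrinsic, Ordinary };

// Advance IP past instructions an expander must not insert before, in the
// same order as AdjustInsertPositionForExpand: PHIs, EH pads, debug intrinsics.
static size_t adjustInsertPosition(const std::vector<Kind> &BB, size_t IP) {
  while (IP < BB.size() && BB[IP] == Kind::Phi) ++IP;
  while (IP < BB.size() && BB[IP] == Kind::EHPad) ++IP;
  while (IP < BB.size() && BB[IP] == Kind::DbgIntrinsic) ++IP;
  return IP;
}

int main() {
  std::vector<Kind> BB = {Kind::Phi, Kind::Phi, Kind::EHPad,
                          Kind::DbgIntrinsic, Kind::Ordinary};
  std::printf("insert at index %zu\n", adjustInsertPosition(BB, 0));  // 4
}
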
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index ecef6dbe24e64..91af4a1922ce1 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -12,13 +12,13 @@
// counts of loops easily.
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopUnrollAnalyzer.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -31,8 +31,11 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <climits>
+#include <utility>
using namespace llvm;
@@ -43,40 +46,54 @@ static cl::opt<unsigned>
cl::desc("The baseline cost threshold for loop unrolling"));
static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
- "unroll-percent-dynamic-cost-saved-threshold", cl::Hidden,
+ "unroll-percent-dynamic-cost-saved-threshold", cl::init(50), cl::Hidden,
cl::desc("The percentage of estimated dynamic cost which must be saved by "
"unrolling to allow unrolling up to the max threshold."));
static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
- "unroll-dynamic-cost-savings-discount", cl::Hidden,
+ "unroll-dynamic-cost-savings-discount", cl::init(100), cl::Hidden,
cl::desc("This is the amount discounted from the total unroll cost when "
"the unrolled form has a high dynamic cost savings (triggered by "
"the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."));
static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
- "unroll-max-iteration-count-to-analyze", cl::init(0), cl::Hidden,
+ "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
cl::desc("Don't allow loop unrolling to simulate more than this number of"
"iterations when checking full unroll profitability"));
-static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::Hidden,
- cl::desc("Use this unroll count for all loops including those with "
- "unroll_count pragma values, for testing purposes"));
+static cl::opt<unsigned> UnrollCount(
+ "unroll-count", cl::Hidden,
+ cl::desc("Use this unroll count for all loops including those with "
+ "unroll_count pragma values, for testing purposes"));
-static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
- cl::desc("Allows loops to be partially unrolled until "
- "-unroll-threshold loop size is reached."));
+static cl::opt<unsigned> UnrollMaxCount(
+ "unroll-max-count", cl::Hidden,
+ cl::desc("Set the max unroll count for partial and runtime unrolling, for"
+ "testing purposes"));
+
+static cl::opt<unsigned> UnrollFullMaxCount(
+ "unroll-full-max-count", cl::Hidden,
+ cl::desc(
+ "Set the max unroll count for full unrolling, for testing purposes"));
static cl::opt<bool>
-UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
- cl::desc("Unroll loops with run-time trip counts"));
+ UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
+ cl::desc("Allows loops to be partially unrolled until "
+ "-unroll-threshold loop size is reached."));
-static cl::opt<unsigned>
-PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
- cl::desc("Unrolled size limit for loops with an unroll(full) or "
- "unroll_count pragma."));
+static cl::opt<bool> UnrollAllowRemainder(
+ "unroll-allow-remainder", cl::Hidden,
+ cl::desc("Allow generation of a loop remainder (extra iterations) "
+ "when unrolling a loop."));
+static cl::opt<bool>
+ UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("Unroll loops with run-time trip counts"));
+
+static cl::opt<unsigned> PragmaUnrollThreshold(
+ "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
+ cl::desc("Unrolled size limit for loops with an unroll(full) or "
+ "unroll_count pragma."));
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
@@ -88,26 +105,28 @@ static const unsigned NoThreshold = UINT_MAX;
static const unsigned DefaultUnrollRuntimeCount = 8;
/// Gather the various unrolling parameters based on the defaults, compiler
-/// flags, TTI overrides, pragmas, and user specified parameters.
+/// flags, TTI overrides and user specified parameters.
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
- Optional<bool> UserRuntime, unsigned PragmaCount, bool PragmaFullUnroll,
- bool PragmaEnableUnroll, unsigned TripCount) {
+ Optional<bool> UserRuntime) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
UP.Threshold = 150;
- UP.PercentDynamicCostSavedThreshold = 20;
- UP.DynamicCostSavingsDiscount = 2000;
- UP.OptSizeThreshold = 50;
+ UP.PercentDynamicCostSavedThreshold = 50;
+ UP.DynamicCostSavingsDiscount = 100;
+ UP.OptSizeThreshold = 0;
UP.PartialThreshold = UP.Threshold;
- UP.PartialOptSizeThreshold = UP.OptSizeThreshold;
+ UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
UP.MaxCount = UINT_MAX;
+ UP.FullUnrollMaxCount = UINT_MAX;
UP.Partial = false;
UP.Runtime = false;
+ UP.AllowRemainder = true;
UP.AllowExpensiveTripCount = false;
+ UP.Force = false;
// Override with any target specific settings
TTI.getUnrollingPreferences(L, UP);
@@ -118,12 +137,6 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.PartialThreshold = UP.PartialOptSizeThreshold;
}
- // Apply unroll count pragmas
- if (PragmaCount)
- UP.Count = PragmaCount;
- else if (PragmaFullUnroll)
- UP.Count = TripCount;
-
// Apply any user values specified by cl::opt
if (UnrollThreshold.getNumOccurrences() > 0) {
UP.Threshold = UnrollThreshold;
@@ -134,10 +147,14 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UnrollPercentDynamicCostSavedThreshold;
if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
- if (UnrollCount.getNumOccurrences() > 0)
- UP.Count = UnrollCount;
+ if (UnrollMaxCount.getNumOccurrences() > 0)
+ UP.MaxCount = UnrollMaxCount;
+ if (UnrollFullMaxCount.getNumOccurrences() > 0)
+ UP.FullUnrollMaxCount = UnrollFullMaxCount;
if (UnrollAllowPartial.getNumOccurrences() > 0)
UP.Partial = UnrollAllowPartial;
+ if (UnrollAllowRemainder.getNumOccurrences() > 0)
+ UP.AllowRemainder = UnrollAllowRemainder;
if (UnrollRuntime.getNumOccurrences() > 0)
UP.Runtime = UnrollRuntime;
@@ -153,259 +170,42 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
if (UserRuntime.hasValue())
UP.Runtime = *UserRuntime;
- if (PragmaCount > 0 ||
- ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0)) {
- // If the loop has an unrolling pragma, we want to be more aggressive with
- // unrolling limits. Set thresholds to at least the PragmaTheshold value
- // which is larger than the default limits.
- if (UP.Threshold != NoThreshold)
- UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
- if (UP.PartialThreshold != NoThreshold)
- UP.PartialThreshold =
- std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
- }
-
return UP;
}
namespace {
-// This class is used to get an estimate of the optimization effects that we
-// could get from complete loop unrolling. It comes from the fact that some
-// loads might be replaced with concrete constant values and that could trigger
-// a chain of instruction simplifications.
-//
-// E.g. we might have:
-// int a[] = {0, 1, 0};
-// v = 0;
-// for (i = 0; i < 3; i ++)
-// v += b[i]*a[i];
-// If we completely unroll the loop, we would get:
-// v = b[0]*a[0] + b[1]*a[1] + b[2]*a[2]
-// Which then will be simplified to:
-// v = b[0]* 0 + b[1]* 1 + b[2]* 0
-// And finally:
-// v = b[1]
-class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
- typedef InstVisitor<UnrolledInstAnalyzer, bool> Base;
- friend class InstVisitor<UnrolledInstAnalyzer, bool>;
- struct SimplifiedAddress {
- Value *Base = nullptr;
- ConstantInt *Offset = nullptr;
- };
+/// A struct to densely store the state of an instruction after unrolling at
+/// each iteration.
+///
+/// This is designed to work like a tuple of <Instruction *, int> for the
+/// purposes of hashing and lookup, while also carrying two boolean state
+/// flags for each key.
+struct UnrolledInstState {
+ Instruction *I;
+ int Iteration : 30;
+ unsigned IsFree : 1;
+ unsigned IsCounted : 1;
+};
-public:
- UnrolledInstAnalyzer(unsigned Iteration,
- DenseMap<Value *, Constant *> &SimplifiedValues,
- ScalarEvolution &SE)
- : SimplifiedValues(SimplifiedValues), SE(SE) {
- IterationNumber = SE.getConstant(APInt(64, Iteration));
+/// Hashing and equality testing for a set of the instruction states.
+struct UnrolledInstStateKeyInfo {
+ typedef DenseMapInfo<Instruction *> PtrInfo;
+ typedef DenseMapInfo<std::pair<Instruction *, int>> PairInfo;
+ static inline UnrolledInstState getEmptyKey() {
+ return {PtrInfo::getEmptyKey(), 0, 0, 0};
}
-
- // Allow access to the initial visit method.
- using Base::visit;
-
-private:
- /// \brief A cache of pointer bases and constant-folded offsets corresponding
- /// to GEP (or derived from GEP) instructions.
- ///
- /// In order to find the base pointer one needs to perform non-trivial
- /// traversal of the corresponding SCEV expression, so it's good to have the
- /// results saved.
- DenseMap<Value *, SimplifiedAddress> SimplifiedAddresses;
-
- /// \brief SCEV expression corresponding to number of currently simulated
- /// iteration.
- const SCEV *IterationNumber;
-
- /// \brief A Value->Constant map for keeping values that we managed to
- /// constant-fold on the given iteration.
- ///
- /// While we walk the loop instructions, we build up and maintain a mapping
- /// of simplified values specific to this iteration. The idea is to propagate
- /// any special information we have about loads that can be replaced with
- /// constants after complete unrolling, and account for likely simplifications
- /// post-unrolling.
- DenseMap<Value *, Constant *> &SimplifiedValues;
-
- ScalarEvolution &SE;
-
- /// \brief Try to simplify instruction \param I using its SCEV expression.
- ///
- /// The idea is that some AddRec expressions become constants, which then
- /// could trigger folding of other instructions. However, that only happens
- /// for expressions whose start value is also constant, which isn't always the
- /// case. In another common and important case the start value is just some
- /// address (i.e. SCEVUnknown) - in this case we compute the offset and save
- /// it along with the base address instead.
- bool simplifyInstWithSCEV(Instruction *I) {
- if (!SE.isSCEVable(I->getType()))
- return false;
-
- const SCEV *S = SE.getSCEV(I);
- if (auto *SC = dyn_cast<SCEVConstant>(S)) {
- SimplifiedValues[I] = SC->getValue();
- return true;
- }
-
- auto *AR = dyn_cast<SCEVAddRecExpr>(S);
- if (!AR)
- return false;
-
- const SCEV *ValueAtIteration = AR->evaluateAtIteration(IterationNumber, SE);
- // Check if the AddRec expression becomes a constant.
- if (auto *SC = dyn_cast<SCEVConstant>(ValueAtIteration)) {
- SimplifiedValues[I] = SC->getValue();
- return true;
- }
-
- // Check if the offset from the base address becomes a constant.
- auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(S));
- if (!Base)
- return false;
- auto *Offset =
- dyn_cast<SCEVConstant>(SE.getMinusSCEV(ValueAtIteration, Base));
- if (!Offset)
- return false;
- SimplifiedAddress Address;
- Address.Base = Base->getValue();
- Address.Offset = Offset->getValue();
- SimplifiedAddresses[I] = Address;
- return true;
+ static inline UnrolledInstState getTombstoneKey() {
+ return {PtrInfo::getTombstoneKey(), 0, 0, 0};
}
-
- /// Base case for the instruction visitor.
- bool visitInstruction(Instruction &I) {
- return simplifyInstWithSCEV(&I);
+ static inline unsigned getHashValue(const UnrolledInstState &S) {
+ return PairInfo::getHashValue({S.I, S.Iteration});
}
-
- /// Try to simplify binary operator I.
- ///
- /// TODO: Probably it's worth to hoist the code for estimating the
- /// simplifications effects to a separate class, since we have a very similar
- /// code in InlineCost already.
- bool visitBinaryOperator(BinaryOperator &I) {
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (!isa<Constant>(LHS))
- if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
- LHS = SimpleLHS;
- if (!isa<Constant>(RHS))
- if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
- RHS = SimpleRHS;
-
- Value *SimpleV = nullptr;
- const DataLayout &DL = I.getModule()->getDataLayout();
- if (auto FI = dyn_cast<FPMathOperator>(&I))
- SimpleV =
- SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
- else
- SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
-
- if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
- SimplifiedValues[&I] = C;
-
- if (SimpleV)
- return true;
- return Base::visitBinaryOperator(I);
- }
-
- /// Try to fold load I.
- bool visitLoad(LoadInst &I) {
- Value *AddrOp = I.getPointerOperand();
-
- auto AddressIt = SimplifiedAddresses.find(AddrOp);
- if (AddressIt == SimplifiedAddresses.end())
- return false;
- ConstantInt *SimplifiedAddrOp = AddressIt->second.Offset;
-
- auto *GV = dyn_cast<GlobalVariable>(AddressIt->second.Base);
- // We're only interested in loads that can be completely folded to a
- // constant.
- if (!GV || !GV->hasDefinitiveInitializer() || !GV->isConstant())
- return false;
-
- ConstantDataSequential *CDS =
- dyn_cast<ConstantDataSequential>(GV->getInitializer());
- if (!CDS)
- return false;
-
- // We might have a vector load from an array. FIXME: for now we just bail
- // out in this case, but we should be able to resolve and simplify such
- // loads.
- if(!CDS->isElementTypeCompatible(I.getType()))
- return false;
-
- int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
- assert(SimplifiedAddrOp->getValue().getActiveBits() < 64 &&
- "Unexpectedly large index value.");
- int64_t Index = SimplifiedAddrOp->getSExtValue() / ElemSize;
- if (Index >= CDS->getNumElements()) {
- // FIXME: For now we conservatively ignore out of bound accesses, but
- // we're allowed to perform the optimization in this case.
- return false;
- }
-
- Constant *CV = CDS->getElementAsConstant(Index);
- assert(CV && "Constant expected.");
- SimplifiedValues[&I] = CV;
-
- return true;
- }
-
- bool visitCastInst(CastInst &I) {
- // Propagate constants through casts.
- Constant *COp = dyn_cast<Constant>(I.getOperand(0));
- if (!COp)
- COp = SimplifiedValues.lookup(I.getOperand(0));
- if (COp)
- if (Constant *C =
- ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
-
- return Base::visitCastInst(I);
- }
-
- bool visitCmpInst(CmpInst &I) {
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
-
- // First try to handle simplified comparisons.
- if (!isa<Constant>(LHS))
- if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
- LHS = SimpleLHS;
- if (!isa<Constant>(RHS))
- if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
- RHS = SimpleRHS;
-
- if (!isa<Constant>(LHS) && !isa<Constant>(RHS)) {
- auto SimplifiedLHS = SimplifiedAddresses.find(LHS);
- if (SimplifiedLHS != SimplifiedAddresses.end()) {
- auto SimplifiedRHS = SimplifiedAddresses.find(RHS);
- if (SimplifiedRHS != SimplifiedAddresses.end()) {
- SimplifiedAddress &LHSAddr = SimplifiedLHS->second;
- SimplifiedAddress &RHSAddr = SimplifiedRHS->second;
- if (LHSAddr.Base == RHSAddr.Base) {
- LHS = LHSAddr.Offset;
- RHS = RHSAddr.Offset;
- }
- }
- }
- }
-
- if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
- if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
- if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
- SimplifiedValues[&I] = C;
- return true;
- }
- }
- }
-
- return Base::visitCmpInst(I);
+ static inline bool isEqual(const UnrolledInstState &LHS,
+ const UnrolledInstState &RHS) {
+ return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
}
};
-} // namespace
-
+}
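
UnrolledInstState above is effectively a map key of (Instruction *, iteration) with two flag bits riding along, and UnrolledInstStateKeyInfo teaches DenseSet to hash and compare only the key part. The same idea expressed with the standard library, as a sketch: an unordered_map keyed on the pair, with the flags as the mapped value (invented names, and no attempt to mirror LLVM's DenseMapInfo interface):

#include <cstdio>
#include <unordered_map>
#include <utility>

struct Instruction;  // opaque here; only the pointer identity is hashed

using InstIterKey = std::pair<const Instruction *, int>;

struct KeyHash {
  size_t operator()(const InstIterKey &K) const {
    // Combine the pointer and the iteration number, the way
    // PairInfo::getHashValue combines them in the patch.
    return std::hash<const Instruction *>()(K.first) * 31u ^
           std::hash<int>()(K.second);
  }
};

struct InstState {          // the two flag bits carried alongside the key
  bool IsFree = false;
  bool IsCounted = false;
};

int main() {
  std::unordered_map<InstIterKey, InstState, KeyHash> InstCostMap;
  const Instruction *I = reinterpret_cast<const Instruction *>(0x1000);

  InstCostMap[{I, 0}] = {true, false};   // iteration 0: simplified away
  InstCostMap[{I, 1}] = {false, false};  // iteration 1: real cost, not yet counted

  auto It = InstCostMap.find({I, 1});    // lookup uses only (I, iteration)
  if (It != InstCostMap.end() && !It->second.IsCounted) {
    It->second.IsCounted = true;         // mark as counted, as the pass does
    std::printf("counted instruction at iteration %d\n", It->first.second);
  }
}
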
namespace {
struct EstimatedUnrollCost {
@@ -441,18 +241,25 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
"The unroll iterations max is too large!");
+  // Only analyze inner loops. We can't properly estimate the cost of nested
+  // loops, and we won't visit inner loops again anyway.
+ if (!L->empty())
+ return None;
+
// Don't simulate loops with a big or unknown tripcount
if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
TripCount > UnrollMaxIterationsCountToAnalyze)
return None;
SmallSetVector<BasicBlock *, 16> BBWorklist;
+ SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist;
DenseMap<Value *, Constant *> SimplifiedValues;
SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
// The estimated cost of the unrolled form of the loop. We try to estimate
// this by simplifying as much as we can while computing the estimate.
int UnrolledCost = 0;
+
// We also track the estimated dynamic (that is, actually executed) cost in
// the rolled form. This helps identify cases when the savings from unrolling
// aren't just exposing dead control flows, but actual reduced dynamic
@@ -460,6 +267,97 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// unrolling.
int RolledDynamicCost = 0;
+ // We track the simplification of each instruction in each iteration. We use
+ // this to recursively merge costs into the unrolled cost on-demand so that
+ // we don't count the cost of any dead code. This is essentially a map from
+ // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
+ DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;
+
+ // A small worklist used to accumulate cost of instructions from each
+ // observable and reached root in the loop.
+ SmallVector<Instruction *, 16> CostWorklist;
+
+ // PHI-used worklist used between iterations while accumulating cost.
+ SmallVector<Instruction *, 4> PHIUsedList;
+
+ // Helper function to accumulate cost for instructions in the loop.
+ auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
+ assert(Iteration >= 0 && "Cannot have a negative iteration!");
+ assert(CostWorklist.empty() && "Must start with an empty cost list");
+ assert(PHIUsedList.empty() && "Must start with an empty phi used list");
+ CostWorklist.push_back(&RootI);
+ for (;; --Iteration) {
+ do {
+ Instruction *I = CostWorklist.pop_back_val();
+
+        // InstCostMap only uses I and Iteration as the key; the other two
+        // values don't matter here.
+ auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
+ if (CostIter == InstCostMap.end())
+ // If an input to a PHI node comes from a dead path through the loop
+ // we may have no cost data for it here. What that actually means is
+ // that it is free.
+ continue;
+ auto &Cost = *CostIter;
+ if (Cost.IsCounted)
+ // Already counted this instruction.
+ continue;
+
+ // Mark that we are counting the cost of this instruction now.
+ Cost.IsCounted = true;
+
+ // If this is a PHI node in the loop header, just add it to the PHI set.
+ if (auto *PhiI = dyn_cast<PHINode>(I))
+ if (PhiI->getParent() == L->getHeader()) {
+ assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
+ "inherently simplify during unrolling.");
+ if (Iteration == 0)
+ continue;
+
+ // Push the incoming value from the backedge into the PHI used list
+ // if it is an in-loop instruction. We'll use this to populate the
+ // cost worklist for the next iteration (as we count backwards).
+ if (auto *OpI = dyn_cast<Instruction>(
+ PhiI->getIncomingValueForBlock(L->getLoopLatch())))
+ if (L->contains(OpI))
+ PHIUsedList.push_back(OpI);
+ continue;
+ }
+
+ // First accumulate the cost of this instruction.
+ if (!Cost.IsFree) {
+ UnrolledCost += TTI.getUserCost(I);
+ DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
+ << "): ");
+ DEBUG(I->dump());
+ }
+
+ // We must count the cost of every operand which is not free,
+ // recursively. If we reach a loop PHI node, simply add it to the set
+ // to be considered on the next iteration (backwards!).
+ for (Value *Op : I->operands()) {
+ // Check whether this operand is free due to being a constant or
+ // outside the loop.
+ auto *OpI = dyn_cast<Instruction>(Op);
+ if (!OpI || !L->contains(OpI))
+ continue;
+
+ // Otherwise accumulate its cost.
+ CostWorklist.push_back(OpI);
+ }
+ } while (!CostWorklist.empty());
+
+ if (PHIUsedList.empty())
+ // We've exhausted the search.
+ break;
+
+ assert(Iteration > 0 &&
+ "Cannot track PHI-used values past the first iteration!");
+ CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
+ PHIUsedList.clear();
+ }
+ };
+
// Ensure that we don't violate the loop structure invariants relied on by
// this analysis.
assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
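
AddCostRecursively above only charges instructions that are transitively reachable, through non-free operands, from something observable (a side-effecting instruction, a terminator, or a value escaping through an exit PHI), walking backwards through header PHIs into earlier iterations. A heavily simplified, LLVM-free model of that worklist accumulation over a toy operand graph; the names, costs, and the graph itself are invented:

#include <cstdio>
#include <set>
#include <vector>

struct Op {
  const char *Name;
  int Cost;                   // stand-in for TTI.getUserCost
  bool IsFree;                // simplified away for this iteration
  std::vector<int> Operands;  // indices of in-loop operand definitions
};

// Charge RootIdx and every operand transitively feeding it, skipping free
// nodes and counting each node at most once (the IsCounted bit in the pass).
static int addCostRecursively(const std::vector<Op> &Ops, int RootIdx) {
  int Total = 0;
  std::set<int> Counted;
  std::vector<int> Worklist{RootIdx};
  while (!Worklist.empty()) {
    int Idx = Worklist.back();
    Worklist.pop_back();
    if (!Counted.insert(Idx).second)
      continue;                       // already counted this instruction
    const Op &O = Ops[Idx];
    if (!O.IsFree) {
      Total += O.Cost;
      std::printf("charging %s (%d)\n", O.Name, O.Cost);
    }
    for (int OpIdx : O.Operands)      // operands must be charged too
      Worklist.push_back(OpIdx);
  }
  return Total;
}

int main() {
  // store <- add <- load; the cmp only gets charged if some root reaches it.
  std::vector<Op> Ops = {{"load", 1, false, {}},
                         {"add", 1, false, {0}},
                         {"cmp", 1, true, {1}},   // free: folded by unrolling
                         {"store", 1, false, {1}}};
  std::printf("unrolled cost contribution: %d\n", addCostRecursively(Ops, 3));
}
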
@@ -502,7 +400,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
while (!SimplifiedInputValues.empty())
SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
- UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE);
+ UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
BBWorklist.clear();
BBWorklist.insert(L->getHeader());
@@ -514,22 +412,32 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// it. We don't change the actual IR, just count optimization
// opportunities.
for (Instruction &I : *BB) {
- int InstCost = TTI.getUserCost(&I);
+ // Track this instruction's expected baseline cost when executing the
+ // rolled loop form.
+ RolledDynamicCost += TTI.getUserCost(&I);
// Visit the instruction to analyze its loop cost after unrolling,
- // and if the visitor returns false, include this instruction in the
- // unrolled cost.
- if (!Analyzer.visit(I))
- UnrolledCost += InstCost;
- else {
- DEBUG(dbgs() << " " << I
- << " would be simplified if loop is unrolled.\n");
- (void)0;
- }
+ // and if the visitor returns true, mark the instruction as free after
+ // unrolling and continue.
+ bool IsFree = Analyzer.visit(I);
+ bool Inserted = InstCostMap.insert({&I, (int)Iteration,
+ (unsigned)IsFree,
+ /*IsCounted*/ false}).second;
+ (void)Inserted;
+ assert(Inserted && "Cannot have a state for an unvisited instruction!");
+
+ if (IsFree)
+ continue;
- // Also track this instructions expected cost when executing the rolled
- // loop form.
- RolledDynamicCost += InstCost;
+ // If the instruction might have a side-effect recursively account for
+ // the cost of it and all the instructions leading up to it.
+ if (I.mayHaveSideEffects())
+ AddCostRecursively(I, Iteration);
+
+ // Can't properly model a cost of a call.
+ // FIXME: With a proper cost model we should be able to do it.
+        if (isa<CallInst>(&I))
+ return None;
// If unrolled body turns out to be too big, bail out.
if (UnrolledCost > MaxUnrolledLoopSize) {
@@ -545,42 +453,45 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
+ BasicBlock *KnownSucc = nullptr;
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isConditional()) {
if (Constant *SimpleCond =
SimplifiedValues.lookup(BI->getCondition())) {
- BasicBlock *Succ = nullptr;
// Just take the first successor if condition is undef
if (isa<UndefValue>(SimpleCond))
- Succ = BI->getSuccessor(0);
- else
- Succ = BI->getSuccessor(
- cast<ConstantInt>(SimpleCond)->isZero() ? 1 : 0);
- if (L->contains(Succ))
- BBWorklist.insert(Succ);
- continue;
+ KnownSucc = BI->getSuccessor(0);
+ else if (ConstantInt *SimpleCondVal =
+ dyn_cast<ConstantInt>(SimpleCond))
+ KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
}
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
if (Constant *SimpleCond =
SimplifiedValues.lookup(SI->getCondition())) {
- BasicBlock *Succ = nullptr;
// Just take the first successor if condition is undef
if (isa<UndefValue>(SimpleCond))
- Succ = SI->getSuccessor(0);
- else
- Succ = SI->findCaseValue(cast<ConstantInt>(SimpleCond))
- .getCaseSuccessor();
- if (L->contains(Succ))
- BBWorklist.insert(Succ);
- continue;
+ KnownSucc = SI->getSuccessor(0);
+ else if (ConstantInt *SimpleCondVal =
+ dyn_cast<ConstantInt>(SimpleCond))
+ KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor();
}
}
+ if (KnownSucc) {
+ if (L->contains(KnownSucc))
+ BBWorklist.insert(KnownSucc);
+ else
+ ExitWorklist.insert({BB, KnownSucc});
+ continue;
+ }
// Add BB's successors to the worklist.
for (BasicBlock *Succ : successors(BB))
if (L->contains(Succ))
BBWorklist.insert(Succ);
+ else
+ ExitWorklist.insert({BB, Succ});
+ AddCostRecursively(*TI, Iteration);
}
// If we found no optimization opportunities on the first iteration, we
@@ -591,6 +502,23 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
return None;
}
}
+
+ while (!ExitWorklist.empty()) {
+ BasicBlock *ExitingBB, *ExitBB;
+ std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
+
+ for (Instruction &I : *ExitBB) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ break;
+
+ Value *Op = PN->getIncomingValueForBlock(ExitingBB);
+ if (auto *OpI = dyn_cast<Instruction>(Op))
+ if (L->contains(OpI))
+ AddCostRecursively(*OpI, TripCount - 1);
+ }
+ }
+
DEBUG(dbgs() << "Analysis finished:\n"
<< "UnrolledCost: " << UnrolledCost << ", "
<< "RolledDynamicCost: " << RolledDynamicCost << "\n");
@@ -599,18 +527,18 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
- bool &NotDuplicatable,
+ bool &NotDuplicatable, bool &Convergent,
const TargetTransformInfo &TTI,
AssumptionCache *AC) {
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, AC, EphValues);
CodeMetrics Metrics;
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I)
- Metrics.analyzeBasicBlock(*I, TTI, EphValues);
+ for (BasicBlock *BB : L->blocks())
+ Metrics.analyzeBasicBlock(BB, TTI, EphValues);
NumCalls = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
+ Convergent = Metrics.convergent;
unsigned LoopSize = Metrics.NumInsts;
@@ -676,21 +604,22 @@ static unsigned UnrollCountPragmaValue(const Loop *L) {
// unrolling pass is run more than once (which it generally is).
static void SetLoopAlreadyUnrolled(Loop *L) {
MDNode *LoopID = L->getLoopID();
- if (!LoopID) return;
-
// First remove any existing loop unrolling metadata.
SmallVector<Metadata *, 4> MDs;
// Reserve first location for self reference to the LoopID metadata node.
MDs.push_back(nullptr);
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- bool IsUnrollMetadata = false;
- MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (MD) {
- const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
+
+ if (LoopID) {
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ bool IsUnrollMetadata = false;
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (MD) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
+ }
+ if (!IsUnrollMetadata)
+ MDs.push_back(LoopID->getOperand(i));
}
- if (!IsUnrollMetadata)
- MDs.push_back(LoopID->getOperand(i));
}
// Add unroll(disable) metadata to disable future unrolling.
@@ -737,9 +666,9 @@ static bool canUnrollCompletely(Loop *L, unsigned Threshold,
(int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <=
(int64_t)Threshold) {
DEBUG(dbgs() << " Can fully unroll, because unrolling will reduce the "
- "expected dynamic cost by " << PercentDynamicCostSaved
- << "% (threshold: " << PercentDynamicCostSavedThreshold
- << "%)\n"
+ "expected dynamic cost by "
+ << PercentDynamicCostSaved << "% (threshold: "
+ << PercentDynamicCostSavedThreshold << "%)\n"
<< " and the unrolled cost (" << UnrolledCost
<< ") is less than the max threshold ("
<< DynamicCostSavingsDiscount << ").\n");
@@ -758,82 +687,77 @@ static bool canUnrollCompletely(Loop *L, unsigned Threshold,
return false;
}
-static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE, const TargetTransformInfo &TTI,
- AssumptionCache &AC, bool PreserveLCSSA,
- Optional<unsigned> ProvidedCount,
- Optional<unsigned> ProvidedThreshold,
- Optional<bool> ProvidedAllowPartial,
- Optional<bool> ProvidedRuntime) {
- BasicBlock *Header = L->getHeader();
- DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
- << "] Loop %" << Header->getName() << "\n");
+// Returns true if unroll count was set explicitly.
+// Calculates unroll count and writes it to UP.Count.
+static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
+ DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE, unsigned TripCount,
+ unsigned TripMultiple, unsigned LoopSize,
+ TargetTransformInfo::UnrollingPreferences &UP) {
+  // BEInsns represents the number of instructions optimized away when the
+  // "back edge" becomes a "fall through" in the unrolled loop.
+  // For now we count a conditional branch on a backedge and the comparison
+  // feeding it.
+ unsigned BEInsns = 2;
+ // Check for explicit Count.
+ // 1st priority is unroll count set by "unroll-count" option.
+ bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
+ if (UserUnrollCount) {
+ UP.Count = UnrollCount;
+ UP.AllowExpensiveTripCount = true;
+ UP.Force = true;
+ if (UP.AllowRemainder &&
+ (LoopSize - BEInsns) * UP.Count + BEInsns < UP.Threshold)
+ return true;
+ }
- if (HasUnrollDisablePragma(L)) {
- return false;
+ // 2nd priority is unroll count set by pragma.
+ unsigned PragmaCount = UnrollCountPragmaValue(L);
+ if (PragmaCount > 0) {
+ UP.Count = PragmaCount;
+ UP.Runtime = true;
+ UP.AllowExpensiveTripCount = true;
+ UP.Force = true;
+ if (UP.AllowRemainder &&
+ (LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
+ return true;
}
bool PragmaFullUnroll = HasUnrollFullPragma(L);
- bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
- unsigned PragmaCount = UnrollCountPragmaValue(L);
- bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0;
-
- // Find trip count and trip multiple if count is not available
- unsigned TripCount = 0;
- unsigned TripMultiple = 1;
- // If there are multiple exiting blocks but one of them is the latch, use the
- // latch for the trip count estimation. Otherwise insist on a single exiting
- // block for the trip count estimation.
- BasicBlock *ExitingBlock = L->getLoopLatch();
- if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
- ExitingBlock = L->getExitingBlock();
- if (ExitingBlock) {
- TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ if (PragmaFullUnroll && TripCount != 0) {
+ UP.Count = TripCount;
+ if ((LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
+ return false;
}
- TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
- ProvidedRuntime, PragmaCount, PragmaFullUnroll, PragmaEnableUnroll,
- TripCount);
-
- unsigned Count = UP.Count;
- bool CountSetExplicitly = Count != 0;
- // Use a heuristic count if we didn't set anything explicitly.
- if (!CountSetExplicitly)
- Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount;
- if (TripCount && Count > TripCount)
- Count = TripCount;
+ bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
+ bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
+ PragmaEnableUnroll || UserUnrollCount;
- unsigned NumInlineCandidates;
- bool notDuplicatable;
- unsigned LoopSize =
- ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, &AC);
- DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ uint64_t UnrolledSize;
+ DebugLoc LoopLoc = L->getStartLoc();
+ Function *F = L->getHeader()->getParent();
+ LLVMContext &Ctx = F->getContext();
- // When computing the unrolled size, note that the conditional branch on the
- // backedge and the comparison feeding it are not replicated like the rest of
- // the loop body (which is why 2 is subtracted).
- uint64_t UnrolledSize = (uint64_t)(LoopSize-2) * Count + 2;
- if (notDuplicatable) {
- DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
- << " instructions.\n");
- return false;
- }
- if (NumInlineCandidates != 0) {
- DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
- return false;
+ if (ExplicitUnroll && TripCount != 0) {
+ // If the loop has an unrolling pragma, we want to be more aggressive with
+ // unrolling limits. Set thresholds to at least the PragmaThreshold value
+ // which is larger than the default limits.
+ UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+ UP.PartialThreshold =
+ std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
}
- // Given Count, TripCount and thresholds determine the type of
- // unrolling which is to be performed.
- enum { Full = 0, Partial = 1, Runtime = 2 };
- int Unrolling;
- if (TripCount && Count == TripCount) {
- Unrolling = Partial;
- // If the loop is really small, we don't need to run an expensive analysis.
+ // 3rd priority is full unroll count.
+  // Full unrolling makes sense only when the trip count can be statically
+  // calculated. We also need to check that we do not exceed FullUnrollMaxCount.
+ if (TripCount && TripCount <= UP.FullUnrollMaxCount) {
+ // When computing the unrolled size, note that BEInsns are not replicated
+ // like the rest of the loop body.
+ UnrolledSize = (uint64_t)(LoopSize - BEInsns) * TripCount + BEInsns;
if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
UnrolledSize, UnrolledSize)) {
- Unrolling = Full;
+ UP.Count = TripCount;
+ return ExplicitUnroll;
} else {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
@@ -845,99 +769,216 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
UP.PercentDynamicCostSavedThreshold,
UP.DynamicCostSavingsDiscount,
Cost->UnrolledCost, Cost->RolledDynamicCost)) {
- Unrolling = Full;
+ UP.Count = TripCount;
+ return ExplicitUnroll;
}
}
- } else if (TripCount && Count < TripCount) {
- Unrolling = Partial;
- } else {
- Unrolling = Runtime;
}
- // Reduce count based on the type of unrolling and the threshold values.
- unsigned OriginalCount = Count;
- bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || UP.Runtime;
- // Don't unroll a runtime trip count loop with unroll full pragma.
- if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
- AllowRuntime = false;
- }
- if (Unrolling == Partial) {
- bool AllowPartial = PragmaEnableUnroll || UP.Partial;
- if (!AllowPartial && !CountSetExplicitly) {
+  // 4th priority is partial unrolling.
+  // Try partial unrolling only when the trip count can be statically
+  // calculated.
+ if (TripCount) {
+ if (UP.Count == 0)
+ UP.Count = TripCount;
+ UP.Partial |= ExplicitUnroll;
+ if (!UP.Partial) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
+ UP.Count = 0;
return false;
}
- if (UP.PartialThreshold != NoThreshold &&
- UnrolledSize > UP.PartialThreshold) {
+ if (UP.PartialThreshold != NoThreshold) {
// Reduce unroll count to be modulo of TripCount for partial unrolling.
- Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2);
- while (Count != 0 && TripCount % Count != 0)
- Count--;
- }
- } else if (Unrolling == Runtime) {
- if (!AllowRuntime && !CountSetExplicitly) {
- DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "
- << "-unroll-runtime not given\n");
- return false;
- }
- // Reduce unroll count to be the largest power-of-two factor of
- // the original count which satisfies the threshold limit.
- while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
- Count >>= 1;
- UnrolledSize = (LoopSize-2) * Count + 2;
+ UnrolledSize = (uint64_t)(LoopSize - BEInsns) * UP.Count + BEInsns;
+ if (UnrolledSize > UP.PartialThreshold)
+ UP.Count = (std::max(UP.PartialThreshold, 3u) - BEInsns) /
+ (LoopSize - BEInsns);
+ if (UP.Count > UP.MaxCount)
+ UP.Count = UP.MaxCount;
+ while (UP.Count != 0 && TripCount % UP.Count != 0)
+ UP.Count--;
+ if (UP.AllowRemainder && UP.Count <= 1) {
+        // If there is no Count that evenly divides TripCount, set Count to
+        // the largest power-of-two factor that satisfies the threshold limit.
+        // Since this creates a fixup (remainder) loop, only do this kind of
+        // unrolling if a remainder loop is allowed.
+ UP.Count = DefaultUnrollRuntimeCount;
+ UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
+ while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
+ UP.Count >>= 1;
+ UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
+ }
+ }
+ if (UP.Count < 2) {
+ if (PragmaEnableUnroll)
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to unroll loop as directed by unroll(enable) pragma "
+ "because unrolled size is too large.");
+ UP.Count = 0;
+ }
+ } else {
+ UP.Count = TripCount;
}
- if (Count > UP.MaxCount)
- Count = UP.MaxCount;
- DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
- }
-
- if (HasPragma) {
- if (PragmaCount != 0)
- // If loop has an unroll count pragma mark loop as unrolled to prevent
- // unrolling beyond that requested by the pragma.
- SetLoopAlreadyUnrolled(L);
-
- // Emit optimization remarks if we are unable to unroll the loop
- // as directed by a pragma.
- DebugLoc LoopLoc = L->getStartLoc();
- Function *F = Header->getParent();
- LLVMContext &Ctx = F->getContext();
- if ((PragmaCount > 0) && Count != OriginalCount) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to unroll loop the number of times directed by "
- "unroll_count pragma because unrolled size is too large.");
- } else if (PragmaFullUnroll && !TripCount) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(full) pragma "
- "because loop has a runtime trip count.");
- } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) {
+ if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+ UP.Count != TripCount)
emitOptimizationRemarkMissed(
Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to unroll loop as directed by unroll(enable) pragma because "
+ "Unable to fully unroll loop as directed by unroll pragma because "
"unrolled size is too large.");
- } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
- Count != TripCount) {
+ return ExplicitUnroll;
+ }
+ assert(TripCount == 0 &&
+ "All cases when TripCount is constant should be covered here.");
+ if (PragmaFullUnroll)
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll(full) pragma "
+ "because loop has a runtime trip count.");
+
+ // 5th priority is runtime unrolling.
+ // Don't unroll a runtime trip count loop when it is disabled.
+ if (HasRuntimeUnrollDisablePragma(L)) {
+ UP.Count = 0;
+ return false;
+ }
+ // Reduce count based on the type of unrolling and the threshold values.
+ UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
+ if (!UP.Runtime) {
+ DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "
+ << "-unroll-runtime not given\n");
+ UP.Count = 0;
+ return false;
+ }
+ if (UP.Count == 0)
+ UP.Count = DefaultUnrollRuntimeCount;
+ UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
+
+ // Reduce unroll count to be the largest power-of-two factor of
+ // the original count which satisfies the threshold limit.
+ while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
+ UP.Count >>= 1;
+ UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
+ }
+
+#ifndef NDEBUG
+ unsigned OrigCount = UP.Count;
+#endif
+
+ if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
+ while (UP.Count != 0 && TripMultiple % UP.Count != 0)
+ UP.Count >>= 1;
+    DEBUG(dbgs() << "Remainder loop is restricted (that could be architecture "
+ "specific or because the loop contains a convergent "
+ "instruction), so unroll count must divide the trip "
+ "multiple, "
+ << TripMultiple << ". Reducing unroll count from "
+ << OrigCount << " to " << UP.Count << ".\n");
+ if (PragmaCount > 0 && !UP.AllowRemainder)
emitOptimizationRemarkMissed(
Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll pragma because "
- "unrolled size is too large.");
- }
+ Twine("Unable to unroll loop the number of times directed by "
+ "unroll_count pragma because remainder loop is restricted "
+                "(that could be architecture specific or because the loop "
+ "contains a convergent instruction) and so must have an unroll "
+ "count that divides the loop trip multiple of ") +
+ Twine(TripMultiple) + ". Unrolling instead " + Twine(UP.Count) +
+ " time(s).");
}
- if (Unrolling != Full && Count < 2) {
- // Partial unrolling by 1 is a nop. For full unrolling, a factor
- // of 1 makes sense because loop control can be eliminated.
+ if (UP.Count > UP.MaxCount)
+ UP.Count = UP.MaxCount;
+ DEBUG(dbgs() << " partially unrolling with count: " << UP.Count << "\n");
+ if (UP.Count < 2)
+ UP.Count = 0;
+ return ExplicitUnroll;
+}
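
The size heuristic applied repeatedly in computeUnrollCount above reduces to one formula: the estimated unrolled size is (LoopSize - BEInsns) * Count + BEInsns, and the count is halved until that estimate fits the relevant threshold. The following is a minimal standalone sketch of that arithmetic; the helper names are hypothetical and not part of the patch.

// Illustrative sketch only: estimate the unrolled size and shrink the count
// by powers of two until the estimate fits the threshold.
#include <cstdint>
#include <iostream>

static std::uint64_t estimateUnrolledSize(std::uint64_t LoopSize,
                                          std::uint64_t BEInsns,
                                          std::uint64_t Count) {
  // The backedge branch and its compare (BEInsns) are not replicated.
  return (LoopSize - BEInsns) * Count + BEInsns;
}

static unsigned shrinkCount(std::uint64_t LoopSize, std::uint64_t BEInsns,
                            unsigned Count, std::uint64_t Threshold) {
  while (Count != 0 && estimateUnrolledSize(LoopSize, BEInsns, Count) > Threshold)
    Count >>= 1; // halving keeps Count a power-of-two factor of the original
  return Count;
}

int main() {
  // A 12-instruction body with 2 backedge instructions, requested count 8,
  // threshold 50: (12-2)*8+2 = 82 > 50, (12-2)*4+2 = 42 <= 50, so count = 4.
  std::cout << shrinkCount(12, 2, 8, 50) << "\n";
  return 0;
}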
+
+static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE, const TargetTransformInfo &TTI,
+ AssumptionCache &AC, bool PreserveLCSSA,
+ Optional<unsigned> ProvidedCount,
+ Optional<unsigned> ProvidedThreshold,
+ Optional<bool> ProvidedAllowPartial,
+ Optional<bool> ProvidedRuntime) {
+ DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()
+ << "] Loop %" << L->getHeader()->getName() << "\n");
+ if (HasUnrollDisablePragma(L)) {
return false;
}
+ unsigned NumInlineCandidates;
+ bool NotDuplicatable;
+ bool Convergent;
+ unsigned LoopSize = ApproximateLoopSize(
+ L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
+ DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ if (NotDuplicatable) {
+ DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
+ << " instructions.\n");
+ return false;
+ }
+ if (NumInlineCandidates != 0) {
+ DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
+ return false;
+ }
+ if (!L->isLoopSimplifyForm()) {
+ DEBUG(
+ dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
+ return false;
+ }
+
+ // Find trip count and trip multiple if count is not available
+ unsigned TripCount = 0;
+ unsigned TripMultiple = 1;
+ // If there are multiple exiting blocks but one of them is the latch, use the
+ // latch for the trip count estimation. Otherwise insist on a single exiting
+ // block for the trip count estimation.
+ BasicBlock *ExitingBlock = L->getLoopLatch();
+ if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
+ ExitingBlock = L->getExitingBlock();
+ if (ExitingBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ }
+
+ TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
+ L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+ ProvidedRuntime);
+
+ // If the loop contains a convergent operation, the prelude we'd add
+ // to do the first few instructions before we hit the unrolled loop
+ // is unsafe -- it adds a control-flow dependency to the convergent
+  // operation. Therefore restrict the remainder loop (try unrolling without one).
+ //
+ // TODO: This is quite conservative. In practice, convergent_op()
+ // is likely to be called unconditionally in the loop. In this
+ // case, the program would be ill-formed (on most architectures)
+ // unless n were the same on all threads in a thread group.
+ // Assuming n is the same on all threads, any kind of unrolling is
+ // safe. But currently llvm's notion of convergence isn't powerful
+ // enough to express this.
+ if (Convergent)
+ UP.AllowRemainder = false;
+
+ bool IsCountSetExplicitly = computeUnrollCount(L, TTI, DT, LI, SE, TripCount,
+ TripMultiple, LoopSize, UP);
+ if (!UP.Count)
+ return false;
+ // Unroll factor (Count) must be less or equal to TripCount.
+ if (TripCount && UP.Count > TripCount)
+ UP.Count = TripCount;
+
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
- TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA))
+ if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
+ UP.AllowExpensiveTripCount, TripMultiple, LI, SE, &DT, &AC,
+ PreserveLCSSA))
return false;
+  // If the loop has an unroll count pragma or was unrolled by an explicitly
+  // set count, mark it as unrolled to prevent unrolling beyond what was
+  // requested.
+ if (IsCountSetExplicitly)
+ SetLoopAlreadyUnrolled(L);
return true;
}
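
TripMultiple, computed above from ScalarEvolution, is the largest constant known to divide the (possibly unknown) trip count; when no remainder loop is allowed, the chosen unroll count must divide it. A hedged source-level illustration of why, assuming the caller guarantees that divisibility:

#include <cassert>
#include <cstddef>

// Manual 4x unroll that is only valid because the caller guarantees the trip
// count is a multiple of 4 (i.e. the trip multiple is at least 4), so no
// remainder ("fixup") loop is needed.
void zeroBuffer(float *Buf, std::size_t N) {
  assert(N % 4 == 0 && "unroll count must divide the trip multiple");
  for (std::size_t I = 0; I < N; I += 4) {
    Buf[I + 0] = 0.0f;
    Buf[I + 1] = 0.0f;
    Buf[I + 2] = 0.0f;
    Buf[I + 3] = 0.0f;
  }
}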
@@ -948,8 +989,9 @@ public:
LoopUnroll(Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None)
- : LoopPass(ID), ProvidedCount(Count), ProvidedThreshold(Threshold),
- ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime) {
+ : LoopPass(ID), ProvidedCount(std::move(Count)),
+ ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
+ ProvidedRuntime(Runtime) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -959,7 +1001,7 @@ public:
Optional<bool> ProvidedRuntime;
bool runOnLoop(Loop *L, LPPassManager &) override {
- if (skipOptnoneFunction(L))
+ if (skipLoop(L))
return false;
Function &F = *L->getHeader()->getParent();
@@ -982,35 +1024,19 @@ public:
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
- // If loop unroll does not preserve dom info then LCSSA pass on next
- // loop will receive invalid dom info.
- // For now, recreate dom info, if loop is unrolled.
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
+ // FIXME: Loop passes are required to preserve domtree, and for now we just
+ // recreate dom info if anything gets unrolled.
+ getLoopAnalysisUsage(AU);
}
};
}
char LoopUnroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
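
For reference, the pragma priorities handled by computeUnrollCount correspond to ordinary Clang loop pragmas at the source level; the snippet below is an illustrative usage example under that assumption, not part of the patch.

int sumWithCountPragma(const int *A, int N) {
  int Sum = 0;
  // Handled by the "2nd priority" path above (unroll count set by pragma).
  #pragma clang loop unroll_count(4)
  for (int I = 0; I < N; ++I)
    Sum += A[I];
  return Sum;
}

int sumWithFullPragma(const int *A) {
  int Sum = 0;
  // Full unrolling is only honored when the trip count is statically known.
  #pragma clang loop unroll(full)
  for (int I = 0; I < 8; ++I)
    Sum += A[I];
  return Sum;
}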
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 95d7f8a3beda2..71980e85e8cac 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -55,6 +55,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <map>
#include <set>
@@ -64,6 +65,7 @@ using namespace llvm;
STATISTIC(NumBranches, "Number of branches unswitched");
STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumGuards, "Number of guards unswitched");
STATISTIC(NumSelects , "Number of selects unswitched");
STATISTIC(NumTrivial , "Number of unswitches that are trivial");
STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
@@ -187,6 +189,9 @@ namespace {
BasicBlock *loopHeader;
BasicBlock *loopPreheader;
+ bool SanitizeMemory;
+ LoopSafetyInfo SafetyInfo;
+
// LoopBlocks contains all of the basic blocks of the loop, including the
// preheader of the loop, the body of the loop, and the exit blocks of the
// loop, in that order.
@@ -211,17 +216,8 @@ namespace {
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
+ getLoopAnalysisUsage(AU);
}
private:
@@ -382,11 +378,9 @@ void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,
char LoopUnswitch::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
@@ -396,7 +390,11 @@ Pass *llvm::createLoopUnswitchPass(bool Os) {
/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
/// an invariant piece, return the invariant. Otherwise, return null.
-static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
+static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
+ DenseMap<Value *, Value *> &Cache) {
+ auto CacheIt = Cache.find(Cond);
+ if (CacheIt != Cache.end())
+ return CacheIt->second;
// We started analyze new instruction, increment scanned instructions counter.
++TotalInsts;
@@ -411,8 +409,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
// TODO: Handle: br (VARIANT|INVARIANT).
// Hoist simple values out.
- if (L->makeLoopInvariant(Cond, Changed))
+ if (L->makeLoopInvariant(Cond, Changed)) {
+ Cache[Cond] = Cond;
return Cond;
+ }
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
if (BO->getOpcode() == Instruction::And ||
@@ -420,17 +420,29 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
// If either the left or right side is invariant, we can unswitch on this,
// which will cause the branch to go away in one loop and the condition to
// simplify in the other one.
- if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed))
+ if (Value *LHS =
+ FindLIVLoopCondition(BO->getOperand(0), L, Changed, Cache)) {
+ Cache[Cond] = LHS;
return LHS;
- if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed))
+ }
+ if (Value *RHS =
+ FindLIVLoopCondition(BO->getOperand(1), L, Changed, Cache)) {
+ Cache[Cond] = RHS;
return RHS;
+ }
}
+ Cache[Cond] = nullptr;
return nullptr;
}
+static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
+ DenseMap<Value *, Value *> Cache;
+ return FindLIVLoopCondition(Cond, L, Changed, Cache);
+}
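
The Cache parameter threaded through FindLIVLoopCondition above is plain memoization: without it, a deep tree of and/or conditions can be re-analyzed exponentially many times through shared sub-expressions. A generic sketch of the same pattern, using hypothetical types rather than LLVM's API:

#include <unordered_map>

// A condition tree: leaves may be loop-invariant, interior nodes are and/or.
struct Cond {
  bool Invariant = false;    // stands in for L->makeLoopInvariant(...)
  const Cond *LHS = nullptr; // non-null only for and/or nodes
  const Cond *RHS = nullptr;
};

// Returns the invariant piece of C, if any. The cache makes shared subtrees
// cost O(1) after the first visit instead of being re-analyzed every time
// they are reached through a different parent.
static const Cond *
findInvariantPiece(const Cond *C,
                   std::unordered_map<const Cond *, const Cond *> &Cache) {
  auto It = Cache.find(C);
  if (It != Cache.end())
    return It->second;
  const Cond *Result = nullptr;
  if (C->Invariant)
    Result = C;
  else if (C->LHS && C->RHS) {
    Result = findInvariantPiece(C->LHS, Cache);
    if (!Result)
      Result = findInvariantPiece(C->RHS, Cache);
  }
  Cache[C] = Result; // cache negative results too, so failures are not retried
  return Result;
}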
+
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
- if (skipOptnoneFunction(L))
+ if (skipLoop(L))
return false;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
@@ -441,6 +453,10 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
+ SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory);
+ if (SanitizeMemory)
+ computeLoopSafetyInfo(&SafetyInfo, L);
+
EnabledPGO = F->getEntryCount().hasValue();
if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
@@ -499,17 +515,34 @@ bool LoopUnswitch::processCurrentLoop() {
return true;
}
- // Do not unswitch loops containing convergent operations, as we might be
- // making them control dependent on the unswitch value when they were not
- // before.
- // FIXME: This could be refined to only bail if the convergent operation is
- // not already control-dependent on the unswitch value.
+ // Run through the instructions in the loop, keeping track of three things:
+ //
+ // - That we do not unswitch loops containing convergent operations, as we
+ // might be making them control dependent on the unswitch value when they
+ // were not before.
+ // FIXME: This could be refined to only bail if the convergent operation is
+ // not already control-dependent on the unswitch value.
+ //
+  // - Whether any basic block in the loop contains an invoke whose predecessor
+  //   edges we cannot split (we bail out if so).
+  //
+  // - The set of guard intrinsics encountered (these are non-terminator
+  //   instructions that are also profitable to unswitch).
+
+ SmallVector<IntrinsicInst *, 4> Guards;
+
for (const auto BB : currentLoop->blocks()) {
for (auto &I : *BB) {
auto CS = CallSite(&I);
if (!CS) continue;
if (CS.hasFnAttr(Attribute::Convergent))
return false;
+ if (auto *II = dyn_cast<InvokeInst>(&I))
+ if (!II->getUnwindDest()->canSplitPredecessors())
+ return false;
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::experimental_guard)
+ Guards.push_back(II);
}
}
@@ -529,12 +562,36 @@ bool LoopUnswitch::processCurrentLoop() {
return false;
}
+ for (IntrinsicInst *Guard : Guards) {
+ Value *LoopCond =
+ FindLIVLoopCondition(Guard->getOperand(0), currentLoop, Changed);
+ if (LoopCond &&
+ UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
+ // NB! Unswitching (if successful) could have erased some of the
+ // instructions in Guards leaving dangling pointers there. This is fine
+ // because we're returning now, and won't look at Guards again.
+ ++NumGuards;
+ return true;
+ }
+ }
+
// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this
// loop.
for (Loop::block_iterator I = currentLoop->block_begin(),
E = currentLoop->block_end(); I != E; ++I) {
TerminatorInst *TI = (*I)->getTerminator();
+
+ // Unswitching on a potentially uninitialized predicate is not
+ // MSan-friendly. Limit this to the cases when the original predicate is
+ // guaranteed to execute, to avoid creating a use-of-uninitialized-value
+ // in the code that did not have one.
+ // This is a workaround for the discrepancy between LLVM IR and MSan
+ // semantics. See PR28054 for more details.
+ if (SanitizeMemory &&
+ !isGuaranteedToExecute(*TI, DT, currentLoop, &SafetyInfo))
+ continue;
+
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
// If this isn't branching on an invariant condition, we can't unswitch
// it.
@@ -628,8 +685,8 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
// Okay, everything after this looks good, check to make sure that this block
// doesn't include any side effects.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (I->mayHaveSideEffects())
+ for (Instruction &I : *BB)
+ if (I.mayHaveSideEffects())
return false;
return true;
@@ -679,8 +736,8 @@ static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
// Add all of the subloops to the new loop.
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- CloneLoop(*I, &New, VM, LI, LPM);
+ for (Loop *I : *L)
+ CloneLoop(I, &New, VM, LI, LPM);
return &New;
}
@@ -1075,10 +1132,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// Rewrite the code to refer to itself.
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
- for (BasicBlock::iterator I = NewBlocks[i]->begin(),
- E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(&*I, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+ for (Instruction &I : *NewBlocks[i])
+ RemapInstruction(&I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
@@ -1180,9 +1236,8 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Worklist.push_back(UI);
}
- for (std::vector<Instruction*>::iterator UI = Worklist.begin(),
- UE = Worklist.end(); UI != UE; ++UI)
- (*UI)->replaceUsesOfWith(LIC, Replacement);
+ for (Instruction *UI : Worklist)
+ UI->replaceUsesOfWith(LIC, Replacement);
SimplifyCode(Worklist, L);
return;
diff --git a/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/lib/Transforms/Scalar/LoopVersioningLICM.cpp
new file mode 100644
index 0000000000000..0ccf0af7165b5
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -0,0 +1,571 @@
+//===----------- LoopVersioningLICM.cpp - LICM Loop Versioning ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// When alias analysis is uncertain about the aliasing between any two accesses,
+// it will return MayAlias. This uncertainty from alias analysis restricts LICM
+// from proceeding further. In cases where alias analysis is uncertain we might
+// use loop versioning as an alternative.
+//
+// Loop Versioning will create a version of the loop with aggressive aliasing
+// assumptions in addition to the original with conservative (default) aliasing
+// assumptions. The version of the loop making aggressive aliasing assumptions
+// will have all the memory accesses marked as no-alias. These two versions of
+// loop will be preceded by a memory runtime check. This runtime check consists
+// of bounds checks for all unique memory accesses in the loop, and it ensures
+// that no aliasing occurs. The result of the runtime check determines which of
+// the loop versions is executed: If the runtime check detects any memory
+// aliasing, then the original loop is executed. Otherwise, the version with
+// aggressive aliasing assumptions is used.
+//
+// Following are the top level steps:
+//
+// a) Perform LoopVersioningLICM's feasibility check.
+// b) If loop is a candidate for versioning then create a memory bound check,
+// by considering all the memory accesses in loop body.
+// c) Clone original loop and set all memory accesses as no-alias in new loop.
+// d) Set original loop & versioned loop as a branch target of the runtime check
+// result.
+//
+// It transforms loop as shown below:
+//
+// +----------------+
+// |Runtime Memcheck|
+// +----------------+
+// |
+// +----------+----------------+----------+
+// | |
+// +---------+----------+ +-----------+----------+
+// |Orig Loop Preheader | |Cloned Loop Preheader |
+// +--------------------+ +----------------------+
+// | |
+// +--------------------+ +----------------------+
+// |Orig Loop Body | |Cloned Loop Body |
+// +--------------------+ +----------------------+
+// | |
+// +--------------------+ +----------------------+
+// |Orig Loop Exit Block| |Cloned Loop Exit Block|
+// +--------------------+ +-----------+----------+
+// | |
+// +----------+--------------+-----------+
+// |
+// +-----+----+
+// |Join Block|
+// +----------+
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+#define DEBUG_TYPE "loop-versioning-licm"
+static const char* LICMVersioningMetaData =
+ "llvm.loop.licm_versioning.disable";
+
+using namespace llvm;
+
+/// Minimum allowed percentage of possibly-invariant instructions in a loop
+/// for versioning to be considered worthwhile.
+static cl::opt<float>
+ LVInvarThreshold("licm-versioning-invariant-threshold",
+                     cl::desc("LoopVersioningLICM's minimum allowed percentage "
+ "of possible invariant instructions per loop"),
+ cl::init(25), cl::Hidden);
+
+/// Threshold for maximum allowed loop nest/depth
+static cl::opt<unsigned> LVLoopDepthThreshold(
+ "licm-versioning-max-depth-threshold",
+ cl::desc(
+ "LoopVersioningLICM's threshold for maximum allowed loop nest/depth"),
+ cl::init(2), cl::Hidden);
+
+/// \brief Create MDNode for input string.
+static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ Metadata *MDs[] = {
+ MDString::get(Context, Name),
+ ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
+ return MDNode::get(Context, MDs);
+}
+
+/// \brief Set input string into loop metadata by keeping other values intact.
+void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
+ unsigned V) {
+ SmallVector<Metadata *, 4> MDs(1);
+ // If the loop already has metadata, retain it.
+ MDNode *LoopID = TheLoop->getLoopID();
+ if (LoopID) {
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
+ MDs.push_back(Node);
+ }
+ }
+ // Add new metadata.
+ MDs.push_back(createStringMetadata(TheLoop, MDString, V));
+ // Replace current metadata node with new one.
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ TheLoop->setLoopID(NewLoopID);
+}
+
+namespace {
+struct LoopVersioningLICM : public LoopPass {
+ static char ID;
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<LoopAccessLegacyAnalysis>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+
+ LoopVersioningLICM()
+ : LoopPass(ID), AA(nullptr), SE(nullptr), LI(nullptr), DT(nullptr),
+ TLI(nullptr), LAA(nullptr), LAI(nullptr), Changed(false),
+ Preheader(nullptr), CurLoop(nullptr), CurAST(nullptr),
+ LoopDepthThreshold(LVLoopDepthThreshold),
+ InvariantThreshold(LVInvarThreshold), LoadAndStoreCounter(0),
+ InvariantCounter(0), IsReadOnlyLoop(true) {
+ initializeLoopVersioningLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ AliasAnalysis *AA; // Current AliasAnalysis information
+ ScalarEvolution *SE; // Current ScalarEvolution
+ LoopInfo *LI; // Current LoopInfo
+ DominatorTree *DT; // Dominator Tree for the current Loop.
+ TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
+ LoopAccessLegacyAnalysis *LAA; // Current LoopAccessAnalysis
+ const LoopAccessInfo *LAI; // Current Loop's LoopAccessInfo
+
+ bool Changed; // Set to true when we change anything.
+ BasicBlock *Preheader; // The preheader block of the current loop.
+ Loop *CurLoop; // The current loop we are working on.
+ AliasSetTracker *CurAST; // AliasSet information for the current loop.
+ ValueToValueMap Strides;
+
+ unsigned LoopDepthThreshold; // Maximum loop nest threshold
+ float InvariantThreshold; // Minimum invariant threshold
+ unsigned LoadAndStoreCounter; // Counter to track num of load & store
+ unsigned InvariantCounter; // Counter to track num of invariant
+ bool IsReadOnlyLoop; // Read only loop marker.
+
+ bool isLegalForVersioning();
+ bool legalLoopStructure();
+ bool legalLoopInstructions();
+ bool legalLoopMemoryAccesses();
+ bool isLoopAlreadyVisited();
+ void setNoAliasToLoop(Loop *);
+ bool instructionSafeForVersioning(Instruction *);
+ const char *getPassName() const override { return "Loop Versioning"; }
+};
+}
+
+/// \brief Check the loop structure and confirm that it is suitable for
+/// LoopVersioningLICM.
+bool LoopVersioningLICM::legalLoopStructure() {
+ // Loop must have a preheader, if not return false.
+ if (!CurLoop->getLoopPreheader()) {
+ DEBUG(dbgs() << " loop preheader is missing\n");
+ return false;
+ }
+ // Loop should be innermost loop, if not return false.
+ if (CurLoop->getSubLoops().size()) {
+ DEBUG(dbgs() << " loop is not innermost\n");
+ return false;
+ }
+ // Loop should have a single backedge, if not return false.
+ if (CurLoop->getNumBackEdges() != 1) {
+ DEBUG(dbgs() << " loop has multiple backedges\n");
+ return false;
+ }
+ // Loop must have a single exiting block, if not return false.
+ if (!CurLoop->getExitingBlock()) {
+    DEBUG(dbgs() << "    loop has multiple exiting blocks\n");
+ return false;
+ }
+  // We only handle bottom-tested loops, i.e. loops in which the condition is
+ // checked at the end of each iteration. With that we can assume that all
+ // instructions in the loop are executed the same number of times.
+ if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) {
+ DEBUG(dbgs() << " loop is not bottom tested\n");
+ return false;
+ }
+ // Parallel loops must not have aliasing loop-invariant memory accesses.
+ // Hence we don't need to version anything in this case.
+ if (CurLoop->isAnnotatedParallel()) {
+ DEBUG(dbgs() << " Parallel loop is not worth versioning\n");
+ return false;
+ }
+  // A loop depth greater than LoopDepthThreshold is not allowed.
+  if (CurLoop->getLoopDepth() > LoopDepthThreshold) {
+    DEBUG(dbgs() << "    loop depth is greater than the threshold\n");
+ return false;
+ }
+ // Loop should have a dedicated exit block, if not return false.
+ if (!CurLoop->hasDedicatedExits()) {
+    DEBUG(dbgs() << "    loop does not have dedicated exit blocks\n");
+ return false;
+ }
+ // We need to be able to compute the loop trip count in order
+ // to generate the bound checks.
+ const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop);
+ if (ExitCount == SE->getCouldNotCompute()) {
+    DEBUG(dbgs() << "    loop does not have a computable trip count\n");
+ return false;
+ }
+ return true;
+}
+
+/// \brief Check the loop's memory accesses and confirm they are suitable for
+/// LoopVersioningLICM.
+bool LoopVersioningLICM::legalLoopMemoryAccesses() {
+ bool HasMayAlias = false;
+ bool TypeSafety = false;
+ bool HasMod = false;
+ // Memory check:
+  // The transform phase will generate a versioned loop and also a runtime
+  // check to ensure the pointers are independent and do not alias.
+  // In the versioned variant of the loop, alias metadata asserts that all
+  // accesses are mutually independent.
+ //
+  // Pointers that alias within an alias domain are avoided because, with
+  // multiple aliasing domains, we may not be able to hoist a potentially
+  // loop-invariant access out of the loop.
+ //
+  // Iterate over the alias tracker's sets and confirm that none of them is a
+  // must-alias set.
+ for (const auto &I : *CurAST) {
+ const AliasSet &AS = I;
+ // Skip Forward Alias Sets, as this should be ignored as part of
+ // the AliasSetTracker object.
+ if (AS.isForwardingAliasSet())
+ continue;
+    // With MustAlias it's not worth adding a runtime bounds check.
+ if (AS.isMustAlias())
+ return false;
+ Value *SomePtr = AS.begin()->getValue();
+ bool TypeCheck = true;
+ // Check for Mod & MayAlias
+ HasMayAlias |= AS.isMayAlias();
+ HasMod |= AS.isMod();
+ for (const auto &A : AS) {
+ Value *Ptr = A.getValue();
+ // Alias tracker should have pointers of same data type.
+ TypeCheck = (TypeCheck && (SomePtr->getType() == Ptr->getType()));
+ }
+    // At least one alias set should have pointers of the same data type.
+ TypeSafety |= TypeCheck;
+ }
+  // Require at least one alias set whose pointers all share the same type.
+ if (!TypeSafety) {
+ DEBUG(dbgs() << " Alias tracker type safety failed!\n");
+ return false;
+ }
+  // Ensure the loop body is not read-only.
+ if (!HasMod) {
+ DEBUG(dbgs() << " No memory modified in loop body\n");
+ return false;
+ }
+  // Make sure at least one alias set has a may-alias case.
+  // If there is no memory-aliasing ambiguity, return false.
+ if (!HasMayAlias) {
+ DEBUG(dbgs() << " No ambiguity in memory access.\n");
+ return false;
+ }
+ return true;
+}
+
+/// \brief Check that a loop instruction is safe for loop versioning.
+/// Returns true if it is safe, false otherwise.
+/// The following is considered:
+/// 1) All loads and stores in the loop body must be non-atomic and
+///    non-volatile.
+/// 2) Function calls must be safe, i.e. they must not access memory.
+/// 3) The loop body must not contain any may-throw instruction.
+bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
+ assert(I != nullptr && "Null instruction found!");
+ // Check function call safety
+ if (isa<CallInst>(I) && !AA->doesNotAccessMemory(CallSite(I))) {
+ DEBUG(dbgs() << " Unsafe call site found.\n");
+ return false;
+ }
+  // Avoid loops with the possibility of throwing.
+ if (I->mayThrow()) {
+ DEBUG(dbgs() << " May throw instruction found in loop body\n");
+ return false;
+ }
+  // If the current instruction is a load, make sure it is a simple load
+  // (non-atomic and non-volatile).
+ if (I->mayReadFromMemory()) {
+ LoadInst *Ld = dyn_cast<LoadInst>(I);
+ if (!Ld || !Ld->isSimple()) {
+ DEBUG(dbgs() << " Found a non-simple load.\n");
+ return false;
+ }
+ LoadAndStoreCounter++;
+ Value *Ptr = Ld->getPointerOperand();
+ // Check loop invariant.
+ if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
+ InvariantCounter++;
+ }
+  // If the current instruction is a store, make sure it is a simple store
+  // (non-atomic and non-volatile).
+ else if (I->mayWriteToMemory()) {
+ StoreInst *St = dyn_cast<StoreInst>(I);
+ if (!St || !St->isSimple()) {
+ DEBUG(dbgs() << " Found a non-simple store.\n");
+ return false;
+ }
+ LoadAndStoreCounter++;
+ Value *Ptr = St->getPointerOperand();
+ // Check loop invariant.
+ if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
+ InvariantCounter++;
+
+ IsReadOnlyLoop = false;
+ }
+ return true;
+}
+
+/// \brief Check the loop's instructions and confirm they are suitable for
+/// LoopVersioningLICM.
+bool LoopVersioningLICM::legalLoopInstructions() {
+ // Resetting counters.
+ LoadAndStoreCounter = 0;
+ InvariantCounter = 0;
+ IsReadOnlyLoop = true;
+ // Iterate over loop blocks and instructions of each block and check
+ // instruction safety.
+ for (auto *Block : CurLoop->getBlocks())
+ for (auto &Inst : *Block) {
+ // If instruction is unsafe just return false.
+ if (!instructionSafeForVersioning(&Inst))
+ return false;
+ }
+ // Get LoopAccessInfo from current loop.
+ LAI = &LAA->getInfo(CurLoop);
+ // Check LoopAccessInfo for need of runtime check.
+ if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
+ DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
+ return false;
+ }
+  // The number of runtime checks should be less than RuntimeMemoryCheckThreshold.
+ if (LAI->getNumRuntimePointerChecks() >
+ VectorizerParams::RuntimeMemoryCheckThreshold) {
+ DEBUG(dbgs() << " LAA: Runtime checks are more than threshold !!\n");
+ return false;
+ }
+ // Loop should have at least one invariant load or store instruction.
+ if (!InvariantCounter) {
+ DEBUG(dbgs() << " Invariant not found !!\n");
+ return false;
+ }
+ // Read only loop not allowed.
+ if (IsReadOnlyLoop) {
+ DEBUG(dbgs() << " Found a read-only loop!\n");
+ return false;
+ }
+  // Profitability check:
+  // Check that the invariant percentage meets the configured threshold.
+ if (InvariantCounter * 100 < InvariantThreshold * LoadAndStoreCounter) {
+ DEBUG(dbgs()
+          << "    Invariant loads & stores are less than the defined threshold\n");
+ DEBUG(dbgs() << " Invariant loads & stores: "
+ << ((InvariantCounter * 100) / LoadAndStoreCounter) << "%\n");
+ DEBUG(dbgs() << " Invariant loads & store threshold: "
+ << InvariantThreshold << "%\n");
+ return false;
+ }
+ return true;
+}
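
The profitability test above is integer arithmetic on a percentage: versioning proceeds only when InvariantCounter * 100 >= InvariantThreshold * LoadAndStoreCounter, so with the default 25% threshold a loop with 20 loads and stores needs at least 5 invariant ones. A small hedged sketch of that check (the helper name is hypothetical):

#include <iostream>

// Returns true when the share of loop-invariant memory accesses reaches the
// threshold percentage (the pass's default threshold is 25).
static bool meetsInvariantThreshold(unsigned InvariantCount,
                                    unsigned LoadAndStoreCount,
                                    unsigned ThresholdPercent) {
  return InvariantCount * 100 >= ThresholdPercent * LoadAndStoreCount;
}

int main() {
  std::cout << meetsInvariantThreshold(4, 20, 25) << "\n"; // 0: 20% is too low
  std::cout << meetsInvariantThreshold(5, 20, 25) << "\n"; // 1: 25% is enough
  return 0;
}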
+
+/// \brief Check whether the loop has already been visited by inspecting its
+/// loop metadata. Returns true if the loop was visited before, false
+/// otherwise.
+bool LoopVersioningLICM::isLoopAlreadyVisited() {
+ // Check LoopVersioningLICM metadata into loop
+ if (findStringMetadataForLoop(CurLoop, LICMVersioningMetaData)) {
+ return true;
+ }
+ return false;
+}
+
+/// \brief Check legality for LoopVersioningLICM by considering:
+/// a) loop structure legality, b) loop instruction legality,
+/// c) loop memory access legality.
+/// Returns true if legal, false otherwise.
+bool LoopVersioningLICM::isLegalForVersioning() {
+ DEBUG(dbgs() << "Loop: " << *CurLoop);
+ // Make sure not re-visiting same loop again.
+ if (isLoopAlreadyVisited()) {
+ DEBUG(
+ dbgs() << " Revisiting loop in LoopVersioningLICM not allowed.\n\n");
+ return false;
+ }
+  // Check loop structure legality.
+ if (!legalLoopStructure()) {
+ DEBUG(
+ dbgs() << " Loop structure not suitable for LoopVersioningLICM\n\n");
+ return false;
+ }
+  // Check loop instruction legality.
+ if (!legalLoopInstructions()) {
+ DEBUG(dbgs()
+ << " Loop instructions not suitable for LoopVersioningLICM\n\n");
+ return false;
+ }
+  // Check loop memory access legality.
+ if (!legalLoopMemoryAccesses()) {
+ DEBUG(dbgs()
+ << " Loop memory access not suitable for LoopVersioningLICM\n\n");
+ return false;
+ }
+ // Loop versioning is feasible, return true.
+ DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n");
+ return true;
+}
+
+/// \brief Update the loop with aggressive aliasing assumptions.
+/// It marks every pair of memory operations as no-alias, relying on the fact
+/// that the loop has no must-alias memory access pairs: loops with such
+/// accesses are rejected during the LoopVersioningLICM legality check.
+void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
+ // Get latch terminator instruction.
+ Instruction *I = VerLoop->getLoopLatch()->getTerminator();
+ // Create alias scope domain.
+ MDBuilder MDB(I->getContext());
+ MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain");
+ StringRef Name = "LVAliasScope";
+ SmallVector<Metadata *, 4> Scopes, NoAliases;
+ MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+  // Iterate over each instruction of the loop and set no-alias metadata for
+  // all load and store instructions.
+ for (auto *Block : CurLoop->getBlocks()) {
+ for (auto &Inst : *Block) {
+ // Only interested in instruction that may modify or read memory.
+ if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory())
+ continue;
+ Scopes.push_back(NewScope);
+ NoAliases.push_back(NewScope);
+ // Set no-alias for current instruction.
+ Inst.setMetadata(
+ LLVMContext::MD_noalias,
+ MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_noalias),
+ MDNode::get(Inst.getContext(), NoAliases)));
+ // set alias-scope for current instruction.
+ Inst.setMetadata(
+ LLVMContext::MD_alias_scope,
+ MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_alias_scope),
+ MDNode::get(Inst.getContext(), Scopes)));
+ }
+ }
+}
+
+bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipLoop(L))
+ return false;
+ Changed = false;
+ // Get Analysis information.
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
+ LAI = nullptr;
+ // Set Current Loop
+ CurLoop = L;
+ // Get the preheader block.
+ Preheader = L->getLoopPreheader();
+ // Initial allocation
+ CurAST = new AliasSetTracker(*AA);
+
+ // Loop over the body of this loop, construct AST.
+ for (auto *Block : L->getBlocks()) {
+ if (LI->getLoopFor(Block) == L) // Ignore blocks in subloop.
+ CurAST->add(*Block); // Incorporate the specified basic block
+ }
+  // Check the feasibility of LoopVersioningLICM.
+  // If versioning is found to be feasible and beneficial, proceed;
+  // otherwise simply clean up and return.
+ if (isLegalForVersioning()) {
+ // Do loop versioning.
+ // Create memcheck for memory accessed inside loop.
+ // Clone original loop, and set blocks properly.
+ LoopVersioning LVer(*LAI, CurLoop, LI, DT, SE, true);
+ LVer.versionLoop();
+ // Set Loop Versioning metaData for original loop.
+ addStringMetadataToLoop(LVer.getNonVersionedLoop(), LICMVersioningMetaData);
+ // Set Loop Versioning metaData for version loop.
+ addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData);
+ // Set "llvm.mem.parallel_loop_access" metaData to versioned loop.
+ addStringMetadataToLoop(LVer.getVersionedLoop(),
+ "llvm.mem.parallel_loop_access");
+ // Update version loop with aggressive aliasing assumption.
+ setNoAliasToLoop(LVer.getVersionedLoop());
+ Changed = true;
+ }
+ // Delete allocated memory.
+ delete CurAST;
+ return Changed;
+}
+
+char LoopVersioningLICM::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopVersioningLICM, "loop-versioning-licm",
+ "Loop Versioning For LICM", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(LoopVersioningLICM, "loop-versioning-licm",
+ "Loop Versioning For LICM", false, false)
+
+Pass *llvm::createLoopVersioningLICMPass() { return new LoopVersioningLICM(); }
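
Conceptually, the transformation this pass drives is the hand-written pattern below: emit a cheap runtime check that the pointers cannot alias and branch to either an alias-free fast version, where the invariant access can be hoisted, or the original conservative loop. This is a simplified source-level analogue with hypothetical names; the real pass emits SCEV-based bound checks through the LoopVersioning utility.

#include <cstddef>

// Conservative version: *Scale might alias A[i], so it must be reloaded on
// every iteration and nothing can be hoisted.
static void scaleOriginal(float *A, const float *Scale, std::size_t N) {
  for (std::size_t I = 0; I < N; ++I)
    A[I] *= *Scale;
}

// Versioned caller: a runtime bounds check shows Scale does not point into
// A[0..N), which makes *Scale loop-invariant in the fast version.
void scaleVersioned(float *A, const float *Scale, std::size_t N) {
  bool NoOverlap = Scale < A || Scale >= A + N;
  if (NoOverlap) {
    float S = *Scale; // hoisted load: the LICM payoff of versioning
    for (std::size_t I = 0; I < N; ++I)
      A[I] *= S;
  } else {
    scaleOriginal(A, Scale, N); // fall back to the conservative loop
  }
}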
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 41511bcb7b048..08e60b16bedff 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -12,11 +12,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "loweratomic"
@@ -100,49 +101,74 @@ static bool LowerFenceInst(FenceInst *FI) {
}
static bool LowerLoadInst(LoadInst *LI) {
- LI->setAtomic(NotAtomic);
+ LI->setAtomic(AtomicOrdering::NotAtomic);
return true;
}
static bool LowerStoreInst(StoreInst *SI) {
- SI->setAtomic(NotAtomic);
+ SI->setAtomic(AtomicOrdering::NotAtomic);
return true;
}
-namespace {
- struct LowerAtomic : public BasicBlockPass {
- static char ID;
- LowerAtomic() : BasicBlockPass(ID) {
- initializeLowerAtomicPass(*PassRegistry::getPassRegistry());
- }
- bool runOnBasicBlock(BasicBlock &BB) override {
- if (skipOptnoneFunction(BB))
- return false;
- bool Changed = false;
- for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
- Instruction *Inst = &*DI++;
- if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
- Changed |= LowerFenceInst(FI);
- else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
- Changed |= LowerAtomicCmpXchgInst(CXI);
- else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Inst))
- Changed |= LowerAtomicRMWInst(RMWI);
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- if (LI->isAtomic())
- LowerLoadInst(LI);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->isAtomic())
- LowerStoreInst(SI);
- }
- }
- return Changed;
+static bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE;) {
+ Instruction *Inst = &*DI++;
+ if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+ Changed |= LowerFenceInst(FI);
+ else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ Changed |= LowerAtomicCmpXchgInst(CXI);
+ else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Inst))
+ Changed |= LowerAtomicRMWInst(RMWI);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (LI->isAtomic())
+ LowerLoadInst(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->isAtomic())
+ LowerStoreInst(SI);
}
+ }
+ return Changed;
+}
+
+static bool lowerAtomics(Function &F) {
+ bool Changed = false;
+ for (BasicBlock &BB : F) {
+ Changed |= runOnBasicBlock(BB);
+ }
+ return Changed;
+}
+
+PreservedAnalyses LowerAtomicPass::run(Function &F, FunctionAnalysisManager &) {
+ if (lowerAtomics(F))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+namespace {
+class LowerAtomicLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ LowerAtomicLegacyPass() : FunctionPass(ID) {
+ initializeLowerAtomicLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ FunctionAnalysisManager DummyFAM;
+ auto PA = Impl.run(F, DummyFAM);
+ return !PA.areAllPreserved();
+ }
+
+private:
+ LowerAtomicPass Impl;
};
}
-char LowerAtomic::ID = 0;
-INITIALIZE_PASS(LowerAtomic, "loweratomic",
- "Lower atomic intrinsics to non-atomic form",
- false, false)
+char LowerAtomicLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerAtomicLegacyPass, "loweratomic",
+ "Lower atomic intrinsics to non-atomic form", false, false)
-Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
+Pass *llvm::createLowerAtomicPass() { return new LowerAtomicLegacyPass(); }
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 2ace902a7a1b8..79f0db1163a4c 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -34,12 +34,24 @@ using namespace llvm;
STATISTIC(ExpectIntrinsicsHandled,
"Number of 'expect' intrinsic instructions handled");
-static cl::opt<uint32_t>
-LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
- cl::desc("Weight of the branch likely to be taken (default = 64)"));
-static cl::opt<uint32_t>
-UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
- cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+// These default values are chosen to represent an extremely skewed outcome for
+// a condition, but they leave some room for interpretation by later passes.
+//
+// If the documentation for __builtin_expect() was made explicit that it should
+// only be used in extreme cases, we could make this ratio higher. As it stands,
+// programmers may be using __builtin_expect() / llvm.expect to annotate that a
+// branch is likely or unlikely to be taken.
+//
+// There is a known dependency on this ratio in CodeGenPrepare when transforming
+// 'select' instructions. It may be worthwhile to hoist these values to some
+// shared space, so they can be used directly by other passes.
+
+static cl::opt<uint32_t> LikelyBranchWeight(
+ "likely-branch-weight", cl::Hidden, cl::init(2000),
+ cl::desc("Weight of the branch likely to be taken (default = 2000)"));
+static cl::opt<uint32_t> UnlikelyBranchWeight(
+ "unlikely-branch-weight", cl::Hidden, cl::init(1),
+ cl::desc("Weight of the branch unlikely to be taken (default = 1)"));
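
With the new defaults the weight ratio is 2000:1, i.e. the expected edge is assumed taken roughly 99.95% of the time. These weights ultimately come from source-level annotations such as the hedged example below, which is purely illustrative.

#include <cstdio>
#include <cstdlib>

void process(int *P) {
  // The condition is annotated as almost never true; with the defaults above
  // the resulting branch weights mark the error path as the unlikely successor.
  if (__builtin_expect(P == nullptr, 0)) {
    std::fprintf(stderr, "unexpected null pointer\n");
    std::abort();
  }
  *P += 1;
}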
static bool handleSwitchExpect(SwitchInst &SI) {
CallInst *CI = dyn_cast<CallInst>(SI.getCondition());
@@ -158,7 +170,8 @@ static bool lowerExpectIntrinsic(Function &F) {
return Changed;
}
-PreservedAnalyses LowerExpectIntrinsicPass::run(Function &F) {
+PreservedAnalyses LowerExpectIntrinsicPass::run(Function &F,
+ FunctionAnalysisManager &) {
if (lowerExpectIntrinsic(F))
return PreservedAnalyses::none();
diff --git a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
new file mode 100644
index 0000000000000..57491007d0141
--- /dev/null
+++ b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -0,0 +1,123 @@
+//===- LowerGuardIntrinsic.cpp - Lower the guard intrinsic ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the llvm.experimental.guard intrinsic to a conditional call
+// to @llvm.experimental.deoptimize. Once this happens, the guard can no longer
+// be widened.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+static cl::opt<uint32_t> PredicatePassBranchWeight(
+ "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20),
+ cl::desc("The probability of a guard failing is assumed to be the "
+ "reciprocal of this value (default = 1 << 20)"));
+
+namespace {
+struct LowerGuardIntrinsic : public FunctionPass {
+ static char ID;
+ LowerGuardIntrinsic() : FunctionPass(ID) {
+ initializeLowerGuardIntrinsicPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+}
+
+static void MakeGuardControlFlowExplicit(Function *DeoptIntrinsic,
+ CallInst *CI) {
+ OperandBundleDef DeoptOB(*CI->getOperandBundle(LLVMContext::OB_deopt));
+ SmallVector<Value *, 4> Args(std::next(CI->arg_begin()), CI->arg_end());
+
+ auto *CheckBB = CI->getParent();
+ auto *DeoptBlockTerm =
+ SplitBlockAndInsertIfThen(CI->getArgOperand(0), CI, true);
+
+ auto *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
+
+ // SplitBlockAndInsertIfThen inserts control flow that branches to
+ // DeoptBlockTerm if the condition is true. We want the opposite.
+ CheckBI->swapSuccessors();
+
+ CheckBI->getSuccessor(0)->setName("guarded");
+ CheckBI->getSuccessor(1)->setName("deopt");
+
+ if (auto *MD = CI->getMetadata(LLVMContext::MD_make_implicit))
+ CheckBI->setMetadata(LLVMContext::MD_make_implicit, MD);
+
+ MDBuilder MDB(CI->getContext());
+ CheckBI->setMetadata(LLVMContext::MD_prof,
+ MDB.createBranchWeights(PredicatePassBranchWeight, 1));
+
+ IRBuilder<> B(DeoptBlockTerm);
+ auto *DeoptCall = B.CreateCall(DeoptIntrinsic, Args, {DeoptOB}, "");
+
+ if (DeoptIntrinsic->getReturnType()->isVoidTy()) {
+ B.CreateRetVoid();
+ } else {
+ DeoptCall->setName("deoptcall");
+ B.CreateRet(DeoptCall);
+ }
+
+ DeoptCall->setCallingConv(CI->getCallingConv());
+ DeoptBlockTerm->eraseFromParent();
+}
+
+bool LowerGuardIntrinsic::runOnFunction(Function &F) {
+  // Check whether we can cheaply rule out there being any work to do.
+ auto *GuardDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ if (!GuardDecl || GuardDecl->use_empty())
+ return false;
+
+ SmallVector<CallInst *, 8> ToLower;
+ for (auto &I : instructions(F))
+ if (auto *CI = dyn_cast<CallInst>(&I))
+ if (auto *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::experimental_guard)
+ ToLower.push_back(CI);
+
+ if (ToLower.empty())
+ return false;
+
+ auto *DeoptIntrinsic = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::experimental_deoptimize, {F.getReturnType()});
+ DeoptIntrinsic->setCallingConv(GuardDecl->getCallingConv());
+
+ for (auto *CI : ToLower) {
+ MakeGuardControlFlowExplicit(DeoptIntrinsic, CI);
+ CI->eraseFromParent();
+ }
+
+ return true;
+}
+
+char LowerGuardIntrinsic::ID = 0;
+INITIALIZE_PASS(LowerGuardIntrinsic, "lower-guard-intrinsic",
+ "Lower the guard intrinsic to normal control flow", false,
+ false)
+
+Pass *llvm::createLowerGuardIntrinsicPass() {
+ return new LowerGuardIntrinsic();
+}
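For orientation, a hedged sketch of producing the kind of call this pass consumes; the builder, condition value, and empty deopt state are assumptions for illustration, not part of this patch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// Sketch: emit guard(Cond) [ "deopt"() ]; lower-guard-intrinsic later turns
// it into an explicit branch to an @llvm.experimental.deoptimize call.
void emitGuard(llvm::Module &M, llvm::IRBuilder<> &B, llvm::Value *Cond) {
  using namespace llvm;
  Function *GuardDecl =
      Intrinsic::getDeclaration(&M, Intrinsic::experimental_guard);
  SmallVector<Value *, 0> DeoptState;              // live values for deopt
  OperandBundleDef DeoptBundle("deopt", DeoptState);
  B.CreateCall(GuardDecl, {Cond}, {DeoptBundle});
}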
diff --git a/lib/Transforms/Scalar/Makefile b/lib/Transforms/Scalar/Makefile
deleted file mode 100644
index cc42fd00ac7da..0000000000000
--- a/lib/Transforms/Scalar/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Transforms/Scalar/Makefile ----------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMScalarOpts
-BUILD_ARCHIVE = 1
-
-include $(LEVEL)/Makefile.common
-
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 6b43b0f7a2ad8..d64c658f84369 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -12,22 +12,16 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -184,7 +178,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// size. If so, check to see whether we will end up actually reducing the
// number of stores used.
unsigned Bytes = unsigned(End-Start);
- unsigned MaxIntSize = DL.getLargestLegalIntTypeSize();
+ unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
if (MaxIntSize == 0)
MaxIntSize = 1;
unsigned NumPointerStores = Bytes / MaxIntSize;
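A hedged worked example of the byte math above, assuming a target whose largest legal integer type is 64 bits (the target is an assumption, not something this hunk states):

//   MaxIntSize       = 64 / 8 = 8 bytes
//   Bytes            = End - Start = 32
//   NumPointerStores = 32 / 8 = 4   // weighed against the cost of one memset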
@@ -301,19 +295,16 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
}
//===----------------------------------------------------------------------===//
-// MemCpyOpt Pass
+// MemCpyOptLegacyPass Pass
//===----------------------------------------------------------------------===//
namespace {
- class MemCpyOpt : public FunctionPass {
- MemoryDependenceAnalysis *MD;
- TargetLibraryInfo *TLI;
+ class MemCpyOptLegacyPass : public FunctionPass {
+ MemCpyOptPass Impl;
public:
static char ID; // Pass identification, replacement for typeid
- MemCpyOpt() : FunctionPass(ID) {
- initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
- MD = nullptr;
- TLI = nullptr;
+ MemCpyOptLegacyPass() : FunctionPass(ID) {
+ initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -324,11 +315,11 @@ namespace {
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<MemoryDependenceWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<MemoryDependenceAnalysis>();
+ AU.addPreserved<MemoryDependenceWrapperPass>();
}
// Helper functions
@@ -348,29 +339,30 @@ namespace {
bool iterateOnFunction(Function &F);
};
- char MemCpyOpt::ID = 0;
+ char MemCpyOptLegacyPass::ID = 0;
}
/// The public interface to this file...
-FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
+FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
-INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
false, false)
/// When scanning forward over instructions, we look for some other patterns to
/// fold away. In particular, this looks for stores to neighboring locations of
/// memory. If it sees enough consecutive ones, it attempts to merge them
/// together into a memcpy/memset.
-Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
- Value *StartPtr, Value *ByteVal) {
+Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
+ Value *StartPtr,
+ Value *ByteVal) {
const DataLayout &DL = StartInst->getModule()->getDataLayout();
  // Okay, so we now have a single store whose value can be splatted.  Scan to find
@@ -493,7 +485,93 @@ static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
return std::min(StoreAlign, LoadAlign);
}
-bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
+// This method tries to lift a store instruction to before position P.
+// It lifts the store and its operands, plus anything that may alias with
+// them.
+// The method returns true if it was successful.
+static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P) {
+  // If the store aliases this position, bail out early.
+ MemoryLocation StoreLoc = MemoryLocation::get(SI);
+ if (AA.getModRefInfo(P, StoreLoc) != MRI_NoModRef)
+ return false;
+
+  // Keep track of the arguments of all instructions we plan to lift
+  // so we can make sure to lift them as well if appropriate.
+ DenseSet<Instruction*> Args;
+ if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
+ if (Ptr->getParent() == SI->getParent())
+ Args.insert(Ptr);
+
+  // Instructions to lift before P.
+ SmallVector<Instruction*, 8> ToLift;
+
+ // Memory locations of lifted instructions.
+ SmallVector<MemoryLocation, 8> MemLocs;
+ MemLocs.push_back(StoreLoc);
+
+ // Lifted callsites.
+ SmallVector<ImmutableCallSite, 8> CallSites;
+
+ for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
+ auto *C = &*I;
+
+ bool MayAlias = AA.getModRefInfo(C) != MRI_NoModRef;
+
+ bool NeedLift = false;
+ if (Args.erase(C))
+ NeedLift = true;
+ else if (MayAlias) {
+ NeedLift = std::any_of(MemLocs.begin(), MemLocs.end(),
+ [C, &AA](const MemoryLocation &ML) {
+ return AA.getModRefInfo(C, ML);
+ });
+
+ if (!NeedLift)
+ NeedLift = std::any_of(CallSites.begin(), CallSites.end(),
+ [C, &AA](const ImmutableCallSite &CS) {
+ return AA.getModRefInfo(C, CS);
+ });
+ }
+
+ if (!NeedLift)
+ continue;
+
+ if (MayAlias) {
+ if (auto CS = ImmutableCallSite(C)) {
+ // If we can't lift this before P, it's game over.
+ if (AA.getModRefInfo(P, CS) != MRI_NoModRef)
+ return false;
+
+ CallSites.push_back(CS);
+ } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
+ // If we can't lift this before P, it's game over.
+ auto ML = MemoryLocation::get(C);
+ if (AA.getModRefInfo(P, ML) != MRI_NoModRef)
+ return false;
+
+ MemLocs.push_back(ML);
+ } else
+ // We don't know how to lift this instruction.
+ return false;
+ }
+
+ ToLift.push_back(C);
+ for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
+ if (auto *A = dyn_cast<Instruction>(C->getOperand(k)))
+ if (A->getParent() == SI->getParent())
+ Args.insert(A);
+ }
+
+  // We made it; lift all of the collected instructions before P.
+ for (auto *I : reverse(ToLift)) {
+ DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
+ I->moveBefore(P);
+ }
+
+ return true;
+}
+
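The lifting loop above is built around a single alias query; a minimal hedged sketch of that query, with an invented helper name:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"

// Asks whether I may write to the memory SI stores to; MRI_Mod is the
// "may modify" bit the loop above tests. The helper name is illustrative.
static bool mayWriteToStoredLocation(llvm::AliasAnalysis &AA,
                                     llvm::Instruction *I,
                                     llvm::StoreInst *SI) {
  llvm::MemoryLocation Loc = llvm::MemoryLocation::get(SI);
  return AA.getModRefInfo(I, Loc) & llvm::MRI_Mod;
}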
+bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
// Avoid merging nontemporal stores since the resulting
@@ -514,7 +592,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
auto *T = LI->getType();
if (T->isAggregateType()) {
- AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ AliasAnalysis &AA = LookupAliasAnalysis();
MemoryLocation LoadLoc = MemoryLocation::get(LI);
// We use alias analysis to check if an instruction may store to
@@ -522,26 +600,20 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// such an instruction is found, we try to promote there instead
// of at the store position.
Instruction *P = SI;
- for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator();
- I != E; ++I) {
- if (!(AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod))
- continue;
-
- // We found an instruction that may write to the loaded memory.
- // We can try to promote at this position instead of the store
- // position if nothing alias the store memory after this and the store
- // destination is not in the range.
- P = &*I;
- for (; I != E; ++I) {
- MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (&*I == SI->getOperand(1) ||
- AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
- P = nullptr;
- break;
- }
+ for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
+ if (AA.getModRefInfo(&I, LoadLoc) & MRI_Mod) {
+ P = &I;
+ break;
}
+ }
- break;
+ // We found an instruction that may write to the loaded memory.
+ // We can try to promote at this position instead of the store
+      // position if nothing aliases the store memory after this and the store
+ // destination is not in the range.
+ if (P && P != SI) {
+ if (!moveUp(AA, SI, P))
+ P = nullptr;
}
// If a valid insertion position is found, then we can promote
@@ -594,7 +666,9 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
- AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ Value *CpyDest = SI->getPointerOperand()->stripPointerCasts();
+ bool CpyDestIsLocal = isa<AllocaInst>(CpyDest);
+ AliasAnalysis &AA = LookupAliasAnalysis();
MemoryLocation StoreLoc = MemoryLocation::get(SI);
for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
I != E; --I) {
@@ -602,6 +676,12 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
C = nullptr;
break;
}
+ // The store to dest may never happen if an exception can be thrown
+ // between the load and the store.
+ if (I->mayThrow() && !CpyDestIsLocal) {
+ C = nullptr;
+ break;
+ }
}
}
@@ -665,7 +745,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
return false;
}
-bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
+bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
// See if there is another memset or store neighboring this memset which
// allows us to widen out the memset to do a single larger store.
if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
@@ -681,10 +761,9 @@ bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
- Value *cpyDest, Value *cpySrc,
- uint64_t cpyLen, unsigned cpyAlign,
- CallInst *C) {
+bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
+ Value *cpySrc, uint64_t cpyLen,
+ unsigned cpyAlign, CallInst *C) {
// The general transformation to keep in mind is
//
// call @func(..., src, ...)
@@ -699,6 +778,11 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// src only holds uninitialized values at the moment of the call, meaning that
// the memcpy can be discarded rather than moved.
+  // Lifetime markers shouldn't be operated on.
+ if (Function *F = C->getCalledFunction())
+ if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
+ return false;
+
// Deliberately get the source and destination with bitcasts stripped away,
// because we'll need to do type comparisons based on the underlying type.
CallSite CS(C);
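For context, a hedged source-level illustration of the pattern call-slot optimization targets; Big, producer(), and consumer() are invented names, not from this patch.

#include <cstring>

struct Big { int v[32]; };
static void producer(Big *out) { for (int i = 0; i < 32; ++i) out->v[i] = i; }

void consumer(Big *dst) {
  Big tmp;
  producer(&tmp);                      // call @func(..., src, ...)
  std::memcpy(dst, &tmp, sizeof tmp);  // memcpy(dst, src, size)
  // If tmp is dead afterwards and nothing aliases dst across the call,
  // producer() can write straight into *dst and the memcpy (and usually
  // tmp itself) disappears.
}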
@@ -734,6 +818,10 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (destSize < srcSize)
return false;
} else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
+ // The store to dest may never happen if the call can throw.
+ if (C->mayThrow())
+ return false;
+
if (A->getDereferenceableBytes() < srcSize) {
// If the destination is an sret parameter then only accesses that are
// outside of the returned struct type can trap.
@@ -805,7 +893,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DominatorTree &DT = LookupDomTree();
if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
if (!DT.dominates(cpyDestInst, C))
return false;
@@ -814,7 +902,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// unexpected manner, for example via a global, which we deduce from
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
- AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ AliasAnalysis &AA = LookupAliasAnalysis();
ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
// If necessary, perform additional analysis.
if (MR != MRI_NoModRef)
@@ -867,7 +955,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
-bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
+bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
+ MemCpyInst *MDep) {
// We can only transforms memcpy's where the dest of one is the source of the
// other.
if (M->getSource() != MDep->getDest() || MDep->isVolatile())
@@ -888,7 +977,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
- AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ AliasAnalysis &AA = LookupAliasAnalysis();
// Verify that the copied-from memory doesn't change in between the two
// transfers. For example, in:
@@ -954,8 +1043,8 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
/// memcpy(dst, src, src_size);
/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
/// \endcode
-bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
- MemSetInst *MemSet) {
+bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
+ MemSetInst *MemSet) {
// We can only transform memset/memcpy with the same destination.
if (MemSet->getDest() != MemCpy->getDest())
return false;
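A hedged worked example of the rewrite described above, with invented sizes (dst_size = 16, src_size = 10):

#include <cstring>

void memset_then_copy(char *dst, const char *src) {
  std::memset(dst, 0, 16);      // memset(dst, c, dst_size)
  std::memcpy(dst, src, 10);    // memcpy(dst, src, src_size)
}
// ...is rewritten so the memset no longer touches bytes the memcpy overwrites:
void copy_then_memset(char *dst, const char *src) {
  std::memcpy(dst, src, 10);
  std::memset(dst + 10, 0, 6);  // dst_size - src_size = 16 - 10 = 6
}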
@@ -1019,8 +1108,8 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
/// When dst2_size <= dst1_size.
///
/// The \p MemCpy must have a Constant length.
-bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
- MemSetInst *MemSet) {
+bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
+ MemSetInst *MemSet) {
// This only makes sense on memcpy(..., memset(...), ...).
if (MemSet->getRawDest() != MemCpy->getRawSource())
return false;
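Similarly, a hedged worked example of the memcpy-of-memset case guarded above, with invented sizes satisfying dst2_size <= dst1_size:

#include <cstring>

void copy_from_memset(char *dst, char *scratch) {
  std::memset(scratch, 0, 64);    // dst1_size = 64
  std::memcpy(dst, scratch, 16);  // dst2_size = 16, source is the memset dest
}
// ...lets the memcpy be replaced by a direct memset of the real destination:
void memset_directly(char *dst, char *scratch) {
  std::memset(scratch, 0, 64);    // kept unless later proven dead
  std::memset(dst, 0, 16);
}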
@@ -1043,7 +1132,7 @@ bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
-bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
// We can only optimize non-volatile memcpy's.
if (M->isVolatile()) return false;
@@ -1141,8 +1230,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
-bool MemCpyOpt::processMemMove(MemMoveInst *M) {
- AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
+ AliasAnalysis &AA = LookupAliasAnalysis();
if (!TLI->has(LibFunc::memmove))
return false;
@@ -1152,7 +1241,8 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
MemoryLocation::getForSource(M)))
return false;
- DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
+ DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
+ << "\n");
// If not, then we know we can transform this.
Type *ArgTys[3] = { M->getRawDest()->getType(),
@@ -1170,7 +1260,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
}
/// This is called on every byval argument in call sites.
-bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
+bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
@@ -1202,10 +1292,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// If it is greater than the memcpy, then we check to see if we can force the
// source of the memcpy to the alignment we need. If we fail, we bail out.
- AssumptionCache &AC =
- getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *CS->getParent()->getParent());
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AssumptionCache &AC = LookupAssumptionCache();
+ DominatorTree &DT = LookupDomTree();
if (MDep->getAlignment() < ByValAlign &&
getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
CS.getInstruction(), &AC, &DT) < ByValAlign)
@@ -1231,7 +1319,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
"tmpcast", CS.getInstruction());
- DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
+ DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
<< " " << *MDep << "\n"
<< " " << *CS.getInstruction() << "\n");
@@ -1241,13 +1329,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
return true;
}
-/// Executes one iteration of MemCpyOpt.
-bool MemCpyOpt::iterateOnFunction(Function &F) {
+/// Executes one iteration of MemCpyOptPass.
+bool MemCpyOptPass::iterateOnFunction(Function &F) {
bool MadeChange = false;
// Walk all instruction in the function.
- for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ for (BasicBlock &BB : F) {
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
// Avoid invalidating the iterator.
Instruction *I = &*BI++;
@@ -1269,7 +1357,8 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
// Reprocess the instruction if desired.
if (RepeatInstruction) {
- if (BI != BB->begin()) --BI;
+ if (BI != BB.begin())
+ --BI;
MadeChange = true;
}
}
@@ -1278,14 +1367,42 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
return MadeChange;
}
-/// This is the main transformation entry point for a function.
-bool MemCpyOpt::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
+PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
+
+ auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+
+ auto LookupAliasAnalysis = [&]() -> AliasAnalysis & {
+ return AM.getResult<AAManager>(F);
+ };
+ auto LookupAssumptionCache = [&]() -> AssumptionCache & {
+ return AM.getResult<AssumptionAnalysis>(F);
+ };
+ auto LookupDomTree = [&]() -> DominatorTree & {
+ return AM.getResult<DominatorTreeAnalysis>(F);
+ };
+
+ bool MadeChange = runImpl(F, &MD, &TLI, LookupAliasAnalysis,
+ LookupAssumptionCache, LookupDomTree);
+ if (!MadeChange)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ PA.preserve<MemoryDependenceAnalysis>();
+ return PA;
+}
+bool MemCpyOptPass::runImpl(
+ Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_,
+ std::function<AliasAnalysis &()> LookupAliasAnalysis_,
+ std::function<AssumptionCache &()> LookupAssumptionCache_,
+ std::function<DominatorTree &()> LookupDomTree_) {
bool MadeChange = false;
- MD = &getAnalysis<MemoryDependenceAnalysis>();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ MD = MD_;
+ TLI = TLI_;
+ LookupAliasAnalysis = std::move(LookupAliasAnalysis_);
+ LookupAssumptionCache = std::move(LookupAssumptionCache_);
+ LookupDomTree = std::move(LookupDomTree_);
// If we don't have at least memset and memcpy, there is little point of doing
// anything here. These are required by a freestanding implementation, so if
@@ -1302,3 +1419,25 @@ bool MemCpyOpt::runOnFunction(Function &F) {
MD = nullptr;
return MadeChange;
}
+
+/// This is the main transformation entry point for a function.
+bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ auto *MD = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+ auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+ auto LookupAliasAnalysis = [this]() -> AliasAnalysis & {
+ return getAnalysis<AAResultsWrapperPass>().getAAResults();
+ };
+ auto LookupAssumptionCache = [this, &F]() -> AssumptionCache & {
+ return getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ };
+ auto LookupDomTree = [this]() -> DominatorTree & {
+ return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ };
+
+ return Impl.runImpl(F, MD, TLI, LookupAliasAnalysis, LookupAssumptionCache,
+ LookupDomTree);
+}
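A hedged sketch of exercising the two entry points above; the PassBuilder wiring is abbreviated and exact details may vary by release.

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"

void runMemCpyOptNewPM(llvm::Function &F) {
  llvm::PassBuilder PB;
  llvm::FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);   // provides MemoryDependenceAnalysis etc.
  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::MemCpyOptPass()); // the run() overload defined above
  FPM.run(F, FAM);
}

Under the legacy pass manager the same code is reached through createMemCpyOptPass(), which constructs the MemCpyOptLegacyPass wrapper.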
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index c812d618c16ac..30261b7550019 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -72,9 +72,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
@@ -82,51 +80,37 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include <vector>
using namespace llvm;
#define DEBUG_TYPE "mldst-motion"
+namespace {
//===----------------------------------------------------------------------===//
// MergedLoadStoreMotion Pass
//===----------------------------------------------------------------------===//
+class MergedLoadStoreMotion {
+ MemoryDependenceResults *MD = nullptr;
+ AliasAnalysis *AA = nullptr;
-namespace {
-class MergedLoadStoreMotion : public FunctionPass {
- AliasAnalysis *AA;
- MemoryDependenceAnalysis *MD;
+  // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
+  // where Size0 and Size1 are the number of instructions on the two sides of
+  // the diamond. The constant chosen here is arbitrary. Compile time is kept
+  // in check by requiring Size0 * Size1 < MagicCompileTimeControl.
+ const int MagicCompileTimeControl = 250;
public:
- static char ID; // Pass identification, replacement for typeid
- MergedLoadStoreMotion()
- : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
- initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
+ bool run(Function &F, MemoryDependenceResults *MD, AliasAnalysis &AA);
private:
- // This transformation requires dominator postdominator info
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<MemoryDependenceAnalysis>();
- }
-
- // Helper routines
-
///
/// \brief Remove instruction from parent and update memory dependence
/// analysis.
@@ -135,9 +119,9 @@ private:
BasicBlock *getDiamondTail(BasicBlock *BB);
bool isDiamondHead(BasicBlock *BB);
// Routines for hoisting loads
- bool isLoadHoistBarrierInRange(const Instruction& Start,
- const Instruction& End,
- LoadInst* LI);
+ bool isLoadHoistBarrierInRange(const Instruction &Start,
+ const Instruction &End, LoadInst *LI,
+ bool SafeToLoadUnconditionally);
LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
Instruction *ElseInst);
@@ -151,31 +135,8 @@ private:
const Instruction &End, MemoryLocation Loc);
bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
bool mergeStores(BasicBlock *BB);
- // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
- // where Size0 and Size1 are the #instructions on the two sides of
- // the diamond. The constant chosen here is arbitrary. Compiler Time
- // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
- const int MagicCompileTimeControl;
};
-
-char MergedLoadStoreMotion::ID = 0;
-} // anonymous namespace
-
-///
-/// \brief createMergedLoadStoreMotionPass - The public interface to this file.
-///
-FunctionPass *llvm::createMergedLoadStoreMotionPass() {
- return new MergedLoadStoreMotion();
-}
-
-INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
- "MergedLoadStoreMotion", false, false)
-INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
- "MergedLoadStoreMotion", false, false)
+} // end anonymous namespace
///
/// \brief Remove instruction from parent and update memory dependence analysis.
@@ -184,9 +145,9 @@ void MergedLoadStoreMotion::removeInstruction(Instruction *Inst) {
// Notify the memory dependence analysis.
if (MD) {
MD->removeInstruction(Inst);
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ if (auto *LI = dyn_cast<LoadInst>(Inst))
MD->invalidateCachedPointerInfo(LI->getPointerOperand());
- if (Inst->getType()->getScalarType()->isPointerTy()) {
+ if (Inst->getType()->isPtrOrPtrVectorTy()) {
MD->invalidateCachedPointerInfo(Inst);
}
}
@@ -198,10 +159,7 @@ void MergedLoadStoreMotion::removeInstruction(Instruction *Inst) {
///
BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
assert(isDiamondHead(BB) && "Basic block is not head of a diamond");
- BranchInst *BI = (BranchInst *)(BB->getTerminator());
- BasicBlock *Succ0 = BI->getSuccessor(0);
- BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0);
- return Tail;
+ return BB->getTerminator()->getSuccessor(0)->getSingleSuccessor();
}
///
@@ -210,25 +168,22 @@ BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
if (!BB)
return false;
- if (!isa<BranchInst>(BB->getTerminator()))
- return false;
- if (BB->getTerminator()->getNumSuccessors() != 2)
+ auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
return false;
- BranchInst *BI = (BranchInst *)(BB->getTerminator());
BasicBlock *Succ0 = BI->getSuccessor(0);
BasicBlock *Succ1 = BI->getSuccessor(1);
- if (!Succ0->getSinglePredecessor() ||
- Succ0->getTerminator()->getNumSuccessors() != 1)
+ if (!Succ0->getSinglePredecessor())
return false;
- if (!Succ1->getSinglePredecessor() ||
- Succ1->getTerminator()->getNumSuccessors() != 1)
+ if (!Succ1->getSinglePredecessor())
return false;
- BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0);
+ BasicBlock *Succ0Succ = Succ0->getSingleSuccessor();
+ BasicBlock *Succ1Succ = Succ1->getSingleSuccessor();
// Ignore triangles.
- if (Succ1->getTerminator()->getSuccessor(0) != Tail)
+ if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ)
return false;
return true;
}
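A hedged source-level shape of the diamond this predicate accepts; the function and names are invented.

// One conditional branch, two single-predecessor arms, one shared tail block.
int diamond(bool c, const int *p) {
  int v;
  if (c)
    v = *p + 1;   // Succ0
  else
    v = *p - 1;   // Succ1
  return v;       // Succ0Succ == Succ1Succ (the tail)
}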
@@ -240,9 +195,14 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
/// being loaded or protect against the load from happening
/// it is considered a hoist barrier.
///
-bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
- const Instruction& End,
- LoadInst* LI) {
+bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(
+ const Instruction &Start, const Instruction &End, LoadInst *LI,
+ bool SafeToLoadUnconditionally) {
+ if (!SafeToLoadUnconditionally)
+ for (const Instruction &Inst :
+ make_range(Start.getIterator(), End.getIterator()))
+ if (!isGuaranteedToTransferExecutionToSuccessor(&Inst))
+ return true;
MemoryLocation Loc = MemoryLocation::get(LI);
return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
}
@@ -256,23 +216,28 @@ bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
///
LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
LoadInst *Load0) {
-
+ BasicBlock *BB0 = Load0->getParent();
+ BasicBlock *Head = BB0->getSinglePredecessor();
+ bool SafeToLoadUnconditionally = isSafeToLoadUnconditionally(
+ Load0->getPointerOperand(), Load0->getAlignment(),
+ Load0->getModule()->getDataLayout(),
+ /*ScanFrom=*/Head->getTerminator());
for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
++BBI) {
Instruction *Inst = &*BBI;
// Only merge and hoist loads when their result in used only in BB
- if (!isa<LoadInst>(Inst) || Inst->isUsedOutsideOfBlock(BB1))
+ auto *Load1 = dyn_cast<LoadInst>(Inst);
+ if (!Load1 || Inst->isUsedOutsideOfBlock(BB1))
continue;
- LoadInst *Load1 = dyn_cast<LoadInst>(Inst);
- BasicBlock *BB0 = Load0->getParent();
-
MemoryLocation Loc0 = MemoryLocation::get(Load0);
MemoryLocation Loc1 = MemoryLocation::get(Load1);
- if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) &&
- !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) &&
- !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) {
+ if (Load0->isSameOperationAs(Load1) && AA->isMustAlias(Loc0, Loc1) &&
+ !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1,
+ SafeToLoadUnconditionally) &&
+ !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0,
+ SafeToLoadUnconditionally)) {
return Load1;
}
}
@@ -319,11 +284,10 @@ void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
///
bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const {
BasicBlock *Parent = I->getParent();
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- Instruction *Instr = dyn_cast<Instruction>(I->getOperand(i));
- if (Instr && Instr->getParent() == Parent)
- return false;
- }
+ for (Use &U : I->operands())
+ if (auto *Instr = dyn_cast<Instruction>(&U))
+ if (Instr->getParent() == Parent)
+ return false;
return true;
}
@@ -333,8 +297,8 @@ bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const {
bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0,
LoadInst *L1) {
// Only one definition?
- Instruction *A0 = dyn_cast<Instruction>(L0->getPointerOperand());
- Instruction *A1 = dyn_cast<Instruction>(L1->getPointerOperand());
+ auto *A0 = dyn_cast<Instruction>(L0->getPointerOperand());
+ auto *A1 = dyn_cast<Instruction>(L1->getPointerOperand());
if (A0 && A1 && A0->isIdenticalTo(A1) && isSafeToHoist(A0) &&
A0->hasOneUse() && (A0->getParent() == L0->getParent()) &&
A1->hasOneUse() && (A1->getParent() == L1->getParent()) &&
@@ -345,8 +309,8 @@ bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0,
hoistInstruction(BB, A0, A1);
hoistInstruction(BB, L0, L1);
return true;
- } else
- return false;
+ }
+ return false;
}
///
@@ -358,7 +322,7 @@ bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0,
bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
bool MergedLoads = false;
assert(isDiamondHead(BB));
- BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
BasicBlock *Succ0 = BI->getSuccessor(0);
BasicBlock *Succ1 = BI->getSuccessor(1);
// #Instructions in Succ1 for Compile Time Control
@@ -369,8 +333,8 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
Instruction *I = &*BBI;
++BBI;
- // Only move non-simple (atomic, volatile) loads.
- LoadInst *L0 = dyn_cast<LoadInst>(I);
+ // Don't move non-simple (atomic, volatile) loads.
+ auto *L0 = dyn_cast<LoadInst>(I);
if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0))
continue;
@@ -399,6 +363,10 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
const Instruction &End,
MemoryLocation Loc) {
+ for (const Instruction &Inst :
+ make_range(Start.getIterator(), End.getIterator()))
+ if (Inst.mayThrow())
+ return true;
return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
}
@@ -411,22 +379,16 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
StoreInst *Store0) {
DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
BasicBlock *BB0 = Store0->getParent();
- for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend();
- RBI != RBE; ++RBI) {
- Instruction *Inst = &*RBI;
-
- if (!isa<StoreInst>(Inst))
- continue;
-
- StoreInst *Store1 = cast<StoreInst>(Inst);
+ for (Instruction &Inst : reverse(*BB1)) {
+ auto *Store1 = dyn_cast<StoreInst>(&Inst);
+ if (!Store1)
+ continue;
MemoryLocation Loc0 = MemoryLocation::get(Store0);
MemoryLocation Loc1 = MemoryLocation::get(Store1);
if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
- !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))),
- BB1->back(), Loc1) &&
- !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))),
- BB0->back(), Loc0)) {
+ !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
+ !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
return Store1;
}
}
@@ -439,17 +401,17 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
StoreInst *S1) {
// Create a phi if the values mismatch.
- PHINode *NewPN = nullptr;
Value *Opd1 = S0->getValueOperand();
Value *Opd2 = S1->getValueOperand();
- if (Opd1 != Opd2) {
- NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
- &BB->front());
- NewPN->addIncoming(Opd1, S0->getParent());
- NewPN->addIncoming(Opd2, S1->getParent());
- if (MD && NewPN->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(NewPN);
- }
+ if (Opd1 == Opd2)
+ return nullptr;
+
+ auto *NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
+ &BB->front());
+ NewPN->addIncoming(Opd1, S0->getParent());
+ NewPN->addIncoming(Opd2, S1->getParent());
+ if (MD && NewPN->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(NewPN);
return NewPN;
}
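A hedged source pattern for the PHI created above; names are invented.

// Both arms store different values to the same address, so after the stores
// are sunk into the tail block the single remaining store takes a PHI operand.
void select_store(bool c, int *p) {
  if (c)
    *p = 1;
  else
    *p = 2;
}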
@@ -461,8 +423,8 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
StoreInst *S1) {
// Only one definition?
- Instruction *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
- Instruction *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
+ auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
+ auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
(A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
(A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0)) {
@@ -476,7 +438,7 @@ bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
S0->dropUnknownNonDebugMetadata();
// Create the new store to be inserted at the join point.
- StoreInst *SNew = (StoreInst *)(S0->clone());
+ StoreInst *SNew = cast<StoreInst>(S0->clone());
Instruction *ANew = A0->clone();
SNew->insertBefore(&*InsertPt);
ANew->insertBefore(SNew);
@@ -484,9 +446,8 @@ bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
assert(S0->getParent() == A0->getParent());
assert(S1->getParent() == A1->getParent());
- PHINode *NewPN = getPHIOperand(BB, S0, S1);
// New PHI operand? Use it.
- if (NewPN)
+ if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
SNew->setOperand(0, NewPN);
removeInstruction(S0);
removeInstruction(S1);
@@ -532,11 +493,9 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
Instruction *I = &*RBI;
++RBI;
- // Sink move non-simple (atomic, volatile) stores
- if (!isa<StoreInst>(I))
- continue;
- StoreInst *S0 = (StoreInst *)I;
- if (!S0->isSimple())
+ // Don't sink non-simple (atomic, volatile) stores.
+ auto *S0 = dyn_cast<StoreInst>(I);
+ if (!S0 || !S0->isSimple())
continue;
++NStores;
@@ -551,22 +510,18 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
// is likely stale at this point.
if (!Res)
break;
- else {
- RBI = Pred0->rbegin();
- RBE = Pred0->rend();
- DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
- }
+ RBI = Pred0->rbegin();
+ RBE = Pred0->rend();
+ DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
}
}
return MergedStores;
}
-///
-/// \brief Run the transformation for each function
-///
-bool MergedLoadStoreMotion::runOnFunction(Function &F) {
- MD = getAnalysisIfAvailable<MemoryDependenceAnalysis>();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+bool MergedLoadStoreMotion::run(Function &F, MemoryDependenceResults *MD,
+ AliasAnalysis &AA) {
+ this->MD = MD;
+ this->AA = &AA;
bool Changed = false;
DEBUG(dbgs() << "Instruction Merger\n");
@@ -585,3 +540,66 @@ bool MergedLoadStoreMotion::runOnFunction(Function &F) {
}
return Changed;
}
+
+namespace {
+class MergedLoadStoreMotionLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MergedLoadStoreMotionLegacyPass() : FunctionPass(ID) {
+ initializeMergedLoadStoreMotionLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ ///
+ /// \brief Run the transformation for each function
+ ///
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ MergedLoadStoreMotion Impl;
+ auto *MDWP = getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
+ return Impl.run(F, MDWP ? &MDWP->getMemDep() : nullptr,
+ getAnalysis<AAResultsWrapperPass>().getAAResults());
+ }
+
+private:
+  // This transformation requires dominator and postdominator info.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<MemoryDependenceWrapperPass>();
+ }
+};
+
+char MergedLoadStoreMotionLegacyPass::ID = 0;
+} // anonymous namespace
+
+///
+/// \brief createMergedLoadStoreMotionPass - The public interface to this file.
+///
+FunctionPass *llvm::createMergedLoadStoreMotionPass() {
+ return new MergedLoadStoreMotionLegacyPass();
+}
+
+INITIALIZE_PASS_BEGIN(MergedLoadStoreMotionLegacyPass, "mldst-motion",
+ "MergedLoadStoreMotion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion",
+ "MergedLoadStoreMotion", false, false)
+
+PreservedAnalyses
+MergedLoadStoreMotionPass::run(Function &F, AnalysisManager<Function> &AM) {
+ MergedLoadStoreMotion Impl;
+ auto *MD = AM.getCachedResult<MemoryDependenceAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ if (!Impl.run(F, MD, AA))
+ return PreservedAnalyses::all();
+
+ // FIXME: This should also 'preserve the CFG'.
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ PA.preserve<MemoryDependenceAnalysis>();
+ return PA;
+}
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index c8f885e7eec53..ed754fa710253 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -208,7 +208,7 @@ FunctionPass *llvm::createNaryReassociatePass() {
}
bool NaryReassociate::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
+ if (skipFunction(F))
return false;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -299,49 +299,18 @@ Instruction *NaryReassociate::tryReassociate(Instruction *I) {
}
}
-// FIXME: extract this method into TTI->getGEPCost.
static bool isGEPFoldable(GetElementPtrInst *GEP,
- const TargetTransformInfo *TTI,
- const DataLayout *DL) {
- GlobalVariable *BaseGV = nullptr;
- int64_t BaseOffset = 0;
- bool HasBaseReg = false;
- int64_t Scale = 0;
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand()))
- BaseGV = GV;
- else
- HasBaseReg = true;
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) {
- if (isa<SequentialType>(*GTI)) {
- int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
- if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
- BaseOffset += ConstIdx->getSExtValue() * ElementSize;
- } else {
- // Needs scale register.
- if (Scale != 0) {
- // No addressing mode takes two scale registers.
- return false;
- }
- Scale = ElementSize;
- }
- } else {
- StructType *STy = cast<StructType>(*GTI);
- uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
- BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
- }
- }
-
- unsigned AddrSpace = GEP->getPointerAddressSpace();
- return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
- BaseOffset, HasBaseReg, Scale, AddrSpace);
+ const TargetTransformInfo *TTI) {
+ SmallVector<const Value*, 4> Indices;
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
+ Indices.push_back(*I);
+ return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
+ Indices) == TargetTransformInfo::TCC_Free;
}
Instruction *NaryReassociate::tryReassociateGEP(GetElementPtrInst *GEP) {
// Not worth reassociating GEP if it is foldable.
- if (isGEPFoldable(GEP, TTI, DL))
+ if (isGEPFoldable(GEP, TTI))
return nullptr;
gep_type_iterator GTI = gep_type_begin(*GEP);
@@ -434,7 +403,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
// NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
- Type *ElementType = GEP->getType()->getElementType();
+ Type *ElementType = GEP->getResultElementType();
uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
// Another less rare case: because I is not necessarily the last index of the
// GEP, the size of the type at the I-th index (IndexedSize) is not
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 9f26f78892c65..c4b3e3464f409 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -13,12 +13,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -26,85 +24,9 @@ using namespace llvm;
#define DEBUG_TYPE "partially-inline-libcalls"
-namespace {
- class PartiallyInlineLibCalls : public FunctionPass {
- public:
- static char ID;
-
- PartiallyInlineLibCalls() :
- FunctionPass(ID) {
- initializePartiallyInlineLibCallsPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
-
- private:
- /// Optimize calls to sqrt.
- bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
- BasicBlock &CurrBB, Function::iterator &BB);
- };
-
- char PartiallyInlineLibCalls::ID = 0;
-}
-
-INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
- "Partially inline calls to library functions", false, false)
-
-void PartiallyInlineLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- FunctionPass::getAnalysisUsage(AU);
-}
-
-bool PartiallyInlineLibCalls::runOnFunction(Function &F) {
- bool Changed = false;
- Function::iterator CurrBB;
- TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- const TargetTransformInfo *TTI =
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
- CurrBB = BB++;
-
- for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
- II != IE; ++II) {
- CallInst *Call = dyn_cast<CallInst>(&*II);
- Function *CalledFunc;
-
- if (!Call || !(CalledFunc = Call->getCalledFunction()))
- continue;
-
- // Skip if function either has local linkage or is not a known library
- // function.
- LibFunc::Func LibFunc;
- if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
- !TLI->getLibFunc(CalledFunc->getName(), LibFunc))
- continue;
-
- switch (LibFunc) {
- case LibFunc::sqrtf:
- case LibFunc::sqrt:
- if (TTI->haveFastSqrt(Call->getType()) &&
- optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
- break;
- continue;
- default:
- continue;
- }
- Changed = true;
- break;
- }
- }
-
- return Changed;
-}
-
-bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
- Function *CalledFunc,
- BasicBlock &CurrBB,
- Function::iterator &BB) {
+static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
+ BasicBlock &CurrBB, Function::iterator &BB) {
  // There is no need to change the IR, since the backend will emit a sqrt
  // instruction if the call has already been marked read-only.
if (Call->onlyReadsMemory())
@@ -158,6 +80,97 @@ bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
return true;
}
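A hedged sketch of the shape of this transformation at the source level; it is not the exact IR the pass emits, only the fast-path/slow-path split it creates.

#include <cmath>

double sqrt_partially_inlined(double x) {
  double r = __builtin_sqrt(x);  // the backend lowers this to a sqrt instruction
  if (r != r)                    // NaN: x was negative or NaN
    r = std::sqrt(x);            // full library call, which may set errno
  return r;
}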
+static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI) {
+ bool Changed = false;
+
+ Function::iterator CurrBB;
+ for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
+ CurrBB = BB++;
+
+ for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
+ II != IE; ++II) {
+ CallInst *Call = dyn_cast<CallInst>(&*II);
+ Function *CalledFunc;
+
+ if (!Call || !(CalledFunc = Call->getCalledFunction()))
+ continue;
+
+      // Skip if the function either has local linkage or is not a known
+      // library function.
+ LibFunc::Func LibFunc;
+ if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
+ !TLI->getLibFunc(CalledFunc->getName(), LibFunc))
+ continue;
+
+ switch (LibFunc) {
+ case LibFunc::sqrtf:
+ case LibFunc::sqrt:
+ if (TTI->haveFastSqrt(Call->getType()) &&
+ optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
+ break;
+ continue;
+ default:
+ continue;
+ }
+
+ Changed = true;
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+PreservedAnalyses
+PartiallyInlineLibCallsPass::run(Function &F, AnalysisManager<Function> &AM) {
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ if (!runPartiallyInlineLibCalls(F, &TLI, &TTI))
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+}
+
+namespace {
+class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) {
+ initializePartiallyInlineLibCallsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const TargetTransformInfo *TTI =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return runPartiallyInlineLibCalls(F, TLI, TTI);
+ }
+};
+}
+
+char PartiallyInlineLibCallsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass,
+ "partially-inline-libcalls",
+ "Partially inline calls to library functions", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass,
+ "partially-inline-libcalls",
+ "Partially inline calls to library functions", false, false)
+
FunctionPass *llvm::createPartiallyInlineLibCallsPass() {
- return new PartiallyInlineLibCalls();
+ return new PartiallyInlineLibCallsLegacyPass();
}
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index b56b355991200..e47b636348e33 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -49,45 +49,32 @@
//===----------------------------------------------------------------------===//
#include "llvm/Pass.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/ADT/SetOperations.h"
+
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/IR/BasicBlock.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Statepoint.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#define DEBUG_TYPE "safepoint-placement"
+
STATISTIC(NumEntrySafepoints, "Number of entry safepoints inserted");
-STATISTIC(NumCallSafepoints, "Number of call safepoints inserted");
STATISTIC(NumBackedgeSafepoints, "Number of backedge safepoints inserted");
-STATISTIC(CallInLoop, "Number of loops w/o safepoints due to calls in loop");
-STATISTIC(FiniteExecution, "Number of loops w/o safepoints finite execution");
+STATISTIC(CallInLoop,
+ "Number of loops without safepoints due to calls in loop");
+STATISTIC(FiniteExecution,
+          "Number of loops without safepoints due to finite execution");
using namespace llvm;
@@ -108,9 +95,6 @@ static cl::opt<int> CountedLoopTripWidth("spp-counted-loop-trip-width",
static cl::opt<bool> SplitBackedge("spp-split-backedge", cl::Hidden,
cl::init(false));
-// Print tracing output
-static cl::opt<bool> TraceLSP("spp-trace", cl::Hidden, cl::init(false));
-
namespace {
/// An analysis pass whose purpose is to identify each of the backedges in
@@ -138,8 +122,8 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
bool runOnLoop(Loop *);
void runOnLoopAndSubLoops(Loop *L) {
// Visit all the subloops
- for (auto I = L->begin(), E = L->end(); I != E; I++)
- runOnLoopAndSubLoops(*I);
+ for (Loop *I : *L)
+ runOnLoopAndSubLoops(I);
runOnLoop(L);
}
@@ -147,8 +131,8 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- for (auto I = LI->begin(), E = LI->end(); I != E; I++) {
- runOnLoopAndSubLoops(*I);
+ for (Loop *I : *LI) {
+ runOnLoopAndSubLoops(I);
}
return false;
}
@@ -200,13 +184,9 @@ static bool needsStatepoint(const CallSite &CS) {
if (call->isInlineAsm())
return false;
}
- if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS)) {
- return false;
- }
- return true;
-}
-static Value *ReplaceWithStatepoint(const CallSite &CS);
+ return !(isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS));
+}
/// Returns true if this loop is known to contain a call safepoint which
/// must unconditionally execute on any iteration of the loop which returns
@@ -278,43 +258,44 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
return /* not finite */ false;
}
-static void scanOneBB(Instruction *start, Instruction *end,
- std::vector<CallInst *> &calls,
- std::set<BasicBlock *> &seen,
- std::vector<BasicBlock *> &worklist) {
- for (BasicBlock::iterator itr(start);
- itr != start->getParent()->end() && itr != BasicBlock::iterator(end);
- itr++) {
- if (CallInst *CI = dyn_cast<CallInst>(&*itr)) {
- calls.push_back(CI);
- }
+static void scanOneBB(Instruction *Start, Instruction *End,
+ std::vector<CallInst *> &Calls,
+ DenseSet<BasicBlock *> &Seen,
+ std::vector<BasicBlock *> &Worklist) {
+ for (BasicBlock::iterator BBI(Start), BBE0 = Start->getParent()->end(),
+ BBE1 = BasicBlock::iterator(End);
+ BBI != BBE0 && BBI != BBE1; BBI++) {
+ if (CallInst *CI = dyn_cast<CallInst>(&*BBI))
+ Calls.push_back(CI);
+
// FIXME: This code does not handle invokes
- assert(!dyn_cast<InvokeInst>(&*itr) &&
+ assert(!isa<InvokeInst>(&*BBI) &&
"support for invokes in poll code needed");
+
// Only add the successor blocks if we reach the terminator instruction
// without encountering end first
- if (itr->isTerminator()) {
- BasicBlock *BB = itr->getParent();
+ if (BBI->isTerminator()) {
+ BasicBlock *BB = BBI->getParent();
for (BasicBlock *Succ : successors(BB)) {
- if (seen.count(Succ) == 0) {
- worklist.push_back(Succ);
- seen.insert(Succ);
+ if (Seen.insert(Succ).second) {
+ Worklist.push_back(Succ);
}
}
}
}
}
-static void scanInlinedCode(Instruction *start, Instruction *end,
- std::vector<CallInst *> &calls,
- std::set<BasicBlock *> &seen) {
- calls.clear();
- std::vector<BasicBlock *> worklist;
- seen.insert(start->getParent());
- scanOneBB(start, end, calls, seen, worklist);
- while (!worklist.empty()) {
- BasicBlock *BB = worklist.back();
- worklist.pop_back();
- scanOneBB(&*BB->begin(), end, calls, seen, worklist);
+
+static void scanInlinedCode(Instruction *Start, Instruction *End,
+ std::vector<CallInst *> &Calls,
+ DenseSet<BasicBlock *> &Seen) {
+ Calls.clear();
+ std::vector<BasicBlock *> Worklist;
+ Seen.insert(Start->getParent());
+ scanOneBB(Start, End, Calls, Seen, Worklist);
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.back();
+ Worklist.pop_back();
+ scanOneBB(&*BB->begin(), End, Calls, Seen, Worklist);
}
}
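The rewritten helpers above switch the visited set from std::set to DenseSet and test DenseSet::insert()'s return value instead of calling count() first. A minimal, self-contained sketch of that idiom (not part of the patch; the function name and the use of int keys are purely illustrative):

#include "llvm/ADT/DenseSet.h"
#include <vector>

// Record each value the first time it is seen; DenseSet::insert returns an
// (iterator, bool) pair whose bool is true only on first insertion, so the
// older "if (!Seen.count(X)) { Seen.insert(X); ... }" pattern collapses into
// a single call.
static void keepFirstOccurrences(const std::vector<int> &Items,
                                 std::vector<int> &Order) {
  llvm::DenseSet<int> Seen;
  for (int I : Items)
    if (Seen.insert(I).second)
      Order.push_back(I);
}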
@@ -324,29 +305,27 @@ bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
// Note: In common usage, there will be only one edge due to LoopSimplify
// having run sometime earlier in the pipeline, but this code must be correct
// w.r.t. loops with multiple backedges.
- BasicBlock *header = L->getHeader();
+ BasicBlock *Header = L->getHeader();
SmallVector<BasicBlock*, 16> LoopLatches;
L->getLoopLatches(LoopLatches);
- for (BasicBlock *pred : LoopLatches) {
- assert(L->contains(pred));
+ for (BasicBlock *Pred : LoopLatches) {
+ assert(L->contains(Pred));
// Make a policy decision about whether this loop needs a safepoint or
// not. Note that this is about unburdening the optimizer in loops, not
// avoiding the runtime cost of the actual safepoint.
if (!AllBackedges) {
- if (mustBeFiniteCountedLoop(L, SE, pred)) {
- if (TraceLSP)
- errs() << "skipping safepoint placement in finite loop\n";
+ if (mustBeFiniteCountedLoop(L, SE, Pred)) {
+ DEBUG(dbgs() << "skipping safepoint placement in finite loop\n");
FiniteExecution++;
continue;
}
if (CallSafepointsEnabled &&
- containsUnconditionalCallSafepoint(L, header, pred, *DT)) {
+ containsUnconditionalCallSafepoint(L, Header, Pred, *DT)) {
// Note: This is only semantically legal since we won't do any further
        // IPO or inlining before the actual call insertion. If we hadn't, we
        // might later lose this call safepoint.
- if (TraceLSP)
- errs() << "skipping safepoint placement due to unconditional call\n";
+ DEBUG(dbgs() << "skipping safepoint placement due to unconditional call\n");
CallInLoop++;
continue;
}
@@ -360,14 +339,11 @@ bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
// Safepoint insertion would involve creating a new basic block (as the
// target of the current backedge) which does the safepoint (of all live
// variables) and branches to the true header
- TerminatorInst *term = pred->getTerminator();
+ TerminatorInst *Term = Pred->getTerminator();
- if (TraceLSP) {
- errs() << "[LSP] terminator instruction: ";
- term->dump();
- }
+ DEBUG(dbgs() << "[LSP] terminator instruction: " << *Term);
- PollLocations.push_back(term);
+ PollLocations.push_back(Term);
}
return false;
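The hunk above replaces the ad-hoc -spp-trace flag with LLVM's standard DEBUG(dbgs() << ...) facility. A minimal sketch of how such output is gated (not from the patch; the DEBUG_TYPE string and the helper function are illustrative):

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "safepoint-placement" // illustrative; use the pass's own DEBUG_TYPE

static void reportSkippedLoop(unsigned LoopId) {
  // Printed only in builds with assertions, and only when the tool is run
  // with -debug or -debug-only=<DEBUG_TYPE>; it compiles away otherwise.
  DEBUG(llvm::dbgs() << "skipping safepoint placement in loop " << LoopId << "\n");
}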
@@ -411,27 +387,26 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
// hasNextInstruction and nextInstruction are used to iterate
// through a "straight line" execution sequence.
- auto hasNextInstruction = [](Instruction *I) {
- if (!I->isTerminator()) {
+ auto HasNextInstruction = [](Instruction *I) {
+ if (!I->isTerminator())
return true;
- }
+
BasicBlock *nextBB = I->getParent()->getUniqueSuccessor();
return nextBB && (nextBB->getUniquePredecessor() != nullptr);
};
- auto nextInstruction = [&hasNextInstruction](Instruction *I) {
- assert(hasNextInstruction(I) &&
+ auto NextInstruction = [&](Instruction *I) {
+ assert(HasNextInstruction(I) &&
"first check if there is a next instruction!");
- if (I->isTerminator()) {
+
+ if (I->isTerminator())
return &I->getParent()->getUniqueSuccessor()->front();
- } else {
- return &*++I->getIterator();
- }
+ return &*++I->getIterator();
};
- Instruction *cursor = nullptr;
- for (cursor = &F.getEntryBlock().front(); hasNextInstruction(cursor);
- cursor = nextInstruction(cursor)) {
+ Instruction *Cursor = nullptr;
+ for (Cursor = &F.getEntryBlock().front(); HasNextInstruction(Cursor);
+ Cursor = NextInstruction(Cursor)) {
// We need to ensure a safepoint poll occurs before any 'real' call. The
// easiest way to ensure finite execution between safepoints in the face of
@@ -440,51 +415,17 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
// which can grow the stack by an unbounded amount. This isn't required
// for GC semantics per se, but is a common requirement for languages
// which detect stack overflow via guard pages and then throw exceptions.
- if (auto CS = CallSite(cursor)) {
+ if (auto CS = CallSite(Cursor)) {
if (doesNotRequireEntrySafepointBefore(CS))
continue;
break;
}
}
- assert((hasNextInstruction(cursor) || cursor->isTerminator()) &&
+ assert((HasNextInstruction(Cursor) || Cursor->isTerminator()) &&
"either we stopped because of a call, or because of terminator");
- return cursor;
-}
-
-/// Identify the list of call sites which need to be have parseable state
-static void findCallSafepoints(Function &F,
- std::vector<CallSite> &Found /*rval*/) {
- assert(Found.empty() && "must be empty!");
- for (Instruction &I : instructions(F)) {
- Instruction *inst = &I;
- if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
- CallSite CS(inst);
-
- // No safepoint needed or wanted
- if (!needsStatepoint(CS)) {
- continue;
- }
-
- Found.push_back(CS);
- }
- }
-}
-
-/// Implement a unique function which doesn't require we sort the input
-/// vector. Doing so has the effect of changing the output of a couple of
-/// tests in ways which make them less useful in testing fused safepoints.
-template <typename T> static void unique_unsorted(std::vector<T> &vec) {
- std::set<T> seen;
- std::vector<T> tmp;
- vec.reserve(vec.size());
- std::swap(tmp, vec);
- for (auto V : tmp) {
- if (seen.insert(V).second) {
- vec.push_back(V);
- }
- }
+ return Cursor;
}
static const char *const GCSafepointPollName = "gc.safepoint_poll";
@@ -514,24 +455,6 @@ static bool enableEntrySafepoints(Function &F) { return !NoEntry; }
static bool enableBackedgeSafepoints(Function &F) { return !NoBackedge; }
static bool enableCallSafepoints(Function &F) { return !NoCall; }
-// Normalize basic block to make it ready to be target of invoke statepoint.
-// Ensure that 'BB' does not have phi nodes. It may require spliting it.
-static BasicBlock *normalizeForInvokeSafepoint(BasicBlock *BB,
- BasicBlock *InvokeParent) {
- BasicBlock *ret = BB;
-
- if (!BB->getUniquePredecessor()) {
- ret = SplitBlockPredecessors(BB, InvokeParent, "");
- }
-
- // Now that 'ret' has unique predecessor we can safely remove all phi nodes
- // from it
- FoldSingleEntryPHINodes(ret);
- assert(!isa<PHINode>(ret->begin()));
-
- return ret;
-}
-
bool PlaceSafepoints::runOnFunction(Function &F) {
if (F.isDeclaration() || F.empty()) {
// This is a declaration, nothing to do. Must exit early to avoid crash in
@@ -549,13 +472,13 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
if (!shouldRewriteFunction(F))
return false;
- bool modified = false;
+ bool Modified = false;
// In various bits below, we rely on the fact that uses are reachable from
// defs. When there are basic blocks unreachable from the entry, dominance
  // and reachability queries return nonsensical results. Thus, we preprocess
// the function to ensure these properties hold.
- modified |= removeUnreachableBlocks(F);
+ Modified |= removeUnreachableBlocks(F);
// STEP 1 - Insert the safepoint polling locations. We do not need to
// actually insert parse points yet. That will be done for all polls and
@@ -574,8 +497,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
// with for the moment.
legacy::FunctionPassManager FPM(F.getParent());
bool CanAssumeCallSafepoints = enableCallSafepoints(F);
- PlaceBackedgeSafepointsImpl *PBS =
- new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
+ auto *PBS = new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
FPM.add(PBS);
FPM.run(F);
@@ -603,7 +525,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
// The poll location must be the terminator of a loop latch block.
for (TerminatorInst *Term : PollLocations) {
// We are inserting a poll, the function is modified
- modified = true;
+ Modified = true;
if (SplitBackedge) {
// Split the backedge of the loop and insert the poll within that new
@@ -643,14 +565,13 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
}
if (enableEntrySafepoints(F)) {
- Instruction *Location = findLocationForEntrySafepoint(F, DT);
- if (!Location) {
- // policy choice not to insert?
- } else {
+ if (Instruction *Location = findLocationForEntrySafepoint(F, DT)) {
PollsNeeded.push_back(Location);
- modified = true;
+ Modified = true;
NumEntrySafepoints++;
}
+ // TODO: else we should assert that there was, in fact, a policy choice to
+ // not insert an entry safepoint poll.
}
// Now that we've identified all the needed safepoint poll locations, insert
@@ -661,71 +582,8 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
RuntimeCalls.end());
}
- PollsNeeded.clear(); // make sure we don't accidentally use
- // The dominator tree has been invalidated by the inlining performed in the
- // above loop. TODO: Teach the inliner how to update the dom tree?
- DT.recalculate(F);
-
- if (enableCallSafepoints(F)) {
- std::vector<CallSite> Calls;
- findCallSafepoints(F, Calls);
- NumCallSafepoints += Calls.size();
- ParsePointNeeded.insert(ParsePointNeeded.end(), Calls.begin(), Calls.end());
- }
-
- // Unique the vectors since we can end up with duplicates if we scan the call
- // site for call safepoints after we add it for entry or backedge. The
- // only reason we need tracking at all is that some functions might have
- // polls but not call safepoints and thus we might miss marking the runtime
- // calls for the polls. (This is useful in test cases!)
- unique_unsorted(ParsePointNeeded);
-
- // Any parse point (no matter what source) will be handled here
-
- // We're about to start modifying the function
- if (!ParsePointNeeded.empty())
- modified = true;
-
- // Now run through and insert the safepoints, but do _NOT_ update or remove
- // any existing uses. We have references to live variables that need to
- // survive to the last iteration of this loop.
- std::vector<Value *> Results;
- Results.reserve(ParsePointNeeded.size());
- for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
- CallSite &CS = ParsePointNeeded[i];
-
- // For invoke statepoints we need to remove all phi nodes at the normal
- // destination block.
- // Reason for this is that we can place gc_result only after last phi node
- // in basic block. We will get malformed code after RAUW for the
- // gc_result if one of this phi nodes uses result from the invoke.
- if (InvokeInst *Invoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
- normalizeForInvokeSafepoint(Invoke->getNormalDest(),
- Invoke->getParent());
- }
-
- Value *GCResult = ReplaceWithStatepoint(CS);
- Results.push_back(GCResult);
- }
- assert(Results.size() == ParsePointNeeded.size());
-
- // Adjust all users of the old call sites to use the new ones instead
- for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
- CallSite &CS = ParsePointNeeded[i];
- Value *GCResult = Results[i];
- if (GCResult) {
- // Can not RAUW for the invoke gc result in case of phi nodes preset.
- assert(CS.isCall() || !isa<PHINode>(cast<Instruction>(GCResult)->getParent()->begin()));
-
- // Replace all uses with the new call
- CS.getInstruction()->replaceAllUsesWith(GCResult);
- }
- // Now that we've handled all uses, remove the original call itself
- // Note: The insert point can't be the deleted instruction!
- CS.getInstruction()->eraseFromParent();
- }
- return modified;
+ return Modified;
}
char PlaceBackedgeSafepointsImpl::ID = 0;
@@ -763,191 +621,60 @@ InsertSafepointPoll(Instruction *InsertBefore,
auto *F = M->getFunction(GCSafepointPollName);
assert(F && "gc.safepoint_poll function is missing");
- assert(F->getType()->getElementType() ==
+ assert(F->getValueType() ==
FunctionType::get(Type::getVoidTy(M->getContext()), false) &&
"gc.safepoint_poll declared with wrong type");
assert(!F->empty() && "gc.safepoint_poll must be a non-empty function");
CallInst *PollCall = CallInst::Create(F, "", InsertBefore);
// Record some information about the call site we're replacing
- BasicBlock::iterator before(PollCall), after(PollCall);
- bool isBegin(false);
- if (before == OrigBB->begin()) {
- isBegin = true;
- } else {
- before--;
- }
- after++;
- assert(after != OrigBB->end() && "must have successor");
+ BasicBlock::iterator Before(PollCall), After(PollCall);
+ bool IsBegin = false;
+ if (Before == OrigBB->begin())
+ IsBegin = true;
+ else
+ Before--;
- // do the actual inlining
+ After++;
+ assert(After != OrigBB->end() && "must have successor");
+
+ // Do the actual inlining
InlineFunctionInfo IFI;
bool InlineStatus = InlineFunction(PollCall, IFI);
assert(InlineStatus && "inline must succeed");
(void)InlineStatus; // suppress warning in release-asserts
- // Check post conditions
+ // Check post-conditions
assert(IFI.StaticAllocas.empty() && "can't have allocs");
- std::vector<CallInst *> calls; // new calls
- std::set<BasicBlock *> BBs; // new BBs + insertee
+ std::vector<CallInst *> Calls; // new calls
+ DenseSet<BasicBlock *> BBs; // new BBs + insertee
+
  // Include only the newly inserted instructions. Note: begin may not be valid
// if we inserted to the beginning of the basic block
- BasicBlock::iterator start;
- if (isBegin) {
- start = OrigBB->begin();
- } else {
- start = before;
- start++;
- }
+ BasicBlock::iterator Start = IsBegin ? OrigBB->begin() : std::next(Before);
// If your poll function includes an unreachable at the end, that's not
// valid. Bugpoint likes to create this, so check for it.
- assert(isPotentiallyReachable(&*start, &*after, nullptr, nullptr) &&
+ assert(isPotentiallyReachable(&*Start, &*After) &&
"malformed poll function");
- scanInlinedCode(&*(start), &*(after), calls, BBs);
- assert(!calls.empty() && "slow path not found for safepoint poll");
+ scanInlinedCode(&*Start, &*After, Calls, BBs);
+ assert(!Calls.empty() && "slow path not found for safepoint poll");
// Record the fact we need a parsable state at the runtime call contained in
// the poll function. This is required so that the runtime knows how to
// parse the last frame when we actually take the safepoint (i.e. execute
// the slow path)
assert(ParsePointsNeeded.empty());
- for (size_t i = 0; i < calls.size(); i++) {
-
+ for (auto *CI : Calls) {
// No safepoint needed or wanted
- if (!needsStatepoint(calls[i])) {
+ if (!needsStatepoint(CI))
continue;
- }
// These are likely runtime calls. Should we assert that via calling
// convention or something?
- ParsePointsNeeded.push_back(CallSite(calls[i]));
- }
- assert(ParsePointsNeeded.size() <= calls.size());
-}
-
-/// Replaces the given call site (Call or Invoke) with a gc.statepoint
-/// intrinsic with an empty deoptimization arguments list. This does
-/// NOT do explicit relocation for GC support.
-static Value *ReplaceWithStatepoint(const CallSite &CS /* to replace */) {
- assert(CS.getInstruction()->getModule() && "must be set");
-
- // TODO: technically, a pass is not allowed to get functions from within a
- // function pass since it might trigger a new function addition. Refactor
- // this logic out to the initialization of the pass. Doesn't appear to
- // matter in practice.
-
- // Then go ahead and use the builder do actually do the inserts. We insert
- // immediately before the previous instruction under the assumption that all
- // arguments will be available here. We can't insert afterwards since we may
- // be replacing a terminator.
- IRBuilder<> Builder(CS.getInstruction());
-
- // Note: The gc args are not filled in at this time, that's handled by
- // RewriteStatepointsForGC (which is currently under review).
-
- // Create the statepoint given all the arguments
- Instruction *Token = nullptr;
-
- uint64_t ID;
- uint32_t NumPatchBytes;
-
- AttributeSet OriginalAttrs = CS.getAttributes();
- Attribute AttrID =
- OriginalAttrs.getAttribute(AttributeSet::FunctionIndex, "statepoint-id");
- Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
- AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");
-
- AttrBuilder AttrsToRemove;
- bool HasID = AttrID.isStringAttribute() &&
- !AttrID.getValueAsString().getAsInteger(10, ID);
-
- if (HasID)
- AttrsToRemove.addAttribute("statepoint-id");
- else
- ID = 0xABCDEF00;
-
- bool HasNumPatchBytes =
- AttrNumPatchBytes.isStringAttribute() &&
- !AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);
-
- if (HasNumPatchBytes)
- AttrsToRemove.addAttribute("statepoint-num-patch-bytes");
- else
- NumPatchBytes = 0;
-
- OriginalAttrs = OriginalAttrs.removeAttributes(
- CS.getInstruction()->getContext(), AttributeSet::FunctionIndex,
- AttrsToRemove);
-
- if (CS.isCall()) {
- CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
- CallInst *Call = Builder.CreateGCStatepointCall(
- ID, NumPatchBytes, CS.getCalledValue(),
- makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None,
- "safepoint_token");
- Call->setTailCall(ToReplace->isTailCall());
- Call->setCallingConv(ToReplace->getCallingConv());
-
- // In case if we can handle this set of attributes - set up function
- // attributes directly on statepoint and return attributes later for
- // gc_result intrinsic.
- Call->setAttributes(OriginalAttrs.getFnAttributes());
-
- Token = Call;
-
- // Put the following gc_result and gc_relocate calls immediately after
- // the old call (which we're about to delete).
- assert(ToReplace->getNextNode() && "not a terminator, must have next");
- Builder.SetInsertPoint(ToReplace->getNextNode());
- Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
- } else if (CS.isInvoke()) {
- InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());
-
- // Insert the new invoke into the old block. We'll remove the old one in a
- // moment at which point this will become the new terminator for the
- // original block.
- Builder.SetInsertPoint(ToReplace->getParent());
- InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
- ID, NumPatchBytes, CS.getCalledValue(), ToReplace->getNormalDest(),
- ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
- None, None, "safepoint_token");
-
- Invoke->setCallingConv(ToReplace->getCallingConv());
-
- // In case if we can handle this set of attributes - set up function
- // attributes directly on statepoint and return attributes later for
- // gc_result intrinsic.
- Invoke->setAttributes(OriginalAttrs.getFnAttributes());
-
- Token = Invoke;
-
- // We'll insert the gc.result into the normal block
- BasicBlock *NormalDest = ToReplace->getNormalDest();
- // Can not insert gc.result in case of phi nodes preset.
- // Should have removed this cases prior to running this function
- assert(!isa<PHINode>(NormalDest->begin()));
- Instruction *IP = &*(NormalDest->getFirstInsertionPt());
- Builder.SetInsertPoint(IP);
- } else {
- llvm_unreachable("unexpect type of CallSite");
- }
- assert(Token);
-
- // Handle the return value of the original call - update all uses to use a
- // gc_result hanging off the statepoint node we just inserted
-
- // Only add the gc_result iff there is actually a used result
- if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
- std::string TakenName =
- CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
- CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), TakenName);
- GCResult->setAttributes(OriginalAttrs.getRetAttributes());
- return GCResult;
- } else {
- // No return value for the call.
- return nullptr;
+ ParsePointsNeeded.push_back(CallSite(CI));
}
+ assert(ParsePointsNeeded.size() <= Calls.size());
}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index bcadd4e2bee69..b930a8fb7e999 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -20,7 +20,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/Reassociate.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
@@ -39,9 +39,11 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
+using namespace reassociate;
#define DEBUG_TYPE "reassociate"
@@ -49,17 +51,6 @@ STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
-namespace {
- struct ValueEntry {
- unsigned Rank;
- Value *Op;
- ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
- };
- inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
- return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start.
- }
-}
-
#ifndef NDEBUG
/// Print out the expression identified in the Ops list.
///
@@ -75,120 +66,35 @@ static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
}
#endif
-namespace {
- /// \brief Utility class representing a base and exponent pair which form one
- /// factor of some product.
- struct Factor {
- Value *Base;
- unsigned Power;
-
- Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}
-
- /// \brief Sort factors in descending order by their power.
- struct PowerDescendingSorter {
- bool operator()(const Factor &LHS, const Factor &RHS) {
- return LHS.Power > RHS.Power;
- }
- };
-
- /// \brief Compare factors for equal powers.
- struct PowerEqual {
- bool operator()(const Factor &LHS, const Factor &RHS) {
- return LHS.Power == RHS.Power;
- }
- };
- };
-
- /// Utility class representing a non-constant Xor-operand. We classify
- /// non-constant Xor-Operands into two categories:
- /// C1) The operand is in the form "X & C", where C is a constant and C != ~0
- /// C2)
- /// C2.1) The operand is in the form of "X | C", where C is a non-zero
- /// constant.
- /// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
- /// operand as "E | 0"
- class XorOpnd {
- public:
- XorOpnd(Value *V);
-
- bool isInvalid() const { return SymbolicPart == nullptr; }
- bool isOrExpr() const { return isOr; }
- Value *getValue() const { return OrigVal; }
- Value *getSymbolicPart() const { return SymbolicPart; }
- unsigned getSymbolicRank() const { return SymbolicRank; }
- const APInt &getConstPart() const { return ConstPart; }
-
- void Invalidate() { SymbolicPart = OrigVal = nullptr; }
- void setSymbolicRank(unsigned R) { SymbolicRank = R; }
-
- // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank.
- // The purpose is twofold:
- // 1) Cluster together the operands sharing the same symbolic-value.
- // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
- // could potentially shorten crital path, and expose more loop-invariants.
- // Note that values' rank are basically defined in RPO order (FIXME).
- // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
- // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
- // "z" in the order of X-Y-Z is better than any other orders.
- struct PtrSortFunctor {
- bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) {
- return LHS->getSymbolicRank() < RHS->getSymbolicRank();
- }
- };
- private:
- Value *OrigVal;
- Value *SymbolicPart;
- APInt ConstPart;
- unsigned SymbolicRank;
- bool isOr;
- };
-}
-
-namespace {
- class Reassociate : public FunctionPass {
- DenseMap<BasicBlock*, unsigned> RankMap;
- DenseMap<AssertingVH<Value>, unsigned> ValueRankMap;
- SetVector<AssertingVH<Instruction> > RedoInsts;
- bool MadeChange;
- public:
- static char ID; // Pass identification, replacement for typeid
- Reassociate() : FunctionPass(ID) {
- initializeReassociatePass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
- private:
- void BuildRankMap(Function &F);
- unsigned getRank(Value *V);
- void canonicalizeOperands(Instruction *I);
- void ReassociateExpression(BinaryOperator *I);
- void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
- Value *OptimizeExpression(BinaryOperator *I,
- SmallVectorImpl<ValueEntry> &Ops);
- Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
- Value *OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
- bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd,
- Value *&Res);
- bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
- APInt &ConstOpnd, Value *&Res);
- bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
- SmallVectorImpl<Factor> &Factors);
- Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder,
- SmallVectorImpl<Factor> &Factors);
- Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
- Value *RemoveFactorFromExpression(Value *V, Value *Factor);
- void EraseInst(Instruction *I);
- void RecursivelyEraseDeadInsts(Instruction *I,
- SetVector<AssertingVH<Instruction>> &Insts);
- void OptimizeInst(Instruction *I);
- Instruction *canonicalizeNegConstExpr(Instruction *I);
- };
-}
+/// Utility class representing a non-constant Xor-operand. We classify
+/// non-constant Xor-Operands into two categories:
+/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
+/// C2)
+/// C2.1) The operand is in the form of "X | C", where C is a non-zero
+/// constant.
+/// C2.2) Any operand E which falls into neither C1 nor C2.1; we view such
+/// an operand as "E | 0"
+class llvm::reassociate::XorOpnd {
+public:
+ XorOpnd(Value *V);
+
+ bool isInvalid() const { return SymbolicPart == nullptr; }
+ bool isOrExpr() const { return isOr; }
+ Value *getValue() const { return OrigVal; }
+ Value *getSymbolicPart() const { return SymbolicPart; }
+ unsigned getSymbolicRank() const { return SymbolicRank; }
+ const APInt &getConstPart() const { return ConstPart; }
+
+ void Invalidate() { SymbolicPart = OrigVal = nullptr; }
+ void setSymbolicRank(unsigned R) { SymbolicRank = R; }
+
+private:
+ Value *OrigVal;
+ Value *SymbolicPart;
+ APInt ConstPart;
+ unsigned SymbolicRank;
+ bool isOr;
+};
XorOpnd::XorOpnd(Value *V) {
assert(!isa<ConstantInt>(V) && "No ConstantInt");
@@ -217,13 +123,6 @@ XorOpnd::XorOpnd(Value *V) {
isOr = true;
}
-char Reassociate::ID = 0;
-INITIALIZE_PASS(Reassociate, "reassociate",
- "Reassociate expressions", false, false)
-
-// Public interface to the Reassociate pass
-FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
-
/// Return true if V is an instruction of the specified opcode and if it
/// only has one use.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
@@ -246,7 +145,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
return nullptr;
}
-void Reassociate::BuildRankMap(Function &F) {
+void ReassociatePass::BuildRankMap(
+ Function &F, ReversePostOrderTraversal<Function *> &RPOT) {
unsigned i = 2;
// Assign distinct ranks to function arguments.
@@ -255,22 +155,19 @@ void Reassociate::BuildRankMap(Function &F) {
DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
}
- ReversePostOrderTraversal<Function*> RPOT(&F);
- for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
- E = RPOT.end(); I != E; ++I) {
- BasicBlock *BB = *I;
+ for (BasicBlock *BB : RPOT) {
unsigned BBRank = RankMap[BB] = ++i << 16;
// Walk the basic block, adding precomputed ranks for any instructions that
// we cannot move. This ensures that the ranks for these instructions are
// all different in the block.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (mayBeMemoryDependent(*I))
- ValueRankMap[&*I] = ++BBRank;
+ for (Instruction &I : *BB)
+ if (mayBeMemoryDependent(I))
+ ValueRankMap[&I] = ++BBRank;
}
}
-unsigned Reassociate::getRank(Value *V) {
+unsigned ReassociatePass::getRank(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
@@ -301,7 +198,7 @@ unsigned Reassociate::getRank(Value *V) {
}
// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
-void Reassociate::canonicalizeOperands(Instruction *I) {
+void ReassociatePass::canonicalizeOperands(Instruction *I) {
assert(isa<BinaryOperator>(I) && "Expected binary operator.");
assert(I->isCommutative() && "Expected commutative operator.");
@@ -711,8 +608,8 @@ static bool LinearizeExprTree(BinaryOperator *I,
/// Now that the operands for this expression tree are
/// linearized and optimized, emit them in-order.
-void Reassociate::RewriteExprTree(BinaryOperator *I,
- SmallVectorImpl<ValueEntry> &Ops) {
+void ReassociatePass::RewriteExprTree(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
assert(Ops.size() > 1 && "Single values should be used directly!");
// Since our optimizations should never increase the number of operations, the
@@ -1095,7 +992,7 @@ static Value *EmitAddTreeOfValues(Instruction *I,
/// If V is an expression tree that is a multiplication sequence,
/// and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
-Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
+Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
if (!BO)
return nullptr;
@@ -1129,7 +1026,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
}
} else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
- APFloat F1(FC1->getValueAPF());
+ const APFloat &F1 = FC1->getValueAPF();
APFloat F2(FC2->getValueAPF());
F2.changeSign();
if (F1.compare(F2) == APFloat::cmpEqual) {
@@ -1258,9 +1155,9 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
// If it was successful, true is returned, and the "R" and "C" is returned
// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
// and both "Res" and "ConstOpnd" remain unchanged.
-//
-bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
- APInt &ConstOpnd, Value *&Res) {
+//
+bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
+ APInt &ConstOpnd, Value *&Res) {
// Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
// = ((x | c1) ^ c1) ^ (c1 ^ c2)
// = (x & ~c1) ^ (c1 ^ c2)
@@ -1294,8 +1191,9 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
// via "Res" and "ConstOpnd", respectively (If the entire expression is
// evaluated to a constant, the Res is set to NULL); otherwise, false is
// returned, and both "Res" and "ConstOpnd" remain unchanged.
-bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
- APInt &ConstOpnd, Value *&Res) {
+bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
+ XorOpnd *Opnd2, APInt &ConstOpnd,
+ Value *&Res) {
Value *X = Opnd1->getSymbolicPart();
if (X != Opnd2->getSymbolicPart())
return false;
@@ -1369,8 +1267,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
/// to a single Value, it is returned, otherwise the Ops list is mutated as
/// necessary.
-Value *Reassociate::OptimizeXor(Instruction *I,
- SmallVectorImpl<ValueEntry> &Ops) {
+Value *ReassociatePass::OptimizeXor(Instruction *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
return V;
@@ -1405,7 +1303,19 @@ Value *Reassociate::OptimizeXor(Instruction *I,
// the same symbolic value cluster together. For instance, the input operand
// sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
// ("x | 123", "x & 789", "y & 456").
- std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
+ //
+ // The purpose is twofold:
+ // 1) Cluster together the operands sharing the same symbolic-value.
+ // 2) The operand with the smaller symbolic-value-rank is permuted earlier,
+ // which could potentially shorten the critical path and expose more
+ // loop-invariants. Note that values' ranks are basically defined in RPO
+ // order (FIXME). So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined
+ // earlier than Y, which is defined earlier than Z. Permuting "x | 1",
+ // "Y & 2", "z" in the order X-Y-Z is better than any other order.
+ std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(),
+ [](XorOpnd *LHS, XorOpnd *RHS) {
+ return LHS->getSymbolicRank() < RHS->getSymbolicRank();
+ });
// Step 3: Combine adjacent operands
XorOpnd *PrevOpnd = nullptr;
@@ -1478,8 +1388,8 @@ Value *Reassociate::OptimizeXor(Instruction *I,
/// Optimize a series of operands to an 'add' instruction. This
/// optimizes based on identities. If it can be reduced to a single Value, it
/// is returned, otherwise the Ops list is mutated as necessary.
-Value *Reassociate::OptimizeAdd(Instruction *I,
- SmallVectorImpl<ValueEntry> &Ops) {
+Value *ReassociatePass::OptimizeAdd(Instruction *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
// Scan the operand lists looking for X and -X pairs. If we find any, we
// can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
// scan for any
@@ -1716,8 +1626,8 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
///
/// \returns Whether any factors have a power greater than one.
-bool Reassociate::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
- SmallVectorImpl<Factor> &Factors) {
+bool ReassociatePass::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
+ SmallVectorImpl<Factor> &Factors) {
// FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
// Compute the sum of powers of simplifiable factors.
unsigned FactorPowerSum = 0;
@@ -1763,7 +1673,10 @@ bool Reassociate::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
  // below our minimum of '4'.
assert(FactorPowerSum >= 4);
- std::stable_sort(Factors.begin(), Factors.end(), Factor::PowerDescendingSorter());
+ std::stable_sort(Factors.begin(), Factors.end(),
+ [](const Factor &LHS, const Factor &RHS) {
+ return LHS.Power > RHS.Power;
+ });
return true;
}
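The change above folds the old PowerDescendingSorter functor into a lambda. A self-contained sketch of the same pattern outside the pass (the Factor struct here is a stand-in, not the pass's own type):

#include <algorithm>
#include <cassert>
#include <vector>

struct Factor {
  int Base;
  unsigned Power;
};

int main() {
  std::vector<Factor> Factors = {{1, 2}, {2, 5}, {3, 3}};
  // Sort by descending Power; stable_sort keeps equal powers in their
  // original relative order, matching the old functor's behavior.
  std::stable_sort(Factors.begin(), Factors.end(),
                   [](const Factor &LHS, const Factor &RHS) {
                     return LHS.Power > RHS.Power;
                   });
  assert(Factors.front().Power == 5 && Factors.back().Power == 2);
  return 0;
}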
@@ -1790,8 +1703,9 @@ static Value *buildMultiplyTree(IRBuilder<> &Builder,
/// equal and the powers are sorted in decreasing order, compute the minimal
/// DAG of multiplies to compute the final product, and return that product
/// value.
-Value *Reassociate::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
- SmallVectorImpl<Factor> &Factors) {
+Value *
+ReassociatePass::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
+ SmallVectorImpl<Factor> &Factors) {
assert(Factors[0].Power);
SmallVector<Value *, 4> OuterProduct;
for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
@@ -1822,7 +1736,9 @@ Value *Reassociate::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
// Unique factors with equal powers -- we've folded them into the first one's
// base.
Factors.erase(std::unique(Factors.begin(), Factors.end(),
- Factor::PowerEqual()),
+ [](const Factor &LHS, const Factor &RHS) {
+ return LHS.Power == RHS.Power;
+ }),
Factors.end());
// Iteratively collect the base of each factor with an add power into the
@@ -1845,8 +1761,8 @@ Value *Reassociate::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
return V;
}
-Value *Reassociate::OptimizeMul(BinaryOperator *I,
- SmallVectorImpl<ValueEntry> &Ops) {
+Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
// We can only optimize the multiplies when there is a chain of more than
// three, such that a balanced tree might require fewer total multiplies.
if (Ops.size() < 4)
@@ -1869,8 +1785,8 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I,
return nullptr;
}
-Value *Reassociate::OptimizeExpression(BinaryOperator *I,
- SmallVectorImpl<ValueEntry> &Ops) {
+Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
Constant *Cst = nullptr;
@@ -1930,7 +1846,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
// Remove dead instructions and if any operands are trivially dead add them to
// Insts so they will be removed as well.
-void Reassociate::RecursivelyEraseDeadInsts(
+void ReassociatePass::RecursivelyEraseDeadInsts(
Instruction *I, SetVector<AssertingVH<Instruction>> &Insts) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
SmallVector<Value *, 4> Ops(I->op_begin(), I->op_end());
@@ -1945,7 +1861,7 @@ void Reassociate::RecursivelyEraseDeadInsts(
}
/// Zap the given instruction, adding interesting operands to the work list.
-void Reassociate::EraseInst(Instruction *I) {
+void ReassociatePass::EraseInst(Instruction *I) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
// Erase the dead instruction.
@@ -1969,7 +1885,7 @@ void Reassociate::EraseInst(Instruction *I) {
// Canonicalize expressions of the following form:
// x + (-Constant * y) -> x - (Constant * y)
// x - (-Constant * y) -> x + (Constant * y)
-Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) {
+Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) {
if (!I->hasOneUse() || I->getType()->isVectorTy())
return nullptr;
@@ -2046,7 +1962,7 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) {
/// Inspect and optimize the given instruction. Note that erasing
/// instructions is not allowed.
-void Reassociate::OptimizeInst(Instruction *I) {
+void ReassociatePass::OptimizeInst(Instruction *I) {
// Only consider operations that we understand.
if (!isa<BinaryOperator>(I))
return;
@@ -2173,7 +2089,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
ReassociateExpression(BO);
}
-void Reassociate::ReassociateExpression(BinaryOperator *I) {
+void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
SmallVector<RepeatedValue, 8> Tree;
@@ -2255,46 +2171,53 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
RewriteExprTree(I, Ops);
}
-bool Reassociate::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
- // Calculate the rank map for F
- BuildRankMap(F);
+PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
+ // Reassociate needs each instruction's operands to have been processed
+ // already, so we first perform an RPO traversal of the basic blocks so
+ // that, when we process a basic block, all of its dominators have been
+ // processed before it.
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ BuildRankMap(F, RPOT);
MadeChange = false;
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ for (BasicBlock *BI : RPOT) {
+ // Use a worklist to keep track of which instructions have been processed
+ // (and which ones won't be optimized again), so that when redoing
+ // instructions we can immediately optimize those that won't be visited later.
+ SmallSet<Instruction *, 8> Worklist;
+
+ // Insert all instructions in the BB
+ for (Instruction &I : *BI)
+ Worklist.insert(&I);
+
// Optimize every instruction in the basic block.
- for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) {
+ // This instruction has been processed.
+ Worklist.erase(&*II);
if (isInstructionTriviallyDead(&*II)) {
EraseInst(&*II++);
} else {
OptimizeInst(&*II);
- assert(II->getParent() == BI && "Moved to a different block!");
+ assert(II->getParent() == &*BI && "Moved to a different block!");
++II;
}
- // Make a copy of all the instructions to be redone so we can remove dead
- // instructions.
- SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
- // Iterate over all instructions to be reevaluated and remove trivially dead
- // instructions. If any operand of the trivially dead instruction becomes
- // dead mark it for deletion as well. Continue this process until all
- // trivially dead instructions have been removed.
- while (!ToRedo.empty()) {
- Instruction *I = ToRedo.pop_back_val();
- if (isInstructionTriviallyDead(I))
- RecursivelyEraseDeadInsts(I, ToRedo);
- }
-
- // Now that we have removed dead instructions, we can reoptimize the
- // remaining instructions.
- while (!RedoInsts.empty()) {
- Instruction *I = RedoInsts.pop_back_val();
- if (isInstructionTriviallyDead(I))
- EraseInst(I);
- else
- OptimizeInst(I);
+ // If the above optimizations produced new instructions to optimize or
+ // made modifications which need to be redone, do them now if they won't
+ // be handled later.
+ while (!RedoInsts.empty()) {
+ Instruction *I = RedoInsts.pop_back_val();
+ // Only process an instruction here if it will not be visited later: it
+ // must not still be pending in this block's worklist, and its parent
+ // block must already have been reached (based on rank).
+ if ((I->getParent() != BI || !Worklist.count(I)) &&
+ RankMap[I->getParent()] <= RankMap[BI]) {
+ if (isInstructionTriviallyDead(I))
+ EraseInst(I);
+ else
+ OptimizeInst(I);
+ }
+ }
}
}
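The new driver above walks blocks in reverse post-order and ranks them so that redone instructions from already-visited blocks can be handled immediately. A minimal sketch of the RPO numbering it relies on (not from the patch; the helper function is illustrative):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"

// Ignoring back edges, every predecessor of a block is numbered before the
// block itself, so a block's rank never decreases along forward edges.
static void numberBlocksInRPO(llvm::Function &F,
                              llvm::DenseMap<llvm::BasicBlock *, unsigned> &Rank) {
  llvm::ReversePostOrderTraversal<llvm::Function *> RPOT(&F);
  unsigned N = 0;
  for (llvm::BasicBlock *BB : RPOT)
    Rank[BB] = ++N;
}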
@@ -2302,5 +2225,46 @@ bool Reassociate::runOnFunction(Function &F) {
RankMap.clear();
ValueRankMap.clear();
- return MadeChange;
+ if (MadeChange) {
+ // FIXME: This should also 'preserve the CFG'.
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
+ }
+
+ return PreservedAnalyses::all();
+}
+
+namespace {
+ class ReassociateLegacyPass : public FunctionPass {
+ ReassociatePass Impl;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ReassociateLegacyPass() : FunctionPass(ID) {
+ initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ FunctionAnalysisManager DummyFAM;
+ auto PA = Impl.run(F, DummyFAM);
+ return !PA.areAllPreserved();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+ };
+}
+
+char ReassociateLegacyPass::ID = 0;
+INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
+ "Reassociate expressions", false, false)
+
+// Public interface to the Reassociate pass
+FunctionPass *llvm::createReassociatePass() {
+ return new ReassociateLegacyPass();
}
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 915f89780c080..615029dd161bb 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -68,7 +68,7 @@ INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
false, false)
bool RegToMem::runOnFunction(Function &F) {
- if (F.isDeclaration())
+ if (F.isDeclaration() || skipFunction(F))
return false;
// Insert all new allocas into entry block.
@@ -89,10 +89,9 @@ bool RegToMem::runOnFunction(Function &F) {
// Find the escaped instructions. But don't create stack slots for
// allocas in entry block.
std::list<Instruction*> WorkList;
- for (Function::iterator ibb = F.begin(), ibe = F.end();
- ibb != ibe; ++ibb)
- for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
- iib != iie; ++iib) {
+ for (BasicBlock &ibb : F)
+ for (BasicBlock::iterator iib = ibb.begin(), iie = ibb.end(); iib != iie;
+ ++iib) {
if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
valueEscapes(&*iib)) {
WorkList.push_front(&*iib);
@@ -101,25 +100,22 @@ bool RegToMem::runOnFunction(Function &F) {
// Demote escaped instructions
NumRegsDemoted += WorkList.size();
- for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
- ile = WorkList.end(); ilb != ile; ++ilb)
- DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
+ for (Instruction *ilb : WorkList)
+ DemoteRegToStack(*ilb, false, AllocaInsertionPoint);
WorkList.clear();
// Find all phi's
- for (Function::iterator ibb = F.begin(), ibe = F.end();
- ibb != ibe; ++ibb)
- for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
- iib != iie; ++iib)
+ for (BasicBlock &ibb : F)
+ for (BasicBlock::iterator iib = ibb.begin(), iie = ibb.end(); iib != iie;
+ ++iib)
if (isa<PHINode>(iib))
WorkList.push_front(&*iib);
// Demote phi nodes
NumPhisDemoted += WorkList.size();
- for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
- ile = WorkList.end(); ilb != ile; ++ilb)
- DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
+ for (Instruction *ilb : WorkList)
+ DemotePHIToStack(cast<PHINode>(ilb), AllocaInsertionPoint);
return true;
}
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index d77d5745e60cc..bab39a32677ff 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -14,7 +14,6 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
@@ -63,7 +62,7 @@ static cl::opt<unsigned>
RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
cl::init(6));
-#ifdef XDEBUG
+#ifdef EXPENSIVE_CHECKS
static bool ClobberNonLive = true;
#else
static bool ClobberNonLive = false;
@@ -72,19 +71,10 @@ static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
cl::location(ClobberNonLive),
cl::Hidden);
-static cl::opt<bool> UseDeoptBundles("rs4gc-use-deopt-bundles", cl::Hidden,
- cl::init(false));
static cl::opt<bool>
AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
cl::Hidden, cl::init(true));
-/// Should we split vectors of pointers into their individual elements? This
-/// is known to be buggy, but the alternate implementation isn't yet ready.
-/// This is purely to provide a debugging and dianostic hook until the vector
-/// split is replaced with vector relocations.
-static cl::opt<bool> UseVectorSplit("rs4gc-split-vector-values", cl::Hidden,
- cl::init(true));
-
namespace {
struct RewriteStatepointsForGC : public ModulePass {
static char ID; // Pass identification, replacement for typeid
@@ -141,24 +131,25 @@ ModulePass *llvm::createRewriteStatepointsForGCPass() {
INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
"Make relocations explicit at statepoints", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
"Make relocations explicit at statepoints", false, false)
namespace {
struct GCPtrLivenessData {
/// Values defined in this block.
- DenseMap<BasicBlock *, DenseSet<Value *>> KillSet;
+ MapVector<BasicBlock *, SetVector<Value *>> KillSet;
  /// Values used in this block (and thus live); does not include values
/// killed within this block.
- DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet;
+ MapVector<BasicBlock *, SetVector<Value *>> LiveSet;
/// Values live into this basic block (i.e. used by any
/// instruction in this basic block or ones reachable from here)
- DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn;
+ MapVector<BasicBlock *, SetVector<Value *>> LiveIn;
/// Values live out of this basic block (i.e. live into
/// any successor block)
- DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut;
+ MapVector<BasicBlock *, SetVector<Value *>> LiveOut;
};
// The type of the internal cache used inside the findBasePointers family
@@ -171,9 +162,9 @@ struct GCPtrLivenessData {
// Generally, after the execution of a full findBasePointer call, only the
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
-typedef DenseMap<Value *, Value *> DefiningValueMapTy;
-typedef DenseSet<Value *> StatepointLiveSetTy;
-typedef DenseMap<AssertingVH<Instruction>, AssertingVH<Value>>
+typedef MapVector<Value *, Value *> DefiningValueMapTy;
+typedef SetVector<Value *> StatepointLiveSetTy;
+typedef MapVector<AssertingVH<Instruction>, AssertingVH<Value>>
RematerializedValueMapTy;
struct PartiallyConstructedSafepointRecord {
@@ -181,7 +172,7 @@ struct PartiallyConstructedSafepointRecord {
StatepointLiveSetTy LiveSet;
/// Mapping from live pointers to a base-defining-value
- DenseMap<Value *, Value *> PointerToBase;
+ MapVector<Value *, Value *> PointerToBase;
/// The *new* gc.statepoint instruction itself. This produces the token
/// that normal path gc.relocates and the gc.result are tied to.
@@ -199,9 +190,8 @@ struct PartiallyConstructedSafepointRecord {
}
static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
- assert(UseDeoptBundles && "Should not be called otherwise!");
-
- Optional<OperandBundleUse> DeoptBundle = CS.getOperandBundle("deopt");
+ Optional<OperandBundleUse> DeoptBundle =
+ CS.getOperandBundle(LLVMContext::OB_deopt);
if (!DeoptBundle.hasValue()) {
assert(AllowStatepointWithNoDeoptInfo &&
@@ -229,7 +219,7 @@ static bool isGCPointerType(Type *T) {
// For the sake of this example GC, we arbitrarily pick addrspace(1) as our
// GC managed heap. We know that a pointer into this heap needs to be
// updated and that no other pointer does.
- return (1 == PT->getAddressSpace());
+ return PT->getAddressSpace() == 1;
return false;
}
@@ -260,8 +250,7 @@ static bool containsGCPtrType(Type *Ty) {
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
- return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
- containsGCPtrType);
+ return any_of(ST->subtypes(), containsGCPtrType);
return false;
}
@@ -273,19 +262,6 @@ static bool isUnhandledGCPointerType(Type *Ty) {
}
#endif
-static bool order_by_name(Value *a, Value *b) {
- if (a->hasName() && b->hasName()) {
- return -1 == a->getName().compare(b->getName());
- } else if (a->hasName() && !b->hasName()) {
- return true;
- } else if (!a->hasName() && b->hasName()) {
- return false;
- } else {
- // Better than nothing, but not stable
- return a < b;
- }
-}
-
// Return the name of the value suffixed with the provided value, or if the
// value didn't have a name, the default value specified.
static std::string suffixed_name_or(Value *V, StringRef Suffix,
@@ -297,30 +273,25 @@ static std::string suffixed_name_or(Value *V, StringRef Suffix,
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
// live. Values used by that instruction are considered live.
-static void analyzeParsePointLiveness(
- DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData,
- const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
- Instruction *inst = CS.getInstruction();
+static void
+analyzeParsePointLiveness(DominatorTree &DT,
+ GCPtrLivenessData &OriginalLivenessData, CallSite CS,
+ PartiallyConstructedSafepointRecord &Result) {
+ Instruction *Inst = CS.getInstruction();
StatepointLiveSetTy LiveSet;
- findLiveSetAtInst(inst, OriginalLivenessData, LiveSet);
+ findLiveSetAtInst(Inst, OriginalLivenessData, LiveSet);
if (PrintLiveSet) {
- // Note: This output is used by several of the test cases
- // The order of elements in a set is not stable, put them in a vec and sort
- // by name
- SmallVector<Value *, 64> Temp;
- Temp.insert(Temp.end(), LiveSet.begin(), LiveSet.end());
- std::sort(Temp.begin(), Temp.end(), order_by_name);
- errs() << "Live Variables:\n";
- for (Value *V : Temp)
+ dbgs() << "Live Variables:\n";
+ for (Value *V : LiveSet)
dbgs() << " " << V->getName() << " " << *V << "\n";
}
if (PrintLiveSetSize) {
- errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
- errs() << "Number live values: " << LiveSet.size() << "\n";
+ dbgs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
+ dbgs() << "Number live values: " << LiveSet.size() << "\n";
}
- result.LiveSet = LiveSet;
+ Result.LiveSet = LiveSet;
}
static bool isKnownBaseResult(Value *V);
@@ -372,8 +343,10 @@ findBaseDefiningValueOfVector(Value *I) {
return BaseDefiningValueResult(I, true);
if (isa<Constant>(I))
- // Constant vectors consist only of constant pointers.
- return BaseDefiningValueResult(I, true);
+ // The base of a constant vector consists only of constant null pointers.
+ // For the reasoning, see the similar case inside 'findBaseDefiningValue'.
+ return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()),
+ true);
if (isa<LoadInst>(I))
return BaseDefiningValueResult(I, true);
@@ -415,14 +388,20 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// We should have never reached here if this argument isn't an gc value
return BaseDefiningValueResult(I, true);
- if (isa<Constant>(I))
+ if (isa<Constant>(I)) {
// We assume that objects with a constant base (e.g. a global) can't move
// and don't need to be reported to the collector because they are always
- // live. All constants have constant bases. Besides global references, all
- // kinds of constants (e.g. undef, constant expressions, null pointers) can
- // be introduced by the inliner or the optimizer, especially on dynamically
- // dead paths. See e.g. test4 in constants.ll.
- return BaseDefiningValueResult(I, true);
+ // live. Besides global references, all kinds of constants (e.g. undef,
+ // constant expressions, null pointers) can be introduced by the inliner or
+ // the optimizer, especially on dynamically dead paths.
+ // Here we treat all of them as having a single null base. By doing this
+ // we try to avoid problems reporting various conflicts of the form
+ // "phi (const1, const2)" or "phi (const, regular gc ptr)".
+ // See the constant.ll file for relevant test cases.
+
+ return BaseDefiningValueResult(
+ ConstantPointerNull::get(cast<PointerType>(I->getType())), true);
+ }
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Def = CI->stripPointerCasts();
@@ -570,30 +549,36 @@ class BDVState {
public:
enum Status { Unknown, Base, Conflict };
- BDVState(Status s, Value *b = nullptr) : status(s), base(b) {
- assert(status != Base || b);
+ BDVState() : Status(Unknown), BaseValue(nullptr) {}
+
+ explicit BDVState(Status Status, Value *BaseValue = nullptr)
+ : Status(Status), BaseValue(BaseValue) {
+ assert(Status != Base || BaseValue);
}
- explicit BDVState(Value *b) : status(Base), base(b) {}
- BDVState() : status(Unknown), base(nullptr) {}
- Status getStatus() const { return status; }
- Value *getBase() const { return base; }
+ explicit BDVState(Value *BaseValue) : Status(Base), BaseValue(BaseValue) {}
+
+ Status getStatus() const { return Status; }
+ Value *getBaseValue() const { return BaseValue; }
bool isBase() const { return getStatus() == Base; }
bool isUnknown() const { return getStatus() == Unknown; }
bool isConflict() const { return getStatus() == Conflict; }
- bool operator==(const BDVState &other) const {
- return base == other.base && status == other.status;
+ bool operator==(const BDVState &Other) const {
+ return BaseValue == Other.BaseValue && Status == Other.Status;
}
bool operator!=(const BDVState &other) const { return !(*this == other); }
LLVM_DUMP_METHOD
- void dump() const { print(dbgs()); dbgs() << '\n'; }
-
+ void dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+ }
+
void print(raw_ostream &OS) const {
- switch (status) {
+ switch (getStatus()) {
case Unknown:
OS << "U";
break;
@@ -604,13 +589,13 @@ public:
OS << "C";
break;
};
- OS << " (" << base << " - "
- << (base ? base->getName() : "nullptr") << "): ";
+ OS << " (" << getBaseValue() << " - "
+ << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << "): ";
}
private:
- Status status;
- AssertingVH<Value> base; // non null only if status == base
+ Status Status;
+ AssertingVH<Value> BaseValue; // Non-null only if Status == Base.
};
}
@@ -621,75 +606,50 @@ static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
}
#endif
-namespace {
-// Values of type BDVState form a lattice, and this is a helper
-// class that implementes the meet operation. The meat of the meet
-// operation is implemented in MeetBDVStates::pureMeet
-class MeetBDVStates {
-public:
- /// Initializes the currentResult to the TOP state so that if can be met with
- /// any other state to produce that state.
- MeetBDVStates() {}
-
- // Destructively meet the current result with the given BDVState
- void meetWith(BDVState otherState) {
- currentResult = meet(otherState, currentResult);
- }
+static BDVState meetBDVStateImpl(const BDVState &LHS, const BDVState &RHS) {
+ switch (LHS.getStatus()) {
+ case BDVState::Unknown:
+ return RHS;
- BDVState getResult() const { return currentResult; }
+ case BDVState::Base:
+ assert(LHS.getBaseValue() && "can't be null");
+ if (RHS.isUnknown())
+ return LHS;
-private:
- BDVState currentResult;
-
- /// Perform a meet operation on two elements of the BDVState lattice.
- static BDVState meet(BDVState LHS, BDVState RHS) {
- assert((pureMeet(LHS, RHS) == pureMeet(RHS, LHS)) &&
- "math is wrong: meet does not commute!");
- BDVState Result = pureMeet(LHS, RHS);
- DEBUG(dbgs() << "meet of " << LHS << " with " << RHS
- << " produced " << Result << "\n");
- return Result;
- }
-
- static BDVState pureMeet(const BDVState &stateA, const BDVState &stateB) {
- switch (stateA.getStatus()) {
- case BDVState::Unknown:
- return stateB;
-
- case BDVState::Base:
- assert(stateA.getBase() && "can't be null");
- if (stateB.isUnknown())
- return stateA;
-
- if (stateB.isBase()) {
- if (stateA.getBase() == stateB.getBase()) {
- assert(stateA == stateB && "equality broken!");
- return stateA;
- }
- return BDVState(BDVState::Conflict);
+ if (RHS.isBase()) {
+ if (LHS.getBaseValue() == RHS.getBaseValue()) {
+ assert(LHS == RHS && "equality broken!");
+ return LHS;
}
- assert(stateB.isConflict() && "only three states!");
return BDVState(BDVState::Conflict);
-
- case BDVState::Conflict:
- return stateA;
}
- llvm_unreachable("only three states!");
+ assert(RHS.isConflict() && "only three states!");
+ return BDVState(BDVState::Conflict);
+
+ case BDVState::Conflict:
+ return LHS;
}
-};
+ llvm_unreachable("only three states!");
}
+// Values of type BDVState form a lattice, and this function implements the meet
+// operation.
+static BDVState meetBDVState(BDVState LHS, BDVState RHS) {
+ BDVState Result = meetBDVStateImpl(LHS, RHS);
+ assert(Result == meetBDVStateImpl(RHS, LHS) &&
+ "Math is wrong: meet does not commute!");
+ return Result;
+}
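// Illustrative sketch (not part of the patch): the meet above works over the
// three-element lattice {Unknown, Base(b), Conflict}, where Unknown is the
// identity for the meet and two different bases meet to Conflict. A standalone
// toy version with invented names (ToyStatus, ToyState, toyMeet) that mirrors
// those rules and the commutativity assert:

#include <cassert>

enum class ToyStatus { Unknown, Base, Conflict };

struct ToyState {
  ToyStatus Status = ToyStatus::Unknown;
  const void *Base = nullptr; // stands in for the base Value*
};

static ToyState toyMeet(ToyState LHS, ToyState RHS) {
  if (LHS.Status == ToyStatus::Unknown)
    return RHS;
  if (RHS.Status == ToyStatus::Unknown)
    return LHS;
  if (LHS.Status == ToyStatus::Conflict || RHS.Status == ToyStatus::Conflict)
    return {ToyStatus::Conflict, nullptr};
  // Both are Base: identical bases stay Base, different bases conflict.
  return LHS.Base == RHS.Base ? LHS : ToyState{ToyStatus::Conflict, nullptr};
}

int main() {
  int A = 0, B = 0;
  ToyState BaseA{ToyStatus::Base, &A}, BaseB{ToyStatus::Base, &B};
  assert(toyMeet({}, BaseA).Base == &A); // Unknown is the identity element.
  assert(toyMeet(BaseA, BaseA).Status == ToyStatus::Base);
  assert(toyMeet(BaseA, BaseB).Status == ToyStatus::Conflict);
  // The meet commutes, matching the assert in meetBDVState above.
  assert(toyMeet(BaseA, BaseB).Status == toyMeet(BaseB, BaseA).Status);
  return 0;
}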
-/// For a given value or instruction, figure out what base ptr it's derived
-/// from. For gc objects, this is simply itself. On success, returns a value
-/// which is the base pointer. (This is reliable and can be used for
-/// relocation.) On failure, returns nullptr.
-static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
- Value *def = findBaseOrBDV(I, cache);
+/// For a given value or instruction, figure out what base ptr it's derived from.
+/// For gc objects, this is simply itself. On success, returns a value which is
+/// the base pointer. (This is reliable and can be used for relocation.) On
+/// failure, returns nullptr.
+static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
+ Value *Def = findBaseOrBDV(I, Cache);
- if (isKnownBaseResult(def)) {
- return def;
- }
+ if (isKnownBaseResult(Def))
+ return Def;
// Here's the rough algorithm:
// - For every SSA value, construct a mapping to either an actual base
@@ -731,14 +691,14 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
// one for which we don't already know a definite base value for
/* scope */ {
SmallVector<Value*, 16> Worklist;
- Worklist.push_back(def);
- States.insert(std::make_pair(def, BDVState()));
+ Worklist.push_back(Def);
+ States.insert({Def, BDVState()});
while (!Worklist.empty()) {
Value *Current = Worklist.pop_back_val();
assert(!isKnownBaseResult(Current) && "why did it get added?");
auto visitIncomingValue = [&](Value *InVal) {
- Value *Base = findBaseOrBDV(InVal, cache);
+ Value *Base = findBaseOrBDV(InVal, Cache);
if (isKnownBaseResult(Base))
// Known bases won't need new instructions introduced and can be
// ignored safely
@@ -748,12 +708,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
if (States.insert(std::make_pair(Base, BDVState())).second)
Worklist.push_back(Base);
};
- if (PHINode *Phi = dyn_cast<PHINode>(Current)) {
- for (Value *InVal : Phi->incoming_values())
+ if (PHINode *PN = dyn_cast<PHINode>(Current)) {
+ for (Value *InVal : PN->incoming_values())
visitIncomingValue(InVal);
- } else if (SelectInst *Sel = dyn_cast<SelectInst>(Current)) {
- visitIncomingValue(Sel->getTrueValue());
- visitIncomingValue(Sel->getFalseValue());
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(Current)) {
+ visitIncomingValue(SI->getTrueValue());
+ visitIncomingValue(SI->getFalseValue());
} else if (auto *EE = dyn_cast<ExtractElementInst>(Current)) {
visitIncomingValue(EE->getVectorOperand());
} else if (auto *IE = dyn_cast<InsertElementInst>(Current)) {
@@ -762,7 +722,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
} else {
// There is one known class of instructions we know we don't handle.
assert(isa<ShuffleVectorInst>(Current));
- llvm_unreachable("unimplemented instruction case");
+ llvm_unreachable("Unimplemented instruction case");
}
}
}
@@ -784,12 +744,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
return I->second;
};
- bool progress = true;
- while (progress) {
+ bool Progress = true;
+ while (Progress) {
#ifndef NDEBUG
- const size_t oldSize = States.size();
+ const size_t OldSize = States.size();
#endif
- progress = false;
+ Progress = false;
// We're only changing values in this loop, thus safe to keep iterators.
// Since this is computing a fixed point, the order of visit does not
     // affect the result. TODO: We could use a worklist here and make this run
@@ -801,38 +761,39 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
// Given an input value for the current instruction, return a BDVState
// instance which represents the BDV of that value.
auto getStateForInput = [&](Value *V) mutable {
- Value *BDV = findBaseOrBDV(V, cache);
+ Value *BDV = findBaseOrBDV(V, Cache);
return getStateForBDV(BDV);
};
- MeetBDVStates calculateMeet;
- if (SelectInst *select = dyn_cast<SelectInst>(BDV)) {
- calculateMeet.meetWith(getStateForInput(select->getTrueValue()));
- calculateMeet.meetWith(getStateForInput(select->getFalseValue()));
- } else if (PHINode *Phi = dyn_cast<PHINode>(BDV)) {
- for (Value *Val : Phi->incoming_values())
- calculateMeet.meetWith(getStateForInput(Val));
+ BDVState NewState;
+ if (SelectInst *SI = dyn_cast<SelectInst>(BDV)) {
+ NewState = meetBDVState(NewState, getStateForInput(SI->getTrueValue()));
+ NewState =
+ meetBDVState(NewState, getStateForInput(SI->getFalseValue()));
+ } else if (PHINode *PN = dyn_cast<PHINode>(BDV)) {
+ for (Value *Val : PN->incoming_values())
+ NewState = meetBDVState(NewState, getStateForInput(Val));
} else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
// The 'meet' for an extractelement is slightly trivial, but it's still
// useful in that it drives us to conflict if our input is.
- calculateMeet.meetWith(getStateForInput(EE->getVectorOperand()));
+ NewState =
+ meetBDVState(NewState, getStateForInput(EE->getVectorOperand()));
} else {
       // Given there's an inherent type mismatch between the operands, this
       // will *always* produce Conflict.
auto *IE = cast<InsertElementInst>(BDV);
- calculateMeet.meetWith(getStateForInput(IE->getOperand(0)));
- calculateMeet.meetWith(getStateForInput(IE->getOperand(1)));
+ NewState = meetBDVState(NewState, getStateForInput(IE->getOperand(0)));
+ NewState = meetBDVState(NewState, getStateForInput(IE->getOperand(1)));
}
- BDVState oldState = States[BDV];
- BDVState newState = calculateMeet.getResult();
- if (oldState != newState) {
- progress = true;
- States[BDV] = newState;
+ BDVState OldState = States[BDV];
+ if (OldState != NewState) {
+ Progress = true;
+ States[BDV] = NewState;
}
}
- assert(oldSize == States.size() &&
+ assert(OldSize == States.size() &&
"fixed point shouldn't be adding any new nodes to state");
}
@@ -842,7 +803,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
#endif
-
+
// Insert Phis for all conflicts
// TODO: adjust naming patterns to avoid this order of iteration dependency
for (auto Pair : States) {
@@ -856,14 +817,13 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
// The problem is that we need to convert from a vector base to a scalar
     // base for the particular index we're interested in.
if (State.isBase() && isa<ExtractElementInst>(I) &&
- isa<VectorType>(State.getBase()->getType())) {
+ isa<VectorType>(State.getBaseValue()->getType())) {
auto *EE = cast<ExtractElementInst>(I);
// TODO: In many cases, the new instruction is just EE itself. We should
// exploit this, but can't do it here since it would break the invariant
// about the BDV not being known to be a base.
- auto *BaseInst = ExtractElementInst::Create(State.getBase(),
- EE->getIndexOperand(),
- "base_ee", EE);
+ auto *BaseInst = ExtractElementInst::Create(
+ State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
States[I] = BDVState(BDVState::Base, BaseInst);
}
@@ -871,10 +831,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
// Since we're joining a vector and scalar base, they can never be the
// same. As a result, we should always see insert element having reached
// the conflict state.
- if (isa<InsertElementInst>(I)) {
- assert(State.isConflict());
- }
-
+ assert(!isa<InsertElementInst>(I) || State.isConflict());
+
if (!State.isConflict())
continue;
@@ -887,12 +845,11 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
assert(NumPreds > 0 && "how did we reach here");
std::string Name = suffixed_name_or(I, ".base", "base_phi");
return PHINode::Create(I->getType(), NumPreds, Name, I);
- } else if (SelectInst *Sel = dyn_cast<SelectInst>(I)) {
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
// The undef will be replaced later
- UndefValue *Undef = UndefValue::get(Sel->getType());
+ UndefValue *Undef = UndefValue::get(SI->getType());
std::string Name = suffixed_name_or(I, ".base", "base_select");
- return SelectInst::Create(Sel->getCondition(), Undef,
- Undef, Name, Sel);
+ return SelectInst::Create(SI->getCondition(), Undef, Undef, Name, SI);
} else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType());
std::string Name = suffixed_name_or(I, ".base", "base_ee");
@@ -906,7 +863,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
return InsertElementInst::Create(VecUndef, ScalarUndef,
IE->getOperand(2), Name, IE);
}
-
};
Instruction *BaseInst = MakeBaseInstPlaceholder(I);
// Add metadata marking this as a base value
@@ -921,24 +877,21 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
   // instruction to propagate the base of its BDV and have entered that newly
   // introduced instruction into the state table. In either case, we are
   // assured to be able to determine an instruction which produces its base
- // pointer.
+ // pointer.
auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
- Value *BDV = findBaseOrBDV(Input, cache);
+ Value *BDV = findBaseOrBDV(Input, Cache);
Value *Base = nullptr;
if (isKnownBaseResult(BDV)) {
Base = BDV;
} else {
// Either conflict or base.
assert(States.count(BDV));
- Base = States[BDV].getBase();
+ Base = States[BDV].getBaseValue();
}
- assert(Base && "can't be null");
+ assert(Base && "Can't be null");
// The cast is needed since base traversal may strip away bitcasts
- if (Base->getType() != Input->getType() &&
- InsertPt) {
- Base = new BitCastInst(Base, Input->getType(), "cast",
- InsertPt);
- }
+ if (Base->getType() != Input->getType() && InsertPt)
+ Base = new BitCastInst(Base, Input->getType(), "cast", InsertPt);
return Base;
};
@@ -954,12 +907,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
if (!State.isConflict())
continue;
- if (PHINode *basephi = dyn_cast<PHINode>(State.getBase())) {
- PHINode *phi = cast<PHINode>(BDV);
- unsigned NumPHIValues = phi->getNumIncomingValues();
+ if (PHINode *BasePHI = dyn_cast<PHINode>(State.getBaseValue())) {
+ PHINode *PN = cast<PHINode>(BDV);
+ unsigned NumPHIValues = PN->getNumIncomingValues();
for (unsigned i = 0; i < NumPHIValues; i++) {
- Value *InVal = phi->getIncomingValue(i);
- BasicBlock *InBB = phi->getIncomingBlock(i);
+ Value *InVal = PN->getIncomingValue(i);
+ BasicBlock *InBB = PN->getIncomingBlock(i);
// If we've already seen InBB, add the same incoming value
// we added for it earlier. The IR verifier requires phi
@@ -970,22 +923,21 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
// bitcasts (and hence two distinct values) as incoming
// values for the same basic block.
- int blockIndex = basephi->getBasicBlockIndex(InBB);
- if (blockIndex != -1) {
- Value *oldBase = basephi->getIncomingValue(blockIndex);
- basephi->addIncoming(oldBase, InBB);
-
+ int BlockIndex = BasePHI->getBasicBlockIndex(InBB);
+ if (BlockIndex != -1) {
+ Value *OldBase = BasePHI->getIncomingValue(BlockIndex);
+ BasePHI->addIncoming(OldBase, InBB);
+
#ifndef NDEBUG
Value *Base = getBaseForInput(InVal, nullptr);
- // In essence this assert states: the only way two
- // values incoming from the same basic block may be
- // different is by being different bitcasts of the same
- // value. A cleanup that remains TODO is changing
- // findBaseOrBDV to return an llvm::Value of the correct
- // type (and still remain pure). This will remove the
- // need to add bitcasts.
- assert(Base->stripPointerCasts() == oldBase->stripPointerCasts() &&
- "sanity -- findBaseOrBDV should be pure!");
+ // In essence this assert states: the only way two values
+ // incoming from the same basic block may be different is by
+ // being different bitcasts of the same value. A cleanup
+ // that remains TODO is changing findBaseOrBDV to return an
+ // llvm::Value of the correct type (and still remain pure).
+ // This will remove the need to add bitcasts.
+ assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() &&
+ "Sanity -- findBaseOrBDV should be pure!");
#endif
continue;
}
@@ -994,28 +946,25 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
// need to insert a bitcast in the incoming block.
// TODO: Need to split critical edges if insertion is needed
Value *Base = getBaseForInput(InVal, InBB->getTerminator());
- basephi->addIncoming(Base, InBB);
+ BasePHI->addIncoming(Base, InBB);
}
- assert(basephi->getNumIncomingValues() == NumPHIValues);
- } else if (SelectInst *BaseSel = dyn_cast<SelectInst>(State.getBase())) {
- SelectInst *Sel = cast<SelectInst>(BDV);
- // Operand 1 & 2 are true, false path respectively. TODO: refactor to
- // something more safe and less hacky.
- for (int i = 1; i <= 2; i++) {
- Value *InVal = Sel->getOperand(i);
- // Find the instruction which produces the base for each input. We may
- // need to insert a bitcast.
- Value *Base = getBaseForInput(InVal, BaseSel);
- BaseSel->setOperand(i, Base);
- }
- } else if (auto *BaseEE = dyn_cast<ExtractElementInst>(State.getBase())) {
+ assert(BasePHI->getNumIncomingValues() == NumPHIValues);
+ } else if (SelectInst *BaseSI =
+ dyn_cast<SelectInst>(State.getBaseValue())) {
+ SelectInst *SI = cast<SelectInst>(BDV);
+
+ // Find the instruction which produces the base for each input.
+ // We may need to insert a bitcast.
+ BaseSI->setTrueValue(getBaseForInput(SI->getTrueValue(), BaseSI));
+ BaseSI->setFalseValue(getBaseForInput(SI->getFalseValue(), BaseSI));
+ } else if (auto *BaseEE =
+ dyn_cast<ExtractElementInst>(State.getBaseValue())) {
Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
// Find the instruction which produces the base for each input. We may
// need to insert a bitcast.
- Value *Base = getBaseForInput(InVal, BaseEE);
- BaseEE->setOperand(0, Base);
+ BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE));
} else {
- auto *BaseIE = cast<InsertElementInst>(State.getBase());
+ auto *BaseIE = cast<InsertElementInst>(State.getBaseValue());
auto *BdvIE = cast<InsertElementInst>(BDV);
auto UpdateOperand = [&](int OperandIdx) {
Value *InVal = BdvIE->getOperand(OperandIdx);
@@ -1025,69 +974,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
UpdateOperand(0); // vector operand
UpdateOperand(1); // scalar operand
}
-
- }
-
- // Now that we're done with the algorithm, see if we can optimize the
- // results slightly by reducing the number of new instructions needed.
- // Arguably, this should be integrated into the algorithm above, but
- // doing as a post process step is easier to reason about for the moment.
- DenseMap<Value *, Value *> ReverseMap;
- SmallPtrSet<Instruction *, 16> NewInsts;
- SmallSetVector<AssertingVH<Instruction>, 16> Worklist;
- // Note: We need to visit the states in a deterministic order. We uses the
- // Keys we sorted above for this purpose. Note that we are papering over a
- // bigger problem with the algorithm above - it's visit order is not
- // deterministic. A larger change is needed to fix this.
- for (auto Pair : States) {
- auto *BDV = Pair.first;
- auto State = Pair.second;
- Value *Base = State.getBase();
- assert(BDV && Base);
- assert(!isKnownBaseResult(BDV) && "why did it get added?");
- assert(isKnownBaseResult(Base) &&
- "must be something we 'know' is a base pointer");
- if (!State.isConflict())
- continue;
-
- ReverseMap[Base] = BDV;
- if (auto *BaseI = dyn_cast<Instruction>(Base)) {
- NewInsts.insert(BaseI);
- Worklist.insert(BaseI);
- }
- }
- auto ReplaceBaseInstWith = [&](Value *BDV, Instruction *BaseI,
- Value *Replacement) {
- // Add users which are new instructions (excluding self references)
- for (User *U : BaseI->users())
- if (auto *UI = dyn_cast<Instruction>(U))
- if (NewInsts.count(UI) && UI != BaseI)
- Worklist.insert(UI);
- // Then do the actual replacement
- NewInsts.erase(BaseI);
- ReverseMap.erase(BaseI);
- BaseI->replaceAllUsesWith(Replacement);
- assert(States.count(BDV));
- assert(States[BDV].isConflict() && States[BDV].getBase() == BaseI);
- States[BDV] = BDVState(BDVState::Conflict, Replacement);
- BaseI->eraseFromParent();
- };
- const DataLayout &DL = cast<Instruction>(def)->getModule()->getDataLayout();
- while (!Worklist.empty()) {
- Instruction *BaseI = Worklist.pop_back_val();
- assert(NewInsts.count(BaseI));
- Value *Bdv = ReverseMap[BaseI];
- if (auto *BdvI = dyn_cast<Instruction>(Bdv))
- if (BaseI->isIdenticalTo(BdvI)) {
- DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n");
- ReplaceBaseInstWith(Bdv, BaseI, Bdv);
- continue;
- }
- if (Value *V = SimplifyInstruction(BaseI, DL)) {
- DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n");
- ReplaceBaseInstWith(Bdv, BaseI, V);
- continue;
- }
}
// Cache all of our results so we can cheaply reuse them
@@ -1095,25 +981,27 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
// relation and one of the base pointer relation! FIXME
for (auto Pair : States) {
auto *BDV = Pair.first;
- Value *base = Pair.second.getBase();
- assert(BDV && base);
+ Value *Base = Pair.second.getBaseValue();
+ assert(BDV && Base);
+ assert(!isKnownBaseResult(BDV) && "why did it get added?");
- std::string fromstr = cache.count(BDV) ? cache[BDV]->getName() : "none";
DEBUG(dbgs() << "Updating base value cache"
- << " for: " << BDV->getName()
- << " from: " << fromstr
- << " to: " << base->getName() << "\n");
-
- if (cache.count(BDV)) {
- // Once we transition from the BDV relation being store in the cache to
+ << " for: " << BDV->getName() << " from: "
+ << (Cache.count(BDV) ? Cache[BDV]->getName().str() : "none")
+ << " to: " << Base->getName() << "\n");
+
+ if (Cache.count(BDV)) {
+ assert(isKnownBaseResult(Base) &&
+ "must be something we 'know' is a base pointer");
+    // Once we transition from the BDV relation being stored in the Cache to
// the base relation being stored, it must be stable
- assert((!isKnownBaseResult(cache[BDV]) || cache[BDV] == base) &&
+ assert((!isKnownBaseResult(Cache[BDV]) || Cache[BDV] == Base) &&
"base relation should be stable");
}
- cache[BDV] = base;
+ Cache[BDV] = Base;
}
- assert(cache.count(def));
- return cache[def];
+ assert(Cache.count(Def));
+ return Cache[Def];
}
// For a set of live pointers (base and/or derived), identify the base
@@ -1133,15 +1021,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
// pointer was a base pointer.
static void
findBasePointers(const StatepointLiveSetTy &live,
- DenseMap<Value *, Value *> &PointerToBase,
+ MapVector<Value *, Value *> &PointerToBase,
DominatorTree *DT, DefiningValueMapTy &DVCache) {
- // For the naming of values inserted to be deterministic - which makes for
- // much cleaner and more stable tests - we need to assign an order to the
- // live values. DenseSets do not provide a deterministic order across runs.
- SmallVector<Value *, 64> Temp;
- Temp.insert(Temp.end(), live.begin(), live.end());
- std::sort(Temp.begin(), Temp.end(), order_by_name);
- for (Value *ptr : Temp) {
+ for (Value *ptr : live) {
Value *base = findBasePointer(ptr, DVCache);
assert(base && "failed to find base pointer");
PointerToBase[ptr] = base;
@@ -1149,41 +1031,24 @@ findBasePointers(const StatepointLiveSetTy &live,
DT->dominates(cast<Instruction>(base)->getParent(),
cast<Instruction>(ptr)->getParent())) &&
"The base we found better dominate the derived pointer");
-
- // If you see this trip and like to live really dangerously, the code should
- // be correct, just with idioms the verifier can't handle. You can try
- // disabling the verifier at your own substantial risk.
- assert(!isa<ConstantPointerNull>(base) &&
- "the relocation code needs adjustment to handle the relocation of "
- "a null pointer constant without causing false positives in the "
- "safepoint ir verifier.");
}
}
 /// Find the required base pointers (and adjust the live set) for the given
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
- const CallSite &CS,
+ CallSite CS,
PartiallyConstructedSafepointRecord &result) {
- DenseMap<Value *, Value *> PointerToBase;
+ MapVector<Value *, Value *> PointerToBase;
findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache);
if (PrintBasePointers) {
- // Note: Need to print these in a stable order since this is checked in
- // some tests.
errs() << "Base Pairs (w/o Relocation):\n";
- SmallVector<Value *, 64> Temp;
- Temp.reserve(PointerToBase.size());
- for (auto Pair : PointerToBase) {
- Temp.push_back(Pair.first);
- }
- std::sort(Temp.begin(), Temp.end(), order_by_name);
- for (Value *Ptr : Temp) {
- Value *Base = PointerToBase[Ptr];
+ for (auto &Pair : PointerToBase) {
errs() << " derived ";
- Ptr->printAsOperand(errs(), false);
+ Pair.first->printAsOperand(errs(), false);
errs() << " base ";
- Base->printAsOperand(errs(), false);
+ Pair.second->printAsOperand(errs(), false);
       errs() << "\n";
}
}
@@ -1194,7 +1059,7 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
- const CallSite &CS,
+ CallSite CS,
PartiallyConstructedSafepointRecord &result);
static void recomputeLiveInValues(
@@ -1206,8 +1071,7 @@ static void recomputeLiveInValues(
computeLiveInValues(DT, F, RevisedLivenessData);
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
- const CallSite &CS = toUpdate[i];
- recomputeLiveInValues(RevisedLivenessData, CS, info);
+ recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info);
}
}
@@ -1257,8 +1121,7 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) {
// These attributes control the generation of the gc.statepoint call /
// invoke itself; and once the gc.statepoint is in place, they're of no
// use.
- if (Attr.hasAttribute("statepoint-num-patch-bytes") ||
- Attr.hasAttribute("statepoint-id"))
+ if (isStatepointDirectiveAttr(Attr))
continue;
Ret = Ret.addAttributes(
@@ -1349,11 +1212,37 @@ namespace {
class DeferredReplacement {
AssertingVH<Instruction> Old;
AssertingVH<Instruction> New;
+ bool IsDeoptimize = false;
+
+ DeferredReplacement() {}
public:
- explicit DeferredReplacement(Instruction *Old, Instruction *New) :
- Old(Old), New(New) {
- assert(Old != New && "Not allowed!");
+ static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
+ assert(Old != New && Old && New &&
+ "Cannot RAUW equal values or to / from null!");
+
+ DeferredReplacement D;
+ D.Old = Old;
+ D.New = New;
+ return D;
+ }
+
+ static DeferredReplacement createDelete(Instruction *ToErase) {
+ DeferredReplacement D;
+ D.Old = ToErase;
+ return D;
+ }
+
+ static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
+#ifndef NDEBUG
+ auto *F = cast<CallInst>(Old)->getCalledFunction();
+ assert(F && F->getIntrinsicID() == Intrinsic::experimental_deoptimize &&
+ "Only way to construct a deoptimize deferred replacement");
+#endif
+ DeferredReplacement D;
+ D.Old = Old;
+ D.IsDeoptimize = true;
+ return D;
}
/// Does the task represented by this instance.
@@ -1362,12 +1251,23 @@ public:
Instruction *NewI = New;
assert(OldI != NewI && "Disallowed at construction?!");
+    assert((!IsDeoptimize || !New) &&
+           "Deoptimize intrinsics are not replaced!");
Old = nullptr;
New = nullptr;
if (NewI)
OldI->replaceAllUsesWith(NewI);
+
+ if (IsDeoptimize) {
+      // Note: we've inserted instructions, so the call to llvm.deoptimize may
+      // not necessarily be followed by the matching return.
+ auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
+ new UnreachableInst(RI->getContext(), RI);
+ RI->eraseFromParent();
+ }
+
OldI->eraseFromParent();
}
};
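// Illustrative sketch (not part of the patch): the factory-style
// DeferredReplacement above exists so that RAUW and deletion happen only
// after the whole statepoint rewrite has been laid out, rather than while raw
// Instruction pointers are still being walked. A standalone toy version of
// the same "record now, mutate later" pattern, with invented names
// (ToyNode, ToyReplacement):

#include <cassert>
#include <string>
#include <vector>

struct ToyNode {
  std::string Name;
  bool Erased = false;
};

class ToyReplacement {
  ToyNode *Old = nullptr;
  ToyNode *New = nullptr;
  ToyReplacement() = default;

public:
  static ToyReplacement createRAUW(ToyNode *Old, ToyNode *New) {
    assert(Old && New && Old != New && "Cannot RAUW equal values or null!");
    ToyReplacement R;
    R.Old = Old;
    R.New = New;
    return R;
  }

  static ToyReplacement createDelete(ToyNode *ToErase) {
    ToyReplacement R;
    R.Old = ToErase;
    return R;
  }

  void apply() {
    if (New)
      New->Name = Old->Name; // stands in for replaceAllUsesWith/takeName
    Old->Erased = true;      // stands in for eraseFromParent
  }
};

int main() {
  ToyNode OldCall{"call"}, GCResult{"result"}, DeadHolder{"holder"};
  std::vector<ToyReplacement> Replacements;
  Replacements.push_back(ToyReplacement::createRAUW(&OldCall, &GCResult));
  Replacements.push_back(ToyReplacement::createDelete(&DeadHolder));
  for (ToyReplacement &R : Replacements) // applied only after the walk is done
    R.apply();
  assert(OldCall.Erased && DeadHolder.Erased && GCResult.Name == "call");
  return 0;
}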
@@ -1380,8 +1280,6 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
PartiallyConstructedSafepointRecord &Result,
std::vector<DeferredReplacement> &Replacements) {
assert(BasePtrs.size() == LiveVariables.size());
- assert((UseDeoptBundles || isStatepoint(CS)) &&
- "This method expects to be rewriting a statepoint");
// Then go ahead and use the builder do actually do the inserts. We insert
// immediately before the previous instruction under the assumption that all
@@ -1391,47 +1289,53 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
IRBuilder<> Builder(InsertBefore);
ArrayRef<Value *> GCArgs(LiveVariables);
- uint64_t StatepointID = 0xABCDEF00;
+ uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
uint32_t NumPatchBytes = 0;
uint32_t Flags = uint32_t(StatepointFlags::None);
- ArrayRef<Use> CallArgs;
- ArrayRef<Use> DeoptArgs;
+ ArrayRef<Use> CallArgs(CS.arg_begin(), CS.arg_end());
+ ArrayRef<Use> DeoptArgs = GetDeoptBundleOperands(CS);
ArrayRef<Use> TransitionArgs;
-
- Value *CallTarget = nullptr;
-
- if (UseDeoptBundles) {
- CallArgs = {CS.arg_begin(), CS.arg_end()};
- DeoptArgs = GetDeoptBundleOperands(CS);
- // TODO: we don't fill in TransitionArgs or Flags in this branch, but we
- // could have an operand bundle for that too.
- AttributeSet OriginalAttrs = CS.getAttributes();
-
- Attribute AttrID = OriginalAttrs.getAttribute(AttributeSet::FunctionIndex,
- "statepoint-id");
- if (AttrID.isStringAttribute())
- AttrID.getValueAsString().getAsInteger(10, StatepointID);
-
- Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
- AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");
- if (AttrNumPatchBytes.isStringAttribute())
- AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);
-
- CallTarget = CS.getCalledValue();
- } else {
- // This branch will be gone soon, and we will soon only support the
- // UseDeoptBundles == true configuration.
- Statepoint OldSP(CS);
- StatepointID = OldSP.getID();
- NumPatchBytes = OldSP.getNumPatchBytes();
- Flags = OldSP.getFlags();
-
- CallArgs = {OldSP.arg_begin(), OldSP.arg_end()};
- DeoptArgs = {OldSP.vm_state_begin(), OldSP.vm_state_end()};
- TransitionArgs = {OldSP.gc_transition_args_begin(),
- OldSP.gc_transition_args_end()};
- CallTarget = OldSP.getCalledValue();
+ if (auto TransitionBundle =
+ CS.getOperandBundle(LLVMContext::OB_gc_transition)) {
+ Flags |= uint32_t(StatepointFlags::GCTransition);
+ TransitionArgs = TransitionBundle->Inputs;
+ }
+
+ // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
+  // with a return value, we lower them as never-returning calls to
+ // __llvm_deoptimize that are followed by unreachable to get better codegen.
+ bool IsDeoptimize = false;
+
+ StatepointDirectives SD =
+ parseStatepointDirectivesFromAttrs(CS.getAttributes());
+ if (SD.NumPatchBytes)
+ NumPatchBytes = *SD.NumPatchBytes;
+ if (SD.StatepointID)
+ StatepointID = *SD.StatepointID;
+
+ Value *CallTarget = CS.getCalledValue();
+ if (Function *F = dyn_cast<Function>(CallTarget)) {
+ if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) {
+ // Calls to llvm.experimental.deoptimize are lowered to calls to the
+ // __llvm_deoptimize symbol. We want to resolve this now, since the
+ // verifier does not allow taking the address of an intrinsic function.
+
+ SmallVector<Type *, 8> DomainTy;
+ for (Value *Arg : CallArgs)
+ DomainTy.push_back(Arg->getType());
+ auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
+ /* isVarArg = */ false);
+
+ // Note: CallTarget can be a bitcast instruction of a symbol if there are
+ // calls to @llvm.experimental.deoptimize with different argument types in
+ // the same module. This is fine -- we assume the frontend knew what it
+ // was doing when generating this kind of IR.
+ CallTarget =
+ F->getParent()->getOrInsertFunction("__llvm_deoptimize", FTy);
+
+ IsDeoptimize = true;
+ }
}
// Create the statepoint given all the arguments
@@ -1514,7 +1418,13 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
}
assert(Token && "Should be set in one of the above branches!");
- if (UseDeoptBundles) {
+ if (IsDeoptimize) {
+ // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
+ // transform the tail-call like structure to a call to a void function
+ // followed by unreachable to get better codegen.
+ Replacements.push_back(
+ DeferredReplacement::createDeoptimizeReplacement(CS.getInstruction()));
+ } else {
Token->setName("statepoint_token");
if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
StringRef Name =
@@ -1528,24 +1438,12 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// llvm::Instruction. Instead, we defer the replacement and deletion to
// after the live sets have been made explicit in the IR, and we no longer
// have raw pointers to worry about.
- Replacements.emplace_back(CS.getInstruction(), GCResult);
+ Replacements.emplace_back(
+ DeferredReplacement::createRAUW(CS.getInstruction(), GCResult));
} else {
- Replacements.emplace_back(CS.getInstruction(), nullptr);
+ Replacements.emplace_back(
+ DeferredReplacement::createDelete(CS.getInstruction()));
}
- } else {
- assert(!CS.getInstruction()->hasNUsesOrMore(2) &&
- "only valid use before rewrite is gc.result");
- assert(!CS.getInstruction()->hasOneUse() ||
- isGCResult(cast<Instruction>(*CS.getInstruction()->user_begin())));
-
- // Take the name of the original statepoint token if there was one.
- Token->takeName(CS.getInstruction());
-
- // Update the gc.result of the original statepoint (if any) to use the newly
- // inserted statepoint. This is safe to do here since the token can't be
- // considered a live reference.
- CS.getInstruction()->replaceAllUsesWith(Token);
- CS.getInstruction()->eraseFromParent();
}
Result.StatepointToken = Token;
@@ -1555,43 +1453,13 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, Token, Builder);
}
-namespace {
-struct NameOrdering {
- Value *Base;
- Value *Derived;
-
- bool operator()(NameOrdering const &a, NameOrdering const &b) {
- return -1 == a.Derived->getName().compare(b.Derived->getName());
- }
-};
-}
-
-static void StabilizeOrder(SmallVectorImpl<Value *> &BaseVec,
- SmallVectorImpl<Value *> &LiveVec) {
- assert(BaseVec.size() == LiveVec.size());
-
- SmallVector<NameOrdering, 64> Temp;
- for (size_t i = 0; i < BaseVec.size(); i++) {
- NameOrdering v;
- v.Base = BaseVec[i];
- v.Derived = LiveVec[i];
- Temp.push_back(v);
- }
-
- std::sort(Temp.begin(), Temp.end(), NameOrdering());
- for (size_t i = 0; i < BaseVec.size(); i++) {
- BaseVec[i] = Temp[i].Base;
- LiveVec[i] = Temp[i].Derived;
- }
-}
-
// Replace an existing gc.statepoint with a new one and a set of gc.relocates
// which make the relocations happening at this safepoint explicit.
//
// WARNING: Does not do any fixup to adjust users of the original live
 // values. That's the caller's responsibility.
static void
-makeStatepointExplicit(DominatorTree &DT, const CallSite &CS,
+makeStatepointExplicit(DominatorTree &DT, CallSite CS,
PartiallyConstructedSafepointRecord &Result,
std::vector<DeferredReplacement> &Replacements) {
const auto &LiveSet = Result.LiveSet;
@@ -1609,11 +1477,6 @@ makeStatepointExplicit(DominatorTree &DT, const CallSite &CS,
}
assert(LiveVec.size() == BaseVec.size());
- // To make the output IR slightly more stable (for use in diffs), ensure a
- // fixed order of the values in the safepoint (by sorting the value name).
- // The order is otherwise meaningless.
- StabilizeOrder(BaseVec, LiveVec);
-
// Do the actual rewriting and delete the old statepoint
makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
}
@@ -1634,7 +1497,7 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
if (!Relocate)
continue;
- Value *OriginalValue = const_cast<Value *>(Relocate->getDerivedPtr());
+ Value *OriginalValue = Relocate->getDerivedPtr();
assert(AllocaMap.count(OriginalValue));
Value *Alloca = AllocaMap[OriginalValue];
@@ -1660,11 +1523,10 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
// Helper function for the "relocationViaAlloca". Similar to the
// "insertRelocationStores" but works for rematerialized values.
-static void
-insertRematerializationStores(
- RematerializedValueMapTy RematerializedValues,
- DenseMap<Value *, Value *> &AllocaMap,
- DenseSet<Value *> &VisitedLiveValues) {
+static void insertRematerializationStores(
+ const RematerializedValueMapTy &RematerializedValues,
+ DenseMap<Value *, Value *> &AllocaMap,
+ DenseSet<Value *> &VisitedLiveValues) {
for (auto RematerializedValuePair: RematerializedValues) {
Instruction *RematerializedValue = RematerializedValuePair.first;
@@ -1691,9 +1553,8 @@ static void relocationViaAlloca(
// record initial number of (static) allocas; we'll check we have the same
// number when we get done.
int InitialAllocaNum = 0;
- for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
- I++)
- if (isa<AllocaInst>(*I))
+ for (Instruction &I : F.getEntryBlock())
+ if (isa<AllocaInst>(I))
InitialAllocaNum++;
#endif
@@ -1777,8 +1638,7 @@ static void relocationViaAlloca(
auto InsertClobbersAt = [&](Instruction *IP) {
for (auto *AI : ToClobber) {
- auto AIType = cast<PointerType>(AI->getType());
- auto PT = cast<PointerType>(AIType->getElementType());
+ auto PT = cast<PointerType>(AI->getAllocatedType());
Constant *CPN = ConstantPointerNull::get(PT);
StoreInst *Store = new StoreInst(CPN, AI);
Store->insertBefore(IP);
@@ -1919,141 +1779,7 @@ static void findLiveReferences(
computeLiveInValues(DT, F, OriginalLivenessData);
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
- const CallSite &CS = toUpdate[i];
- analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);
- }
-}
-
-/// Remove any vector of pointers from the live set by scalarizing them over the
-/// statepoint instruction. Adds the scalarized pieces to the live set. It
-/// would be preferable to include the vector in the statepoint itself, but
-/// the lowering code currently does not handle that. Extending it would be
-/// slightly non-trivial since it requires a format change. Given how rare
-/// such cases are (for the moment?) scalarizing is an acceptable compromise.
-static void splitVectorValues(Instruction *StatepointInst,
- StatepointLiveSetTy &LiveSet,
- DenseMap<Value *, Value *>& PointerToBase,
- DominatorTree &DT) {
- SmallVector<Value *, 16> ToSplit;
- for (Value *V : LiveSet)
- if (isa<VectorType>(V->getType()))
- ToSplit.push_back(V);
-
- if (ToSplit.empty())
- return;
-
- DenseMap<Value *, SmallVector<Value *, 16>> ElementMapping;
-
- Function &F = *(StatepointInst->getParent()->getParent());
-
- DenseMap<Value *, AllocaInst *> AllocaMap;
- // First is normal return, second is exceptional return (invoke only)
- DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
- for (Value *V : ToSplit) {
- AllocaInst *Alloca =
- new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
- AllocaMap[V] = Alloca;
-
- VectorType *VT = cast<VectorType>(V->getType());
- IRBuilder<> Builder(StatepointInst);
- SmallVector<Value *, 16> Elements;
- for (unsigned i = 0; i < VT->getNumElements(); i++)
- Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
- ElementMapping[V] = Elements;
-
- auto InsertVectorReform = [&](Instruction *IP) {
- Builder.SetInsertPoint(IP);
- Builder.SetCurrentDebugLocation(IP->getDebugLoc());
- Value *ResultVec = UndefValue::get(VT);
- for (unsigned i = 0; i < VT->getNumElements(); i++)
- ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i],
- Builder.getInt32(i));
- return ResultVec;
- };
-
- if (isa<CallInst>(StatepointInst)) {
- BasicBlock::iterator Next(StatepointInst);
- Next++;
- Instruction *IP = &*(Next);
- Replacements[V].first = InsertVectorReform(IP);
- Replacements[V].second = nullptr;
- } else {
- InvokeInst *Invoke = cast<InvokeInst>(StatepointInst);
- // We've already normalized - check that we don't have shared destination
- // blocks
- BasicBlock *NormalDest = Invoke->getNormalDest();
- assert(!isa<PHINode>(NormalDest->begin()));
- BasicBlock *UnwindDest = Invoke->getUnwindDest();
- assert(!isa<PHINode>(UnwindDest->begin()));
- // Insert insert element sequences in both successors
- Instruction *IP = &*(NormalDest->getFirstInsertionPt());
- Replacements[V].first = InsertVectorReform(IP);
- IP = &*(UnwindDest->getFirstInsertionPt());
- Replacements[V].second = InsertVectorReform(IP);
- }
- }
-
- for (Value *V : ToSplit) {
- AllocaInst *Alloca = AllocaMap[V];
-
- // Capture all users before we start mutating use lists
- SmallVector<Instruction *, 16> Users;
- for (User *U : V->users())
- Users.push_back(cast<Instruction>(U));
-
- for (Instruction *I : Users) {
- if (auto Phi = dyn_cast<PHINode>(I)) {
- for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++)
- if (V == Phi->getIncomingValue(i)) {
- LoadInst *Load = new LoadInst(
- Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
- Phi->setIncomingValue(i, Load);
- }
- } else {
- LoadInst *Load = new LoadInst(Alloca, "", I);
- I->replaceUsesOfWith(V, Load);
- }
- }
-
- // Store the original value and the replacement value into the alloca
- StoreInst *Store = new StoreInst(V, Alloca);
- if (auto I = dyn_cast<Instruction>(V))
- Store->insertAfter(I);
- else
- Store->insertAfter(Alloca);
-
- // Normal return for invoke, or call return
- Instruction *Replacement = cast<Instruction>(Replacements[V].first);
- (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
- // Unwind return for invoke only
- Replacement = cast_or_null<Instruction>(Replacements[V].second);
- if (Replacement)
- (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
- }
-
- // apply mem2reg to promote alloca to SSA
- SmallVector<AllocaInst *, 16> Allocas;
- for (Value *V : ToSplit)
- Allocas.push_back(AllocaMap[V]);
- PromoteMemToReg(Allocas, DT);
-
- // Update our tracking of live pointers and base mappings to account for the
- // changes we just made.
- for (Value *V : ToSplit) {
- auto &Elements = ElementMapping[V];
-
- LiveSet.erase(V);
- LiveSet.insert(Elements.begin(), Elements.end());
- // We need to update the base mapping as well.
- assert(PointerToBase.count(V));
- Value *OldBase = PointerToBase[V];
- auto &BaseElements = ElementMapping[OldBase];
- PointerToBase.erase(V);
- assert(Elements.size() == BaseElements.size());
- for (unsigned i = 0; i < Elements.size(); i++) {
- Value *Elem = Elements[i];
- PointerToBase[Elem] = BaseElements[i];
- }
+ analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info);
}
}
@@ -2109,7 +1835,7 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
// Cost of the address calculation
- Type *ValTy = GEP->getPointerOperandType()->getPointerElementType();
+ Type *ValTy = GEP->getSourceElementType();
Cost += TTI.getAddressComputationCost(ValTy);
// And cost of the GEP itself
@@ -2244,7 +1970,7 @@ static void rematerializeLiveValues(CallSite CS,
   // Remove rematerialized values from the live set
for (auto LiveValue: LiveValuesToBeDeleted) {
- Info.LiveSet.erase(LiveValue);
+ Info.LiveSet.remove(LiveValue);
}
}
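// Illustrative sketch (not part of the patch): the DenseSet -> SetVector and
// DenseMap -> MapVector switches in this patch trade a little memory for
// deterministic, insertion-ordered iteration, which is what lets the explicit
// sort-by-name code elsewhere in the file be deleted. Minimal standalone
// usage, assuming an LLVM build environment for the header:

#include "llvm/ADT/SetVector.h"
#include <cassert>

int main() {
  llvm::SetVector<int> Live;
  Live.insert(30);
  Live.insert(10);
  Live.insert(20);
  Live.remove(10);                 // the same operation the live sets now use
  const int Expected[] = {30, 20}; // insertion order, minus removals
  unsigned Idx = 0;
  for (int V : Live)
    assert(V == Expected[Idx++]);
  return 0;
}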
@@ -2257,11 +1983,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
- for (CallSite CS : ToUpdate) {
- assert(CS.getInstruction()->getParent()->getParent() == &F);
- assert((UseDeoptBundles || isStatepoint(CS)) &&
- "expected to already be a deopt statepoint");
- }
+ for (CallSite CS : ToUpdate)
+ assert(CS.getInstruction()->getFunction() == &F);
#endif
// When inserting gc.relocates for invokes, we need to be able to insert at
@@ -2287,12 +2010,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
for (CallSite CS : ToUpdate) {
SmallVector<Value *, 64> DeoptValues;
- iterator_range<const Use *> DeoptStateRange =
- UseDeoptBundles
- ? iterator_range<const Use *>(GetDeoptBundleOperands(CS))
- : iterator_range<const Use *>(Statepoint(CS).vm_state_args());
-
- for (Value *Arg : DeoptStateRange) {
+ for (Value *Arg : GetDeoptBundleOperands(CS)) {
assert(!isUnhandledGCPointerType(Arg->getType()) &&
"support for FCA unimplemented");
if (isHandledGCPointerType(Arg->getType()))
@@ -2374,29 +2092,13 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
for (auto &Info : Records)
for (auto &BasePair : Info.PointerToBase)
if (isa<Constant>(BasePair.second))
- Info.LiveSet.erase(BasePair.first);
+ Info.LiveSet.remove(BasePair.first);
for (CallInst *CI : Holders)
CI->eraseFromParent();
Holders.clear();
- // Do a limited scalarization of any live at safepoint vector values which
- // contain pointers. This enables this pass to run after vectorization at
- // the cost of some possible performance loss. Note: This is known to not
- // handle updating of the side tables correctly which can lead to relocation
- // bugs when the same vector is live at multiple statepoints. We're in the
- // process of implementing the alternate lowering - relocating the
- // vector-of-pointers as first class item and updating the backend to
- // understand that - but that's not yet complete.
- if (UseVectorSplit)
- for (size_t i = 0; i < Records.size(); i++) {
- PartiallyConstructedSafepointRecord &Info = Records[i];
- Instruction *Statepoint = ToUpdate[i].getInstruction();
- splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
- Info.PointerToBase, DT);
- }
-
// In order to reduce live set of statepoint we might choose to rematerialize
// some values instead of relocating them. This is purely an optimization and
// does not influence correctness.
@@ -2592,13 +2294,9 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto NeedsRewrite = [](Instruction &I) {
- if (UseDeoptBundles) {
- if (ImmutableCallSite CS = ImmutableCallSite(&I))
- return !callsGCLeafFunction(CS);
- return false;
- }
-
- return isStatepoint(I);
+ if (ImmutableCallSite CS = ImmutableCallSite(&I))
+ return !callsGCLeafFunction(CS) && !isStatepoint(CS);
+ return false;
};
// Gather all the statepoints which need rewritten. Be careful to only
@@ -2682,15 +2380,12 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
/// Compute the live-in set for the location rbegin starting from
/// the live-out set of the basic block
-static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
- BasicBlock::reverse_iterator rend,
- DenseSet<Value *> &LiveTmp) {
-
- for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
- Instruction *I = &*ritr;
-
+static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
+ BasicBlock::reverse_iterator End,
+ SetVector<Value *> &LiveTmp) {
+ for (auto &I : make_range(Begin, End)) {
// KILL/Def - Remove this definition from LiveIn
- LiveTmp.erase(I);
+ LiveTmp.remove(&I);
// Don't consider *uses* in PHI nodes, we handle their contribution to
// predecessor blocks when we seed the LiveOut sets
@@ -2698,7 +2393,7 @@ static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
continue;
// USE - Add to the LiveIn set for this instruction
- for (Value *V : I->operands()) {
+ for (Value *V : I.operands()) {
assert(!isUnhandledGCPointerType(V->getType()) &&
"support for FCA unimplemented");
if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
@@ -2718,24 +2413,24 @@ static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
}
}
-static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {
-
+static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp) {
for (BasicBlock *Succ : successors(BB)) {
- const BasicBlock::iterator E(Succ->getFirstNonPHI());
- for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
- PHINode *Phi = cast<PHINode>(&*I);
- Value *V = Phi->getIncomingValueForBlock(BB);
+ for (auto &I : *Succ) {
+ PHINode *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ break;
+
+ Value *V = PN->getIncomingValueForBlock(BB);
assert(!isUnhandledGCPointerType(V->getType()) &&
"support for FCA unimplemented");
- if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
+ if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V))
LiveTmp.insert(V);
- }
}
}
}
-static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
- DenseSet<Value *> KillSet;
+static SetVector<Value *> computeKillSet(BasicBlock *BB) {
+ SetVector<Value *> KillSet;
for (Instruction &I : *BB)
if (isHandledGCPointerType(I.getType()))
KillSet.insert(&I);
@@ -2745,7 +2440,7 @@ static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
#ifndef NDEBUG
/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
/// sanity check for the liveness computation.
-static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
+static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
TerminatorInst *TI, bool TermOkay = false) {
for (Value *V : Live) {
if (auto *I = dyn_cast<Instruction>(V)) {
@@ -2773,17 +2468,7 @@ static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
static void computeLiveInValues(DominatorTree &DT, Function &F,
GCPtrLivenessData &Data) {
-
- SmallSetVector<BasicBlock *, 200> Worklist;
- auto AddPredsToWorklist = [&](BasicBlock *BB) {
- // We use a SetVector so that we don't have duplicates in the worklist.
- Worklist.insert(pred_begin(BB), pred_end(BB));
- };
- auto NextItem = [&]() {
- BasicBlock *BB = Worklist.back();
- Worklist.pop_back();
- return BB;
- };
+ SmallSetVector<BasicBlock *, 32> Worklist;
// Seed the liveness for each individual block
for (BasicBlock &BB : F) {
@@ -2796,56 +2481,55 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
#endif
- Data.LiveOut[&BB] = DenseSet<Value *>();
+ Data.LiveOut[&BB] = SetVector<Value *>();
computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
Data.LiveIn[&BB] = Data.LiveSet[&BB];
- set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
- set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
+ Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]);
+ Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]);
if (!Data.LiveIn[&BB].empty())
- AddPredsToWorklist(&BB);
+ Worklist.insert(pred_begin(&BB), pred_end(&BB));
}
// Propagate that liveness until stable
while (!Worklist.empty()) {
- BasicBlock *BB = NextItem();
+ BasicBlock *BB = Worklist.pop_back_val();
- // Compute our new liveout set, then exit early if it hasn't changed
- // despite the contribution of our successor.
- DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+ // Compute our new liveout set, then exit early if it hasn't changed despite
+ // the contribution of our successor.
+ SetVector<Value *> LiveOut = Data.LiveOut[BB];
const auto OldLiveOutSize = LiveOut.size();
for (BasicBlock *Succ : successors(BB)) {
assert(Data.LiveIn.count(Succ));
- set_union(LiveOut, Data.LiveIn[Succ]);
+ LiveOut.set_union(Data.LiveIn[Succ]);
}
// assert OutLiveOut is a subset of LiveOut
if (OldLiveOutSize == LiveOut.size()) {
// If the sets are the same size, then we didn't actually add anything
- // when unioning our successors LiveIn Thus, the LiveIn of this block
+ // when unioning our successors LiveIn. Thus, the LiveIn of this block
// hasn't changed.
continue;
}
Data.LiveOut[BB] = LiveOut;
// Apply the effects of this basic block
- DenseSet<Value *> LiveTmp = LiveOut;
- set_union(LiveTmp, Data.LiveSet[BB]);
- set_subtract(LiveTmp, Data.KillSet[BB]);
+ SetVector<Value *> LiveTmp = LiveOut;
+ LiveTmp.set_union(Data.LiveSet[BB]);
+ LiveTmp.set_subtract(Data.KillSet[BB]);
assert(Data.LiveIn.count(BB));
- const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
+ const SetVector<Value *> &OldLiveIn = Data.LiveIn[BB];
// assert: OldLiveIn is a subset of LiveTmp
if (OldLiveIn.size() != LiveTmp.size()) {
Data.LiveIn[BB] = LiveTmp;
- AddPredsToWorklist(BB);
+ Worklist.insert(pred_begin(BB), pred_end(BB));
}
- } // while( !worklist.empty() )
+ } // while (!Worklist.empty())
#ifndef NDEBUG
// Sanity check our output against SSA properties. This helps catch any
// missing kills during the above iteration.
- for (BasicBlock &BB : F) {
+ for (BasicBlock &BB : F)
checkBasicSSA(DT, Data, BB);
- }
#endif
}
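// Illustrative sketch (not part of the patch): the fixed point computed above
// is the usual backward liveness recurrence
//   LiveIn(BB)  = (LiveOut(BB) union Gen(BB)) minus Kill(BB)
//   LiveOut(BB) = union of LiveIn(S) over successors S of BB
// where Gen plays the role of Data.LiveSet and Kill of Data.KillSet. A
// standalone toy version over integer "values" and an explicit CFG, with
// invented names (ToyBlock, toyLiveness):

#include <cassert>
#include <set>
#include <vector>

struct ToyBlock {
  std::set<int> Gen, Kill, LiveIn, LiveOut;
  std::vector<unsigned> Succs; // indices into the block array
};

static void toyLiveness(std::vector<ToyBlock> &Blocks) {
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (ToyBlock &BB : Blocks) {
      std::set<int> Out;
      for (unsigned S : BB.Succs)
        Out.insert(Blocks[S].LiveIn.begin(), Blocks[S].LiveIn.end());
      std::set<int> In = Out;
      In.insert(BB.Gen.begin(), BB.Gen.end());
      for (int K : BB.Kill)
        In.erase(K);
      if (In != BB.LiveIn || Out != BB.LiveOut) {
        BB.LiveIn = std::move(In);
        BB.LiveOut = std::move(Out);
        Changed = true;
      }
    }
  }
}

int main() {
  // Block 0 defines value 7 and falls through to block 1, which uses it.
  std::vector<ToyBlock> Blocks(2);
  Blocks[0].Kill = {7};
  Blocks[0].Succs = {1};
  Blocks[1].Gen = {7};
  toyLiveness(Blocks);
  assert(Blocks[0].LiveOut.count(7) && !Blocks[0].LiveIn.count(7));
  assert(Blocks[1].LiveIn.count(7));
  return 0;
}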
@@ -2856,7 +2540,7 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
// Note: The copy is intentional and required
assert(Data.LiveOut.count(BB));
- DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+ SetVector<Value *> LiveOut = Data.LiveOut[BB];
   // We want to handle the statepoint itself oddly. Its
   // call result is not live (normal), nor are its arguments
@@ -2864,12 +2548,12 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
// specifically what we need to relocate
BasicBlock::reverse_iterator rend(Inst->getIterator());
computeLiveInValues(BB->rbegin(), rend, LiveOut);
- LiveOut.erase(Inst);
+ LiveOut.remove(Inst);
Out.insert(LiveOut.begin(), LiveOut.end());
}
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
- const CallSite &CS,
+ CallSite CS,
PartiallyConstructedSafepointRecord &Info) {
Instruction *Inst = CS.getInstruction();
StatepointLiveSetTy Updated;
@@ -2877,33 +2561,32 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
#ifndef NDEBUG
DenseSet<Value *> Bases;
- for (auto KVPair : Info.PointerToBase) {
+ for (auto KVPair : Info.PointerToBase)
Bases.insert(KVPair.second);
- }
#endif
+
// We may have base pointers which are now live that weren't before. We need
// to update the PointerToBase structure to reflect this.
for (auto V : Updated)
- if (!Info.PointerToBase.count(V)) {
- assert(Bases.count(V) && "can't find base for unexpected live value");
- Info.PointerToBase[V] = V;
+ if (Info.PointerToBase.insert({V, V}).second) {
+ assert(Bases.count(V) && "Can't find base for unexpected live value!");
continue;
}
#ifndef NDEBUG
- for (auto V : Updated) {
+ for (auto V : Updated)
assert(Info.PointerToBase.count(V) &&
- "must be able to find base for live value");
- }
+ "Must be able to find base for live value!");
#endif
// Remove any stale base mappings - this can happen since our liveness is
- // more precise then the one inherent in the base pointer analysis
+  // more precise than the one inherent in the base pointer analysis.
DenseSet<Value *> ToErase;
for (auto KVPair : Info.PointerToBase)
if (!Updated.count(KVPair.first))
ToErase.insert(KVPair.first);
- for (auto V : ToErase)
+
+ for (auto *V : ToErase)
Info.PointerToBase.erase(V);
#ifndef NDEBUG
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 8569e080873c9..da700f18cdafb 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -17,15 +17,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -38,6 +38,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@@ -57,8 +59,8 @@ namespace {
///
class LatticeVal {
enum LatticeValueTy {
- /// undefined - This LLVM Value has no known value yet.
- undefined,
+ /// unknown - This LLVM Value has no known value yet.
+ unknown,
/// constant - This LLVM Value has a specific constant value.
constant,
@@ -83,9 +85,9 @@ class LatticeVal {
}
public:
- LatticeVal() : Val(nullptr, undefined) {}
+ LatticeVal() : Val(nullptr, unknown) {}
- bool isUndefined() const { return getLatticeValue() == undefined; }
+ bool isUnknown() const { return getLatticeValue() == unknown; }
bool isConstant() const {
return getLatticeValue() == constant || getLatticeValue() == forcedconstant;
}
@@ -112,7 +114,7 @@ public:
return false;
}
- if (isUndefined()) {
+ if (isUnknown()) {
Val.setInt(constant);
assert(V && "Marking constant with NULL");
Val.setPointer(V);
@@ -139,7 +141,7 @@ public:
}
void markForcedConstant(Constant *V) {
- assert(isUndefined() && "Can't force a defined value!");
+ assert(isUnknown() && "Can't force a defined value!");
Val.setInt(forcedconstant);
Val.setPointer(V);
}
@@ -228,7 +230,7 @@ public:
/// performing Interprocedural SCCP.
void TrackValueOfGlobalVariable(GlobalVariable *GV) {
// We only track the contents of scalar globals.
- if (GV->getType()->getElementType()->isSingleValueType()) {
+ if (GV->getValueType()->isSingleValueType()) {
LatticeVal &IV = TrackedGlobals[GV];
if (!isa<UndefValue>(GV->getInitializer()))
IV.markConstant(GV->getInitializer());
@@ -268,6 +270,18 @@ public:
return BBExecutable.count(BB);
}
+ std::vector<LatticeVal> getStructLatticeValueFor(Value *V) const {
+ std::vector<LatticeVal> StructValues;
+ StructType *STy = dyn_cast<StructType>(V->getType());
+ assert(STy && "getStructLatticeValueFor() can be called only on structs");
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ auto I = StructValueState.find(std::make_pair(V, i));
+ assert(I != StructValueState.end() && "Value not in valuemap!");
+ StructValues.push_back(I->second);
+ }
+ return StructValues;
+ }
+
LatticeVal getLatticeValueFor(Value *V) const {
DenseMap<Value*, LatticeVal>::const_iterator I = ValueState.find(V);
assert(I != ValueState.end() && "V is not in valuemap!");
@@ -302,6 +316,13 @@ public:
}
private:
+ // pushToWorkList - Helper for markConstant/markForcedConstant
+ void pushToWorkList(LatticeVal &IV, Value *V) {
+ if (IV.isOverdefined())
+ return OverdefinedInstWorkList.push_back(V);
+ InstWorkList.push_back(V);
+ }
+
// markConstant - Make a value be marked as "constant". If the value
// is not already a constant, add it to the instruction work list so that
// the users of the instruction are updated later.
@@ -309,10 +330,7 @@ private:
void markConstant(LatticeVal &IV, Value *V, Constant *C) {
if (!IV.markConstant(C)) return;
DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
- if (IV.isOverdefined())
- OverdefinedInstWorkList.push_back(V);
- else
- InstWorkList.push_back(V);
+ pushToWorkList(IV, V);
}
void markConstant(Value *V, Constant *C) {
@@ -325,10 +343,7 @@ private:
LatticeVal &IV = ValueState[V];
IV.markForcedConstant(C);
DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n');
- if (IV.isOverdefined())
- OverdefinedInstWorkList.push_back(V);
- else
- InstWorkList.push_back(V);
+ pushToWorkList(IV, V);
}
@@ -348,14 +363,14 @@ private:
}
void mergeInValue(LatticeVal &IV, Value *V, LatticeVal MergeWithV) {
- if (IV.isOverdefined() || MergeWithV.isUndefined())
+ if (IV.isOverdefined() || MergeWithV.isUnknown())
return; // Noop.
if (MergeWithV.isOverdefined())
- markOverdefined(IV, V);
- else if (IV.isUndefined())
- markConstant(IV, V, MergeWithV.getConstant());
- else if (IV.getConstant() != MergeWithV.getConstant())
- markOverdefined(IV, V);
+ return markOverdefined(IV, V);
+ if (IV.isUnknown())
+ return markConstant(IV, V, MergeWithV.getConstant());
+ if (IV.getConstant() != MergeWithV.getConstant())
+ return markOverdefined(IV, V);
}
void mergeInValue(Value *V, LatticeVal MergeWithV) {
@@ -378,7 +393,7 @@ private:
return LV; // Common case, already in the map.
if (Constant *C = dyn_cast<Constant>(V)) {
- // Undef values remain undefined.
+ // Undef values remain unknown.
if (!isa<UndefValue>(V))
LV.markConstant(C); // Constants are constant
}
@@ -409,7 +424,7 @@ private:
if (!Elt)
LV.markOverdefined(); // Unknown sort of constant.
else if (isa<UndefValue>(Elt))
- ; // Undef values remain undefined.
+ ; // Undef values remain unknown.
else
LV.markConstant(Elt); // Constants are constant.
}
@@ -537,7 +552,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
if (!CI) {
// Overdefined condition variables, and branches on unfoldable constant
// conditions, mean the branch could go either way.
- if (!BCValue.isUndefined())
+ if (!BCValue.isUnknown())
Succs[0] = Succs[1] = true;
return;
}
@@ -561,9 +576,9 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
- if (!CI) { // Overdefined or undefined condition?
+ if (!CI) { // Overdefined or unknown condition?
// All destinations are executable!
- if (!SCValue.isUndefined())
+ if (!SCValue.isUnknown())
Succs.assign(TI.getNumSuccessors(), true);
return;
}
@@ -607,7 +622,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
// undef conditions mean that neither edge is feasible yet.
ConstantInt *CI = BCValue.getConstantInt();
if (!CI)
- return !BCValue.isUndefined();
+ return !BCValue.isUnknown();
// Constant condition variables mean the branch can only go a single way.
return BI->getSuccessor(CI->isZero()) == To;
@@ -625,7 +640,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
ConstantInt *CI = SCValue.getConstantInt();
if (!CI)
- return !SCValue.isUndefined();
+ return !SCValue.isUnknown();
return SI->findCaseValue(CI).getCaseSuccessor() == To;
}
@@ -677,12 +692,12 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// are overdefined, the PHI becomes overdefined as well. If they are all
// constant, and they agree with each other, the PHI becomes the identical
// constant. If they are constant and don't agree, the PHI is overdefined.
- // If there are no executable operands, the PHI remains undefined.
+ // If there are no executable operands, the PHI remains unknown.
//
Constant *OperandVal = nullptr;
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
LatticeVal IV = getValueState(PN.getIncomingValue(i));
- if (IV.isUndefined()) continue; // Doesn't influence PHI node.
+ if (IV.isUnknown()) continue; // Doesn't influence PHI node.
if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
continue;
@@ -708,7 +723,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// If we exited the loop, this means that the PHI node only has constant
// arguments that agree with each other(and OperandVal is the constant) or
// OperandVal is null because there are no defined incoming arguments. If
- // this is the case, the PHI remains undefined.
+ // this is the case, the PHI remains unknown.
//
if (OperandVal)
markConstant(&PN, OperandVal); // Acquire operand value
@@ -758,8 +773,9 @@ void SCCPSolver::visitCastInst(CastInst &I) {
if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
markOverdefined(&I);
else if (OpSt.isConstant()) {
- Constant *C =
- ConstantExpr::getCast(I.getOpcode(), OpSt.getConstant(), I.getType());
+ // Fold the constant as we build.
+ Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpSt.getConstant(),
+ I.getType(), DL);
if (isa<UndefValue>(C))
return;
// Propagate constant value
@@ -829,7 +845,7 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
return markAnythingOverdefined(&I);
LatticeVal CondValue = getValueState(I.getCondition());
- if (CondValue.isUndefined())
+ if (CondValue.isUnknown())
return;
if (ConstantInt *CondCB = CondValue.getConstantInt()) {
@@ -849,9 +865,9 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
TVal.getConstant() == FVal.getConstant())
return markConstant(&I, FVal.getConstant());
- if (TVal.isUndefined()) // select ?, undef, X -> X.
+ if (TVal.isUnknown()) // select ?, undef, X -> X.
return mergeInValue(&I, FVal);
- if (FVal.isUndefined()) // select ?, X, undef -> X.
+ if (FVal.isUnknown()) // select ?, X, undef -> X.
return mergeInValue(&I, TVal);
markOverdefined(&I);
}
@@ -890,7 +906,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
NonOverdefVal = &V2State;
if (NonOverdefVal) {
- if (NonOverdefVal->isUndefined()) {
+ if (NonOverdefVal->isUnknown()) {
// Could annihilate value.
if (I.getOpcode() == Instruction::And)
markConstant(IV, &I, Constant::getNullValue(I.getType()));
@@ -934,7 +950,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
return markConstant(IV, &I, C);
}
- // If operands are still undefined, wait for it to resolve.
+ // If operands are still unknown, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
return;
@@ -944,69 +960,16 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
-
-#if 0
- LatticeVal &ValState = getValueState(I.getOperand(0));
- LatticeVal &IdxState = getValueState(I.getOperand(1));
-
- if (ValState.isOverdefined() || IdxState.isOverdefined())
- markOverdefined(&I);
- else if(ValState.isConstant() && IdxState.isConstant())
- markConstant(&I, ConstantExpr::getExtractElement(ValState.getConstant(),
- IdxState.getConstant()));
-#endif
}
void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
-#if 0
- LatticeVal &ValState = getValueState(I.getOperand(0));
- LatticeVal &EltState = getValueState(I.getOperand(1));
- LatticeVal &IdxState = getValueState(I.getOperand(2));
-
- if (ValState.isOverdefined() || EltState.isOverdefined() ||
- IdxState.isOverdefined())
- markOverdefined(&I);
- else if(ValState.isConstant() && EltState.isConstant() &&
- IdxState.isConstant())
- markConstant(&I, ConstantExpr::getInsertElement(ValState.getConstant(),
- EltState.getConstant(),
- IdxState.getConstant()));
- else if (ValState.isUndefined() && EltState.isConstant() &&
- IdxState.isConstant())
- markConstant(&I,ConstantExpr::getInsertElement(UndefValue::get(I.getType()),
- EltState.getConstant(),
- IdxState.getConstant()));
-#endif
}
void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
-#if 0
- LatticeVal &V1State = getValueState(I.getOperand(0));
- LatticeVal &V2State = getValueState(I.getOperand(1));
- LatticeVal &MaskState = getValueState(I.getOperand(2));
-
- if (MaskState.isUndefined() ||
- (V1State.isUndefined() && V2State.isUndefined()))
- return; // Undefined output if mask or both inputs undefined.
-
- if (V1State.isOverdefined() || V2State.isOverdefined() ||
- MaskState.isOverdefined()) {
- markOverdefined(&I);
- } else {
- // A mix of constant/undef inputs.
- Constant *V1 = V1State.isConstant() ?
- V1State.getConstant() : UndefValue::get(I.getType());
- Constant *V2 = V2State.isConstant() ?
- V2State.getConstant() : UndefValue::get(I.getType());
- Constant *Mask = MaskState.isConstant() ?
- MaskState.getConstant() : UndefValue::get(I.getOperand(2)->getType());
- markConstant(&I, ConstantExpr::getShuffleVector(V1, V2, Mask));
- }
-#endif
}
// Handle getelementptr instructions. If all operands are constants then we
@@ -1020,7 +983,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
LatticeVal State = getValueState(I.getOperand(i));
- if (State.isUndefined())
+ if (State.isUnknown())
return; // Operands are not resolved yet.
if (State.isOverdefined())
@@ -1066,7 +1029,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
return markAnythingOverdefined(&I);
LatticeVal PtrVal = getValueState(I.getOperand(0));
- if (PtrVal.isUndefined()) return; // The pointer is not resolved yet!
+ if (PtrVal.isUnknown()) return; // The pointer is not resolved yet!
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
@@ -1094,7 +1057,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
}
// Transform load from a constant into a constant if possible.
- if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL)) {
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL)) {
if (isa<UndefValue>(C))
return;
return markConstant(IV, &I, C);
@@ -1127,7 +1090,7 @@ CallOverdefined:
AI != E; ++AI) {
LatticeVal State = getValueState(*AI);
- if (State.isUndefined())
+ if (State.isUnknown())
return; // Operands are not resolved yet.
if (State.isOverdefined())
return markOverdefined(I);
@@ -1275,11 +1238,11 @@ void SCCPSolver::Solve() {
/// conservatively, as "(zext i8 X -> i32) & 0xFF00" must always return zero,
/// even if X isn't defined.
bool SCCPSolver::ResolvedUndefsIn(Function &F) {
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (!BBExecutable.count(&*BB))
+ for (BasicBlock &BB : F) {
+ if (!BBExecutable.count(&BB))
continue;
- for (Instruction &I : *BB) {
+ for (Instruction &I : BB) {
// Look for instructions which produce undef values.
if (I.getType()->isVoidTy()) continue;
@@ -1301,14 +1264,14 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// more precise than this but it isn't worth bothering.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
LatticeVal &LV = getStructValueState(&I, i);
- if (LV.isUndefined())
+ if (LV.isUnknown())
markOverdefined(LV, &I);
}
continue;
}
LatticeVal &LV = getValueState(&I);
- if (!LV.isUndefined()) continue;
+ if (!LV.isUnknown()) continue;
// extractvalue is safe; check here because the argument is a struct.
if (isa<ExtractValueInst>(I))
@@ -1347,7 +1310,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::FDiv:
case Instruction::FRem:
// Floating-point binary operation: be conservative.
- if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ if (Op0LV.isUnknown() && Op1LV.isUnknown())
markForcedConstant(&I, Constant::getNullValue(ITy));
else
markOverdefined(&I);
@@ -1367,7 +1330,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::Mul:
case Instruction::And:
// Both operands undef -> undef
- if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ if (Op0LV.isUnknown() && Op1LV.isUnknown())
break;
// undef * X -> 0. X could be zero.
// undef & X -> 0. X could be zero.
@@ -1376,7 +1339,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::Or:
// Both operands undef -> undef
- if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ if (Op0LV.isUnknown() && Op1LV.isUnknown())
break;
// undef | X -> -1. X could be -1.
markForcedConstant(&I, Constant::getAllOnesValue(ITy));
@@ -1386,7 +1349,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
      // undef ^ undef -> 0; strictly speaking, this is not
// necessary, but we try to be nice to people who expect this
// behavior in simple cases
- if (Op0LV.isUndefined() && Op1LV.isUndefined()) {
+ if (Op0LV.isUnknown() && Op1LV.isUnknown()) {
markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
}
@@ -1399,7 +1362,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::URem:
// X / undef -> undef. No change.
// X % undef -> undef. No change.
- if (Op1LV.isUndefined()) break;
+ if (Op1LV.isUnknown()) break;
// X / 0 -> undef. No change.
// X % 0 -> undef. No change.
@@ -1413,7 +1376,15 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::AShr:
// X >>a undef -> undef.
- if (Op1LV.isUndefined()) break;
+ if (Op1LV.isUnknown()) break;
+
+ // Shifting by the bitwidth or more is undefined.
+ if (Op1LV.isConstant()) {
+ if (auto *ShiftAmt = Op1LV.getConstantInt())
+ if (ShiftAmt->getLimitedValue() >=
+ ShiftAmt->getType()->getScalarSizeInBits())
+ break;
+ }
// undef >>a X -> all ones
markForcedConstant(&I, Constant::getAllOnesValue(ITy));
@@ -1422,7 +1393,15 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::Shl:
// X << undef -> undef.
// X >> undef -> undef.
- if (Op1LV.isUndefined()) break;
+ if (Op1LV.isUnknown()) break;
+
+ // Shifting by the bitwidth or more is undefined.
+ if (Op1LV.isConstant()) {
+ if (auto *ShiftAmt = Op1LV.getConstantInt())
+ if (ShiftAmt->getLimitedValue() >=
+ ShiftAmt->getType()->getScalarSizeInBits())
+ break;
+ }
// undef << X -> 0
// undef >> X -> 0
@@ -1431,13 +1410,13 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::Select:
Op1LV = getValueState(I.getOperand(1));
// undef ? X : Y -> X or Y. There could be commonality between X/Y.
- if (Op0LV.isUndefined()) {
+ if (Op0LV.isUnknown()) {
if (!Op1LV.isConstant()) // Pick the constant one if there is any.
Op1LV = getValueState(I.getOperand(2));
- } else if (Op1LV.isUndefined()) {
+ } else if (Op1LV.isUnknown()) {
// c ? undef : undef -> undef. No change.
Op1LV = getValueState(I.getOperand(2));
- if (Op1LV.isUndefined())
+ if (Op1LV.isUnknown())
break;
// Otherwise, c ? undef : x -> x.
} else {
@@ -1487,17 +1466,17 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Check to see if we have a branch or switch on an undefined value. If so
// we force the branch to go one way or the other to make the successor
// values live. It doesn't really matter which way we force it.
- TerminatorInst *TI = BB->getTerminator();
+ TerminatorInst *TI = BB.getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (!BI->isConditional()) continue;
- if (!getValueState(BI->getCondition()).isUndefined())
+ if (!getValueState(BI->getCondition()).isUnknown())
continue;
// If the input to SCCP is actually branch on undef, fix the undef to
// false.
if (isa<UndefValue>(BI->getCondition())) {
BI->setCondition(ConstantInt::getFalse(BI->getContext()));
- markEdgeExecutable(&*BB, TI->getSuccessor(1));
+ markEdgeExecutable(&BB, TI->getSuccessor(1));
return true;
}
@@ -1510,16 +1489,14 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (!SI->getNumCases())
- continue;
- if (!getValueState(SI->getCondition()).isUndefined())
+ if (!SI->getNumCases() || !getValueState(SI->getCondition()).isUnknown())
continue;
// If the input to SCCP is actually switch on undef, fix the undef to
// the first constant.
if (isa<UndefValue>(SI->getCondition())) {
SI->setCondition(SI->case_begin().getCaseValue());
- markEdgeExecutable(&*BB, SI->case_begin().getCaseSuccessor());
+ markEdgeExecutable(&BB, SI->case_begin().getCaseSuccessor());
return true;
}
@@ -1531,75 +1508,53 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
return false;
}
-
-namespace {
- //===--------------------------------------------------------------------===//
- //
- /// SCCP Class - This class uses the SCCPSolver to implement a per-function
- /// Sparse Conditional Constant Propagator.
- ///
- struct SCCP : public FunctionPass {
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
- static char ID; // Pass identification, replacement for typeid
- SCCP() : FunctionPass(ID) {
- initializeSCCPPass(*PassRegistry::getPassRegistry());
+static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
+ Constant *Const = nullptr;
+ if (V->getType()->isStructTy()) {
+ std::vector<LatticeVal> IVs = Solver.getStructLatticeValueFor(V);
+ if (std::any_of(IVs.begin(), IVs.end(),
+ [](LatticeVal &LV) { return LV.isOverdefined(); }))
+ return false;
+ std::vector<Constant *> ConstVals;
+ StructType *ST = dyn_cast<StructType>(V->getType());
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ LatticeVal V = IVs[i];
+ ConstVals.push_back(V.isConstant()
+ ? V.getConstant()
+ : UndefValue::get(ST->getElementType(i)));
}
+ Const = ConstantStruct::get(ST, ConstVals);
+ } else {
+ LatticeVal IV = Solver.getLatticeValueFor(V);
+ if (IV.isOverdefined())
+ return false;
+ Const = IV.isConstant() ? IV.getConstant() : UndefValue::get(V->getType());
+ }
+ assert(Const && "Constant is nullptr here!");
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *V << '\n');
- // runOnFunction - Run the Sparse Conditional Constant Propagation
- // algorithm, and return true if the function was modified.
- //
- bool runOnFunction(Function &F) override;
- };
-} // end anonymous namespace
-
-char SCCP::ID = 0;
-INITIALIZE_PASS(SCCP, "sccp",
- "Sparse Conditional Constant Propagation", false, false)
-
-// createSCCPPass - This is the public interface to this file.
-FunctionPass *llvm::createSCCPPass() {
- return new SCCP();
+ // Replaces all of the uses of a variable with uses of the constant.
+ V->replaceAllUsesWith(Const);
+ return true;
}
-static void DeleteInstructionInBlock(BasicBlock *BB) {
- DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
- ++NumDeadBlocks;
-
- // Check to see if there are non-terminating instructions to delete.
- if (isa<TerminatorInst>(BB->begin()))
- return;
+static bool tryToReplaceInstWithConstant(SCCPSolver &Solver, Instruction *Inst,
+ bool shouldEraseFromParent) {
+ if (!tryToReplaceWithConstant(Solver, Inst))
+ return false;
- // Delete the instructions backwards, as it has a reduced likelihood of having
- // to update as many def-use and use-def chains.
- Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
- while (EndInst != BB->begin()) {
- // Delete the next to last instruction.
- Instruction *Inst = &*--EndInst->getIterator();
- if (!Inst->use_empty())
- Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (Inst->isEHPad()) {
- EndInst = Inst;
- continue;
- }
- BB->getInstList().erase(Inst);
- ++NumInstRemoved;
- }
+ // Delete the instruction.
+ if (shouldEraseFromParent)
+ Inst->eraseFromParent();
+ return true;
}
-// runOnFunction() - Run the Sparse Conditional Constant Propagation algorithm,
+// runSCCP() - Run the Sparse Conditional Constant Propagation algorithm,
// and return true if the function was modified.
//
-bool SCCP::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
+static bool runSCCP(Function &F, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- const DataLayout &DL = F.getParent()->getDataLayout();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SCCPSolver Solver(DL, TLI);
// Mark the first block of the function as being executable.
@@ -1623,9 +1578,13 @@ bool SCCP::runOnFunction(Function &F) {
// delete their contents now. Note that we cannot actually delete the blocks,
// as we cannot modify the CFG of the function.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (!Solver.isBlockExecutable(&*BB)) {
- DeleteInstructionInBlock(&*BB);
+ for (BasicBlock &BB : F) {
+ if (!Solver.isBlockExecutable(&BB)) {
+ DEBUG(dbgs() << " BasicBlock Dead:" << BB);
+
+ ++NumDeadBlocks;
+ NumInstRemoved += removeAllNonTerminatorAndEHPadInstructions(&BB);
+
MadeChanges = true;
continue;
}
@@ -1633,70 +1592,74 @@ bool SCCP::runOnFunction(Function &F) {
// Iterate over all of the instructions in a function, replacing them with
// constants if we have found them to be of constant values.
//
- for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
Instruction *Inst = &*BI++;
if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
continue;
- // TODO: Reconstruct structs from their elements.
- if (Inst->getType()->isStructTy())
- continue;
-
- LatticeVal IV = Solver.getLatticeValueFor(Inst);
- if (IV.isOverdefined())
- continue;
-
- Constant *Const = IV.isConstant()
- ? IV.getConstant() : UndefValue::get(Inst->getType());
- DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst << '\n');
-
- // Replaces all of the uses of a variable with uses of the constant.
- Inst->replaceAllUsesWith(Const);
-
- // Delete the instruction.
- Inst->eraseFromParent();
-
- // Hey, we just changed something!
- MadeChanges = true;
- ++NumInstRemoved;
+ if (tryToReplaceInstWithConstant(Solver, Inst,
+ true /* shouldEraseFromParent */)) {
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++NumInstRemoved;
+ }
}
}
return MadeChanges;
}
+PreservedAnalyses SCCPPass::run(Function &F, AnalysisManager<Function> &AM) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ if (!runSCCP(F, DL, &TLI))
+ return PreservedAnalyses::all();
+
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
namespace {
- //===--------------------------------------------------------------------===//
+//===--------------------------------------------------------------------===//
+//
+/// SCCP Class - This class uses the SCCPSolver to implement a per-function
+/// Sparse Conditional Constant Propagator.
+///
+class SCCPLegacyPass : public FunctionPass {
+public:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+ static char ID; // Pass identification, replacement for typeid
+ SCCPLegacyPass() : FunctionPass(ID) {
+ initializeSCCPLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnFunction - Run the Sparse Conditional Constant Propagation
+ // algorithm, and return true if the function was modified.
//
- /// IPSCCP Class - This class implements interprocedural Sparse Conditional
- /// Constant Propagation.
- ///
- struct IPSCCP : public ModulePass {
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
- static char ID;
- IPSCCP() : ModulePass(ID) {
- initializeIPSCCPPass(*PassRegistry::getPassRegistry());
- }
- bool runOnModule(Module &M) override;
- };
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ return runSCCP(F, DL, TLI);
+ }
+};
} // end anonymous namespace
-char IPSCCP::ID = 0;
-INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp",
- "Interprocedural Sparse Conditional Constant Propagation",
- false, false)
+char SCCPLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SCCPLegacyPass, "sccp",
+ "Sparse Conditional Constant Propagation", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(IPSCCP, "ipsccp",
- "Interprocedural Sparse Conditional Constant Propagation",
- false, false)
-
-// createIPSCCPPass - This is the public interface to this file.
-ModulePass *llvm::createIPSCCPPass() {
- return new IPSCCP();
-}
+INITIALIZE_PASS_END(SCCPLegacyPass, "sccp",
+ "Sparse Conditional Constant Propagation", false, false)
+// createSCCPPass - This is the public interface to this file.
+FunctionPass *llvm::createSCCPPass() { return new SCCPLegacyPass(); }
static bool AddressIsTaken(const GlobalValue *GV) {
// Delete any dead constantexpr klingons.
@@ -1725,10 +1688,8 @@ static bool AddressIsTaken(const GlobalValue *GV) {
return false;
}
-bool IPSCCP::runOnModule(Module &M) {
- const DataLayout &DL = M.getDataLayout();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+static bool runIPSCCP(Module &M, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
SCCPSolver Solver(DL, TLI);
// AddressTakenFunctions - This set keeps track of the address-taken functions
@@ -1741,32 +1702,32 @@ bool IPSCCP::runOnModule(Module &M) {
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
//
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration())
+ for (Function &F : M) {
+ if (F.isDeclaration())
continue;
- // If this is a strong or ODR definition of this function, then we can
- // propagate information about its result into callsites of it.
- if (!F->mayBeOverridden())
- Solver.AddTrackedFunction(&*F);
+ // If this is an exact definition of this function, then we can propagate
+ // information about its result into callsites of it.
+ if (F.hasExactDefinition())
+ Solver.AddTrackedFunction(&F);
// If this function only has direct calls that we can see, we can track its
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
- if (F->hasLocalLinkage()) {
- if (AddressIsTaken(&*F))
- AddressTakenFunctions.insert(&*F);
+ if (F.hasLocalLinkage()) {
+ if (AddressIsTaken(&F))
+ AddressTakenFunctions.insert(&F);
else {
- Solver.AddArgumentTrackedFunction(&*F);
+ Solver.AddArgumentTrackedFunction(&F);
continue;
}
}
// Assume the function is called.
- Solver.MarkBlockExecutable(&F->front());
+ Solver.MarkBlockExecutable(&F.front());
// Assume nothing about the incoming arguments.
- for (Argument &AI : F->args())
+ for (Argument &AI : F.args())
Solver.markAnythingOverdefined(&AI);
}
@@ -1784,8 +1745,8 @@ bool IPSCCP::runOnModule(Module &M) {
DEBUG(dbgs() << "RESOLVING UNDEFS\n");
ResolvedUndefs = false;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
- ResolvedUndefs |= Solver.ResolvedUndefsIn(*F);
+ for (Function &F : M)
+ ResolvedUndefs |= Solver.ResolvedUndefsIn(F);
}
bool MadeChanges = false;
@@ -1795,79 +1756,47 @@ bool IPSCCP::runOnModule(Module &M) {
//
SmallVector<BasicBlock*, 512> BlocksToErase;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration())
+ for (Function &F : M) {
+ if (F.isDeclaration())
continue;
- if (Solver.isBlockExecutable(&F->front())) {
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI) {
- if (AI->use_empty() || AI->getType()->isStructTy()) continue;
-
- // TODO: Could use getStructLatticeValueFor to find out if the entire
- // result is a constant and replace it entirely if so.
-
- LatticeVal IV = Solver.getLatticeValueFor(&*AI);
- if (IV.isOverdefined()) continue;
-
- Constant *CST = IV.isConstant() ?
- IV.getConstant() : UndefValue::get(AI->getType());
- DEBUG(dbgs() << "*** Arg " << *AI << " = " << *CST <<"\n");
-
- // Replaces all of the uses of a variable with uses of the
- // constant.
- AI->replaceAllUsesWith(CST);
- ++IPNumArgsElimed;
+ if (Solver.isBlockExecutable(&F.front())) {
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;
+ ++AI) {
+ if (AI->use_empty())
+ continue;
+ if (tryToReplaceWithConstant(Solver, &*AI))
+ ++IPNumArgsElimed;
}
}
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (!Solver.isBlockExecutable(&*BB)) {
- DeleteInstructionInBlock(&*BB);
- MadeChanges = true;
+ DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
- TerminatorInst *TI = BB->getTerminator();
- for (BasicBlock *Succ : TI->successors()) {
- if (!Succ->empty() && isa<PHINode>(Succ->begin()))
- Succ->removePredecessor(&*BB);
- }
- if (!TI->use_empty())
- TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
- TI->eraseFromParent();
- new UnreachableInst(M.getContext(), &*BB);
+ ++NumDeadBlocks;
+ NumInstRemoved +=
+ changeToUnreachable(BB->getFirstNonPHI(), /*UseLLVMTrap=*/false);
+
+ MadeChanges = true;
- if (&*BB != &F->front())
+ if (&*BB != &F.front())
BlocksToErase.push_back(&*BB);
continue;
}
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
Instruction *Inst = &*BI++;
- if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
+ if (Inst->getType()->isVoidTy())
continue;
-
- // TODO: Could use getStructLatticeValueFor to find out if the entire
- // result is a constant and replace it entirely if so.
-
- LatticeVal IV = Solver.getLatticeValueFor(Inst);
- if (IV.isOverdefined())
- continue;
-
- Constant *Const = IV.isConstant()
- ? IV.getConstant() : UndefValue::get(Inst->getType());
- DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst << '\n');
-
- // Replaces all of the uses of a variable with uses of the
- // constant.
- Inst->replaceAllUsesWith(Const);
-
- // Delete the instruction.
- if (!isa<CallInst>(Inst) && !isa<TerminatorInst>(Inst))
- Inst->eraseFromParent();
-
- // Hey, we just changed something!
- MadeChanges = true;
- ++IPNumInstRemoved;
+ if (tryToReplaceInstWithConstant(
+ Solver, Inst,
+ !isa<CallInst>(Inst) &&
+ !isa<TerminatorInst>(Inst) /* shouldEraseFromParent */)) {
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++IPNumInstRemoved;
+ }
}
}
@@ -1918,7 +1847,7 @@ bool IPSCCP::runOnModule(Module &M) {
}
// Finally, delete the basic block.
- F->getBasicBlockList().erase(DeadBB);
+ F.getBasicBlockList().erase(DeadBB);
}
BlocksToErase.clear();
}
@@ -1937,18 +1866,17 @@ bool IPSCCP::runOnModule(Module &M) {
// TODO: Process multiple value ret instructions also.
const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
- for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
- E = RV.end(); I != E; ++I) {
- Function *F = I->first;
- if (I->second.isOverdefined() || F->getReturnType()->isVoidTy())
+ for (const auto &I : RV) {
+ Function *F = I.first;
+ if (I.second.isOverdefined() || F->getReturnType()->isVoidTy())
continue;
// We can only do this if we know that nothing else can call the function.
if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
continue;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ for (BasicBlock &BB : *F)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
if (!isa<UndefValue>(RI->getOperand(0)))
ReturnsToZap.push_back(RI);
}
@@ -1978,3 +1906,52 @@ bool IPSCCP::runOnModule(Module &M) {
return MadeChanges;
}
+
+PreservedAnalyses IPSCCPPass::run(Module &M, AnalysisManager<Module> &AM) {
+ const DataLayout &DL = M.getDataLayout();
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
+ if (!runIPSCCP(M, DL, &TLI))
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+}
+
+namespace {
+//===--------------------------------------------------------------------===//
+//
+/// IPSCCP Class - This class implements interprocedural Sparse Conditional
+/// Constant Propagation.
+///
+class IPSCCPLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ IPSCCPLegacyPass() : ModulePass(ID) {
+ initializeIPSCCPLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+ const DataLayout &DL = M.getDataLayout();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ return runIPSCCP(M, DL, TLI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+};
+} // end anonymous namespace
+
+char IPSCCPLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(IPSCCPLegacyPass, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(IPSCCPLegacyPass, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+
+// createIPSCCPPass - This is the public interface to this file.
+ModulePass *llvm::createIPSCCPPass() { return new IPSCCPLegacyPass(); }
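
The SCCP.cpp changes above split the pass into a shared runSCCP()/runIPSCCP() core, new-pass-manager SCCPPass/IPSCCPPass entry points, and thin legacy wrappers. As a rough illustration only (not part of the patch), the sketch below shows how the new-PM SCCPPass could be driven standalone; it assumes the AnalysisManager<Function> spelling used in this diff and registers TargetLibraryAnalysis by hand because SCCPPass::run() queries it.

  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/Transforms/Scalar/SCCP.h"
  using namespace llvm;

  // Runs the new-PM SCCP pass over a single function; per the hunk above,
  // SCCPPass returns PreservedAnalyses::all() when it changes nothing, so
  // !areAllPreserved() roughly means "something was folded".
  static bool runNewPMSCCP(Function &F) {
    FunctionAnalysisManager FAM;
    // SCCPPass::run() asks the manager for TargetLibraryAnalysis, so it has
    // to be registered before the pass executes.
    FAM.registerPass([] { return TargetLibraryAnalysis(); });
    FunctionPassManager FPM;
    FPM.addPass(SCCPPass());
    PreservedAnalyses PA = FPM.run(F, FAM);
    return !PA.areAllPreserved();
  }
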
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index a7361b5fe0839..7d33259c030b7 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -55,8 +55,8 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#if __cplusplus >= 201103L && !defined(NDEBUG)
-// We only use this for a debug check in C++11
+#ifndef NDEBUG
+// We only use this for a debug check.
#include <random>
#endif
@@ -87,12 +87,13 @@ static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
cl::Hidden);
namespace {
-/// \brief A custom IRBuilder inserter which prefixes all names if they are
-/// preserved.
-template <bool preserveNames = true>
-class IRBuilderPrefixedInserter
- : public IRBuilderDefaultInserter<preserveNames> {
+/// \brief A custom IRBuilder inserter which prefixes all names, but only in
+/// Assert builds.
+class IRBuilderPrefixedInserter : public IRBuilderDefaultInserter {
std::string Prefix;
+ const Twine getNameWithPrefix(const Twine &Name) const {
+ return Name.isTriviallyEmpty() ? Name : Prefix + Name;
+ }
public:
void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
@@ -100,27 +101,13 @@ public:
protected:
void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
BasicBlock::iterator InsertPt) const {
- IRBuilderDefaultInserter<preserveNames>::InsertHelper(
- I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
+ IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name), BB,
+ InsertPt);
}
};
-// Specialization for not preserving the name is trivial.
-template <>
-class IRBuilderPrefixedInserter<false>
- : public IRBuilderDefaultInserter<false> {
-public:
- void SetNamePrefix(const Twine &P) {}
-};
-
/// \brief Provide a typedef for IRBuilder that drops names in release builds.
-#ifndef NDEBUG
-typedef llvm::IRBuilder<true, ConstantFolder, IRBuilderPrefixedInserter<true>>
- IRBuilderTy;
-#else
-typedef llvm::IRBuilder<false, ConstantFolder, IRBuilderPrefixedInserter<false>>
- IRBuilderTy;
-#endif
+using IRBuilderTy = llvm::IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
}
namespace {
@@ -694,7 +681,7 @@ private:
// langref in a very strict sense. If we ever want to enable
// SROAStrictInbounds, this code should be factored cleanly into
// PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
- // by writing out the code here where we have tho underlying allocation
+ // by writing out the code here where we have the underlying allocation
// size readily available.
APInt GEPOffset = Offset;
const DataLayout &DL = GEPI.getModule()->getDataLayout();
@@ -1015,7 +1002,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
}),
Slices.end());
-#if __cplusplus >= 201103L && !defined(NDEBUG)
+#ifndef NDEBUG
if (SROARandomShuffleSlices) {
std::mt19937 MT(static_cast<unsigned>(sys::TimeValue::now().msec()));
std::shuffle(Slices.begin(), Slices.end(), MT);
@@ -1192,8 +1179,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (isDereferenceablePointer(InVal, DL) ||
- isSafeToLoadUnconditionally(InVal, TI, MaxAlign))
+ if (isSafeToLoadUnconditionally(InVal, MaxAlign, DL, TI))
continue;
return false;
@@ -1262,8 +1248,6 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
const DataLayout &DL = SI.getModule()->getDataLayout();
- bool TDerefable = isDereferenceablePointer(TValue, DL);
- bool FDerefable = isDereferenceablePointer(FValue, DL);
for (User *U : SI.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1273,11 +1257,9 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) {
// Both operands to the select need to be dereferencable, either
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
- if (!TDerefable &&
- !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment()))
+ if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI))
return false;
- if (!FDerefable &&
- !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment()))
+ if (!isSafeToLoadUnconditionally(FValue, LI->getAlignment(), DL, LI))
return false;
}
@@ -1570,7 +1552,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
Ptr = cast<Operator>(Ptr)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
- if (GA->mayBeOverridden())
+ if (GA->isInterposable())
break;
Ptr = GA->getAliasee();
} else {
@@ -1653,8 +1635,10 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
OldTy = OldTy->getScalarType();
NewTy = NewTy->getScalarType();
if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
- if (NewTy->isPointerTy() && OldTy->isPointerTy())
- return true;
+ if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
+ return cast<PointerType>(NewTy)->getPointerAddressSpace() ==
+ cast<PointerType>(OldTy)->getPointerAddressSpace();
+ }
if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
return true;
return false;
@@ -3123,9 +3107,14 @@ private:
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
// Extract the single value and store it using the indices.
- Value *Store = IRB.CreateStore(
- IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
- IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"));
+ //
+ // The gep and extractvalue values are factored out of the CreateStore
+ // call to make the output independent of the argument evaluation order.
+ Value *ExtractValue =
+ IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
+ Value *InBoundsGEP =
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
+ Value *Store = IRB.CreateStore(ExtractValue, InBoundsGEP);
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
}
@@ -3380,11 +3369,15 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
for (auto &P : AS.partitions()) {
for (Slice &S : P) {
Instruction *I = cast<Instruction>(S.getUse()->getUser());
- if (!S.isSplittable() ||S.endOffset() <= P.endOffset()) {
- // If this was a load we have to track that it can't participate in any
- // pre-splitting!
+ if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
+ // If this is a load we have to track that it can't participate in any
+ // pre-splitting. If this is a store of a load we have to track that
+ // that load also can't participate in any pre-splitting.
if (auto *LI = dyn_cast<LoadInst>(I))
UnsplittableLoads.insert(LI);
+ else if (auto *SI = dyn_cast<StoreInst>(I))
+ if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
+ UnsplittableLoads.insert(LI);
continue;
}
assert(P.endOffset() > S.beginOffset() &&
@@ -3411,9 +3404,9 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
Loads.push_back(LI);
- } else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser())) {
- if (!SI ||
- S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
+ } else if (auto *SI = dyn_cast<StoreInst>(I)) {
+ if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
+ // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
continue;
auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
if (!StoredLoad || !StoredLoad->isSimple())
@@ -3937,15 +3930,19 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
Worklist.insert(NewAI);
}
} else {
- // If we can't promote the alloca, iterate on it to check for new
- // refinements exposed by splitting the current alloca. Don't iterate on an
- // alloca which didn't actually change and didn't get promoted.
- if (NewAI != &AI)
- Worklist.insert(NewAI);
-
// Drop any post-promotion work items if promotion didn't happen.
while (PostPromotionWorklist.size() > PPWOldSize)
PostPromotionWorklist.pop_back();
+
+ // We couldn't promote and we didn't create a new partition, nothing
+ // happened.
+ if (NewAI == &AI)
+ return nullptr;
+
+ // If we can't promote the alloca, iterate on it to check for new
+ // refinements exposed by splitting the current alloca. Don't iterate on an
+ // alloca which didn't actually change and didn't get promoted.
+ Worklist.insert(NewAI);
}
return NewAI;
@@ -4024,12 +4021,12 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
auto *Var = DbgDecl->getVariable();
auto *Expr = DbgDecl->getExpression();
DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
- bool IsSplit = Pieces.size() > 1;
+ uint64_t AllocaSize = DL.getTypeSizeInBits(AI.getAllocatedType());
for (auto Piece : Pieces) {
// Create a piece expression describing the new partition or reuse AI's
// expression if there is only one partition.
auto *PieceExpr = Expr;
- if (IsSplit || Expr->isBitPiece()) {
+ if (Piece.Size < AllocaSize || Expr->isBitPiece()) {
// If this alloca is already a scalar replacement of a larger aggregate,
// Piece.Offset describes the offset inside the scalar.
uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
@@ -4043,6 +4040,9 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
Size = std::min(Size, AbsEnd - Start);
}
PieceExpr = DIB.createBitPieceExpression(Start, Size);
+ } else {
+ assert(Pieces.size() == 1 &&
+ "partition is as large as original alloca");
}
// Remove any existing dbg.declare intrinsic describing the same alloca.
@@ -4237,14 +4237,19 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
PostPromotionWorklist.clear();
} while (!Worklist.empty());
+ if (!Changed)
+ return PreservedAnalyses::all();
+
// FIXME: Even when promoting allocas we should preserve some abstract set of
// CFG-specific analyses.
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ return PA;
}
-PreservedAnalyses SROA::run(Function &F, AnalysisManager<Function> *AM) {
- return runImpl(F, AM->getResult<DominatorTreeAnalysis>(F),
- AM->getResult<AssumptionAnalysis>(F));
+PreservedAnalyses SROA::run(Function &F, AnalysisManager<Function> &AM) {
+ return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
+ AM.getResult<AssumptionAnalysis>(F));
}
/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
@@ -4260,7 +4265,7 @@ public:
initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
- if (skipOptnoneFunction(F))
+ if (skipFunction(F))
return false;
auto PA = Impl.runImpl(
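
One behavioural tightening in the SROA.cpp hunks above is in canConvertValue(): two pointer types now only count as freely convertible when their address spaces match. The fragment below is a minimal standalone restatement of that rule (an illustration, not code lifted from the patch); it assumes only the Type and PointerType APIs from llvm/IR/DerivedTypes.h.

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/Support/Casting.h"
  using namespace llvm;

  // Mirrors the new pointer/pointer case of canConvertValue(): treat two
  // pointer types as interchangeable only when their address spaces agree,
  // because a plain bitcast cannot cross address spaces.
  static bool pointersAreConvertible(Type *OldTy, Type *NewTy) {
    auto *OldPtr = dyn_cast<PointerType>(OldTy->getScalarType());
    auto *NewPtr = dyn_cast<PointerType>(NewTy->getScalarType());
    if (!OldPtr || !NewPtr)
      return false; // Only the pointer-to-pointer case is modelled here.
    return OldPtr->getAddressSpace() == NewPtr->getAddressSpace();
  }
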
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 52d477cc95736..f235b12e49cc9 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
@@ -31,49 +32,52 @@ using namespace llvm;
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCELegacyPassPass(Registry);
- initializeBDCEPass(Registry);
+ initializeBDCELegacyPassPass(Registry);
initializeAlignmentFromAssumptionsPass(Registry);
- initializeConstantHoistingPass(Registry);
+ initializeConstantHoistingLegacyPassPass(Registry);
initializeConstantPropagationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
- initializeDCEPass(Registry);
+ initializeDCELegacyPassPass(Registry);
initializeDeadInstEliminationPass(Registry);
initializeScalarizerPass(Registry);
- initializeDSEPass(Registry);
- initializeGVNPass(Registry);
+ initializeDSELegacyPassPass(Registry);
+ initializeGuardWideningLegacyPassPass(Registry);
+ initializeGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
+ initializeGVNHoistLegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
initializeInductiveRangeCheckEliminationPass(Registry);
- initializeIndVarSimplifyPass(Registry);
+ initializeIndVarSimplifyLegacyPassPass(Registry);
initializeJumpThreadingPass(Registry);
- initializeLICMPass(Registry);
- initializeLoopDeletionPass(Registry);
- initializeLoopAccessAnalysisPass(Registry);
- initializeLoopInstSimplifyPass(Registry);
+ initializeLegacyLICMPassPass(Registry);
+ initializeLoopDataPrefetchPass(Registry);
+ initializeLoopDeletionLegacyPassPass(Registry);
+ initializeLoopAccessLegacyAnalysisPass(Registry);
+ initializeLoopInstSimplifyLegacyPassPass(Registry);
initializeLoopInterchangePass(Registry);
- initializeLoopRotatePass(Registry);
+ initializeLoopRotateLegacyPassPass(Registry);
initializeLoopStrengthReducePass(Registry);
initializeLoopRerollPass(Registry);
initializeLoopUnrollPass(Registry);
initializeLoopUnswitchPass(Registry);
- initializeLoopIdiomRecognizePass(Registry);
- initializeLowerAtomicPass(Registry);
+ initializeLoopVersioningLICMPass(Registry);
+ initializeLoopIdiomRecognizeLegacyPassPass(Registry);
+ initializeLowerAtomicLegacyPassPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
- initializeMemCpyOptPass(Registry);
- initializeMergedLoadStoreMotionPass(Registry);
+ initializeLowerGuardIntrinsicPass(Registry);
+ initializeMemCpyOptLegacyPassPass(Registry);
+ initializeMergedLoadStoreMotionLegacyPassPass(Registry);
initializeNaryReassociatePass(Registry);
- initializePartiallyInlineLibCallsPass(Registry);
- initializeReassociatePass(Registry);
+ initializePartiallyInlineLibCallsLegacyPassPass(Registry);
+ initializeReassociateLegacyPassPass(Registry);
initializeRegToMemPass(Registry);
initializeRewriteStatepointsForGCPass(Registry);
- initializeSCCPPass(Registry);
- initializeIPSCCPPass(Registry);
+ initializeSCCPLegacyPassPass(Registry);
+ initializeIPSCCPLegacyPassPass(Registry);
initializeSROALegacyPassPass(Registry);
- initializeSROA_DTPass(Registry);
- initializeSROA_SSAUpPass(Registry);
initializeCFGSimplifyPassPass(Registry);
initializeStructurizeCFGPass(Registry);
- initializeSinkingPass(Registry);
+ initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
initializeSeparateConstOffsetFromGEPPass(Registry);
initializeSpeculativeExecutionPass(Registry);
@@ -81,9 +85,11 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoadCombinePass(Registry);
initializePlaceBackedgeSafepointsImplPass(Registry);
initializePlaceSafepointsPass(Registry);
- initializeFloat2IntPass(Registry);
- initializeLoopDistributePass(Registry);
+ initializeFloat2IntLegacyPassPass(Registry);
+ initializeLoopDistributeLegacyPass(Registry);
initializeLoopLoadEliminationPass(Registry);
+ initializeLoopSimplifyCFGLegacyPassPass(Registry);
+ initializeLoopVersioningPassPass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -154,6 +160,10 @@ void LLVMAddLoopRerollPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopRerollPass());
}
+void LLVMAddLoopSimplifyCFGPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopSimplifyCFGPass());
+}
+
void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopUnrollPass());
}
@@ -187,16 +197,16 @@ void LLVMAddSCCPPass(LLVMPassManagerRef PM) {
}
void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createScalarReplAggregatesPass());
+ unwrap(PM)->add(createSROAPass());
}
void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createScalarReplAggregatesPass(-1, false));
+ unwrap(PM)->add(createSROAPass());
}
void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
int Threshold) {
- unwrap(PM)->add(createScalarReplAggregatesPass(Threshold));
+ unwrap(PM)->add(createSROAPass());
}
void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) {
@@ -227,6 +237,10 @@ void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createEarlyCSEPass());
}
+void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGVNHoistPass());
+}
+
void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createTypeBasedAAWrapperPass());
}
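
In Scalar.cpp the C bindings keep their old names, but LLVMAddScalarReplAggregatesPass and its variants now all schedule the new SROA pass, with the old threshold arguments ignored. The snippet below is a hedged usage sketch built only from functions declared in llvm-c/Core.h and llvm-c/Transforms/Scalar.h; the module M is assumed to already exist and be populated.

  #include "llvm-c/Core.h"
  #include "llvm-c/Transforms/Scalar.h"

  // Schedules a few of the scalar passes touched above and runs them over
  // every function definition in M via the legacy function pass manager.
  static void runScalarPasses(LLVMModuleRef M) {
    LLVMPassManagerRef PM = LLVMCreateFunctionPassManagerForModule(M);
    LLVMAddScalarReplAggregatesPass(PM);                    // now createSROAPass()
    LLVMAddScalarReplAggregatesPassWithThreshold(PM, 128);  // threshold is ignored
    LLVMAddSCCPPass(PM);
    LLVMAddEarlyCSEPass(PM);
    LLVMInitializeFunctionPassManager(PM);
    for (LLVMValueRef F = LLVMGetFirstFunction(M); F; F = LLVMGetNextFunction(F))
      if (!LLVMIsDeclaration(F))
        LLVMRunFunctionPassManager(PM, F);
    LLVMFinalizeFunctionPassManager(PM);
    LLVMDisposePassManager(PM);
  }
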
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
deleted file mode 100644
index 114d22ddf2e44..0000000000000
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ /dev/null
@@ -1,2630 +0,0 @@
-//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This transformation implements the well known scalar replacement of
-// aggregates transformation. This xform breaks up alloca instructions of
-// aggregate type (structure or array) into individual alloca instructions for
-// each member (if possible). Then, if possible, it transforms the individual
-// alloca instructions into nice clean scalar SSA form.
-//
-// This combines a simple SRoA algorithm with the Mem2Reg algorithm because they
-// often interact, especially for C++ programs. As such, iterating between
-// SRoA, then Mem2Reg until we run out of things to promote works well.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "scalarrepl"
-
-STATISTIC(NumReplaced, "Number of allocas broken up");
-STATISTIC(NumPromoted, "Number of allocas promoted");
-STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
-STATISTIC(NumConverted, "Number of aggregates converted to scalar");
-
-namespace {
-#define SROA SROA_
- struct SROA : public FunctionPass {
- SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT)
- : FunctionPass(ID), HasDomTree(hasDT) {
- if (T == -1)
- SRThreshold = 128;
- else
- SRThreshold = T;
- if (ST == -1)
- StructMemberThreshold = 32;
- else
- StructMemberThreshold = ST;
- if (AT == -1)
- ArrayElementThreshold = 8;
- else
- ArrayElementThreshold = AT;
- if (SLT == -1)
- // Do not limit the scalar integer load size if no threshold is given.
- ScalarLoadThreshold = -1;
- else
- ScalarLoadThreshold = SLT;
- }
-
- bool runOnFunction(Function &F) override;
-
- bool performScalarRepl(Function &F);
- bool performPromotion(Function &F);
-
- private:
- bool HasDomTree;
-
- /// DeadInsts - Keep track of instructions we have made dead, so that
- /// we can remove them after we are done working.
- SmallVector<Value*, 32> DeadInsts;
-
- /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
- /// information about the uses. All these fields are initialized to false
- /// and set to true when something is learned.
- struct AllocaInfo {
- /// The alloca to promote.
- AllocaInst *AI;
-
- /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
- /// looping and avoid redundant work.
- SmallPtrSet<PHINode*, 8> CheckedPHIs;
-
- /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
- bool isUnsafe : 1;
-
- /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
- bool isMemCpySrc : 1;
-
- /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
- bool isMemCpyDst : 1;
-
- /// hasSubelementAccess - This is true if a subelement of the alloca is
- /// ever accessed, or false if the alloca is only accessed with mem
- /// intrinsics or load/store that only access the entire alloca at once.
- bool hasSubelementAccess : 1;
-
- /// hasALoadOrStore - This is true if there are any loads or stores to it.
- /// The alloca may just be accessed with memcpy, for example, which would
- /// not set this.
- bool hasALoadOrStore : 1;
-
- explicit AllocaInfo(AllocaInst *ai)
- : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
- hasSubelementAccess(false), hasALoadOrStore(false) {}
- };
-
- /// SRThreshold - The maximum alloca size to considered for SROA.
- unsigned SRThreshold;
-
- /// StructMemberThreshold - The maximum number of members a struct can
- /// contain to be considered for SROA.
- unsigned StructMemberThreshold;
-
- /// ArrayElementThreshold - The maximum number of elements an array can
- /// have to be considered for SROA.
- unsigned ArrayElementThreshold;
-
- /// ScalarLoadThreshold - The maximum size in bits of scalars to load when
- /// converting to scalar
- unsigned ScalarLoadThreshold;
-
- void MarkUnsafe(AllocaInfo &I, Instruction *User) {
- I.isUnsafe = true;
- DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
- }
-
- bool isSafeAllocaToScalarRepl(AllocaInst *AI);
-
- void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
- void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
- AllocaInfo &Info);
- void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
- void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
- Type *MemOpType, bool isStore, AllocaInfo &Info,
- Instruction *TheAccess, bool AllowWholeAccess);
- bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
- const DataLayout &DL);
- uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
- const DataLayout &DL);
-
- void DoScalarReplacement(AllocaInst *AI,
- std::vector<AllocaInst*> &WorkList);
- void DeleteDeadInstructions();
-
- void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
- SmallVectorImpl<AllocaInst *> &NewElts);
- void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
- SmallVectorImpl<AllocaInst *> &NewElts);
- void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
- SmallVectorImpl<AllocaInst *> &NewElts);
- void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
- uint64_t Offset,
- SmallVectorImpl<AllocaInst *> &NewElts);
- void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
- AllocaInst *AI,
- SmallVectorImpl<AllocaInst *> &NewElts);
- void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
- SmallVectorImpl<AllocaInst *> &NewElts);
- void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
- SmallVectorImpl<AllocaInst *> &NewElts);
- bool ShouldAttemptScalarRepl(AllocaInst *AI);
- };
-
- // SROA_DT - SROA that uses DominatorTree.
- struct SROA_DT : public SROA {
- static char ID;
- public:
- SROA_DT(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
- SROA(T, true, ID, ST, AT, SLT) {
- initializeSROA_DTPass(*PassRegistry::getPassRegistry());
- }
-
- // getAnalysisUsage - This pass does not require any passes, but we know it
- // will not alter the CFG, so say so.
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.setPreservesCFG();
- }
- };
-
- // SROA_SSAUp - SROA that uses SSAUpdater.
- struct SROA_SSAUp : public SROA {
- static char ID;
- public:
- SROA_SSAUp(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
- SROA(T, false, ID, ST, AT, SLT) {
- initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
- }
-
- // getAnalysisUsage - This pass does not require any passes, but we know it
- // will not alter the CFG, so say so.
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.setPreservesCFG();
- }
- };
-
-}
-
-char SROA_DT::ID = 0;
-char SROA_SSAUp::ID = 0;
-
-INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
- "Scalar Replacement of Aggregates (DT)", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
- "Scalar Replacement of Aggregates (DT)", false, false)
-
-INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
- "Scalar Replacement of Aggregates (SSAUp)", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
- "Scalar Replacement of Aggregates (SSAUp)", false, false)
-
-// Public interface to the ScalarReplAggregates pass
-FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
- bool UseDomTree,
- int StructMemberThreshold,
- int ArrayElementThreshold,
- int ScalarLoadThreshold) {
- if (UseDomTree)
- return new SROA_DT(Threshold, StructMemberThreshold, ArrayElementThreshold,
- ScalarLoadThreshold);
- return new SROA_SSAUp(Threshold, StructMemberThreshold,
- ArrayElementThreshold, ScalarLoadThreshold);
-}
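
For orientation, here is a minimal sketch of how a client could schedule this pass through the legacy pass manager of the same LLVM vintage as this file. The helper runScalarRepl is hypothetical (not part of LLVM); the default arguments of createScalarReplAggregatesPass select the DominatorTree-based variant with the built-in thresholds.

// Sketch only: schedules the old scalarrepl pass on one function. Assumes an
// LLVM tree contemporary with this file; runScalarRepl is a hypothetical
// helper, not part of LLVM.
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Scalar.h"

void runScalarRepl(llvm::Module &M, llvm::Function &F) {
  llvm::legacy::FunctionPassManager FPM(&M);
  // Defaults: UseDomTree = true and -1 thresholds, i.e. the SROA_DT variant
  // with the command-line/default limits.
  FPM.add(llvm::createScalarReplAggregatesPass());
  FPM.doInitialization();
  FPM.run(F);
  FPM.doFinalization();
}
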
-
-
-//===----------------------------------------------------------------------===//
-// Convert To Scalar Optimization.
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
-/// optimization, which scans the uses of an alloca and determines if it can
-/// rewrite it in terms of a single new alloca that can be mem2reg'd.
-class ConvertToScalarInfo {
- /// AllocaSize - The size of the alloca being considered in bytes.
- unsigned AllocaSize;
- const DataLayout &DL;
- unsigned ScalarLoadThreshold;
-
- /// IsNotTrivial - This is set to true if there is some access to the object
- /// which means that mem2reg can't promote it.
- bool IsNotTrivial;
-
- /// ScalarKind - Tracks the kind of alloca being considered for promotion,
- /// computed based on the uses of the alloca rather than the LLVM type system.
- enum {
- Unknown,
-
- // Accesses via GEPs that are consistent with element access of a vector
- // type. This will not be converted into a vector unless there is a later
- // access using an actual vector type.
- ImplicitVector,
-
- // Accesses via vector operations and GEPs that are consistent with the
- // layout of a vector type.
- Vector,
-
- // An integer bag-of-bits with bitwise operations for insertion and
- // extraction. Any combination of types can be converted into this kind
- // of scalar.
- Integer
- } ScalarKind;
-
- /// VectorTy - This tracks the vector type that we should promote the alloca
- /// to if it is possible to turn it into a vector. This starts out null; if
- /// it isn't possible to use a vector type, ScalarKind is set to Integer
- /// instead.
- VectorType *VectorTy;
-
- /// HadNonMemTransferAccess - True if there is at least one access to the
- /// alloca that is not a MemTransferInst. We don't want to turn structs into
- /// large integers unless there is some potential for optimization.
- bool HadNonMemTransferAccess;
-
- /// HadDynamicAccess - True if some element of this alloca was dynamic.
- /// We don't yet have support for turning a dynamic access into a large
- /// integer.
- bool HadDynamicAccess;
-
-public:
- explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL,
- unsigned SLT)
- : AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false),
- ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false),
- HadDynamicAccess(false) { }
-
- AllocaInst *TryConvert(AllocaInst *AI);
-
-private:
- bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx);
- void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
- bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
- void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
- Value *NonConstantIdx);
-
- Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
- uint64_t Offset, Value* NonConstantIdx,
- IRBuilder<> &Builder);
- Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
- uint64_t Offset, Value* NonConstantIdx,
- IRBuilder<> &Builder);
-};
-} // end anonymous namespace.
-
-
-/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
-/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
-/// alloca if possible or null if not.
-AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
- // If we can't convert this scalar, or if mem2reg can trivially do it, bail
- // out.
- if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial)
- return nullptr;
-
- // If an alloca has only memset / memcpy uses, it may still have an Unknown
- // ScalarKind. Treat it as an Integer below.
- if (ScalarKind == Unknown)
- ScalarKind = Integer;
-
- if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
- ScalarKind = Integer;
-
- // If we were able to find a vector type that can handle this with
- // insert/extract elements, and if there was at least one use that had
- // a vector type, promote this to a vector. We don't want to promote
- // random stuff that doesn't use vectors (e.g. <9 x double>) because then
- // we just get a lot of insert/extracts. If at least one vector is
- // involved, then we probably really do have a union of vector/array.
- Type *NewTy;
- if (ScalarKind == Vector) {
- assert(VectorTy && "Missing type for vector scalar.");
- DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
- << *VectorTy << '\n');
- NewTy = VectorTy; // Use the vector type.
- } else {
- unsigned BitWidth = AllocaSize * 8;
-
- // Do not convert to scalar integer if the alloca size exceeds the
- // scalar load threshold.
- if (BitWidth > ScalarLoadThreshold)
- return nullptr;
-
- if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
- !HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth))
- return nullptr;
- // Dynamic accesses on integers aren't yet supported. They need us to shift
- // by a dynamic amount which could be difficult to work out as we might not
- // know whether to use a left or right shift.
- if (ScalarKind == Integer && HadDynamicAccess)
- return nullptr;
-
- DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
- // Create and insert the integer alloca.
- NewTy = IntegerType::get(AI->getContext(), BitWidth);
- }
- AllocaInst *NewAI =
- new AllocaInst(NewTy, nullptr, "", &AI->getParent()->front());
- ConvertUsesToScalar(AI, NewAI, 0, nullptr);
- return NewAI;
-}
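
Stripped of the IR plumbing, the type-selection logic in TryConvert reduces to a handful of checks. The following plain-C++ sketch (not LLVM code; all names are illustrative) models that decision, assuming all sizes are given in bits:

#include <cstdint>

enum class Kind { Unknown, ImplicitVector, Vector, Integer };
enum class Choice { RejectConversion, UseVectorType, UseIntegerType };

// Models the decision at the end of TryConvert. All widths are in bits.
Choice chooseReplacementType(Kind K, unsigned allocaBits, unsigned vectorBits,
                             unsigned scalarLoadThreshold,
                             bool hadNonMemTransferAccess,
                             bool fitsInLegalInteger, bool hadDynamicAccess) {
  if (K == Kind::Unknown)              // only memset/memcpy uses were seen
    K = Kind::Integer;
  if (K == Kind::Vector && vectorBits != allocaBits)
    K = Kind::Integer;                 // vector doesn't cover the whole alloca
  if (K == Kind::Vector)
    return Choice::UseVectorType;      // promote to the accumulated vector
  if (allocaBits > scalarLoadThreshold)
    return Choice::RejectConversion;   // too big to load as a single scalar
  if (!hadNonMemTransferAccess && !fitsInLegalInteger)
    return Choice::RejectConversion;   // nothing to gain and not a legal iN
  if (K == Kind::Integer && hadDynamicAccess)
    return Choice::RejectConversion;   // dynamic bit offsets are unsupported
  return Choice::UseIntegerType;       // use iN with N = allocaBits
}
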
-
-/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
-/// (VectorTy) so far at the offset specified by Offset (which is specified in
-/// bytes).
-///
-/// There are two cases we handle here:
-/// 1) A union of vector types of the same size and potentially its elements.
-/// Here we turn element accesses into insert/extract element operations.
-/// This promotes a <4 x float> with a store of float to the third element
-/// into a <4 x float> that uses insert element.
-/// 2) A fully general blob of memory, which we turn into some (potentially
-/// large) integer type with extract and insert operations where the loads
-///    and stores would mutate the memory. We mark this by setting ScalarKind
-///    to Integer.
-void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
- uint64_t Offset) {
- // If we already decided to turn this into a blob of integer memory, there is
- // nothing to be done.
- if (ScalarKind == Integer)
- return;
-
- // If this could be contributing to a vector, analyze it.
-
- // If the In type is a vector that is the same size as the alloca, see if it
- // matches the existing VecTy.
- if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
- if (MergeInVectorType(VInTy, Offset))
- return;
- } else if (In->isFloatTy() || In->isDoubleTy() ||
- (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
- isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
- // Full width accesses can be ignored, because they can always be turned
- // into bitcasts.
- unsigned EltSize = In->getPrimitiveSizeInBits()/8;
- if (EltSize == AllocaSize)
- return;
-
- // If we're accessing something that could be an element of a vector, see
- // if the implied vector agrees with what we already have and if Offset is
- // compatible with it.
- if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
- (!VectorTy || EltSize == VectorTy->getElementType()
- ->getPrimitiveSizeInBits()/8)) {
- if (!VectorTy) {
- ScalarKind = ImplicitVector;
- VectorTy = VectorType::get(In, AllocaSize/EltSize);
- }
- return;
- }
- }
-
- // Otherwise, we have a case that we can't handle with an optimized vector
- // form. We can still turn this into a large integer.
- ScalarKind = Integer;
-}
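
The scalar branch above boils down to a divisibility check: the access must land on an element boundary, the alloca must be a whole number of such elements, and the element size must agree with any vector type already accumulated. A stand-alone model of that predicate (plain C++, illustrative names only):

#include <cstdint>

// Could a scalar access of eltBytes at byte offset 'offset' be treated as an
// element access of an allocaBytes-sized alloca viewed as a vector?
// existingEltBytes is 0 when no vector type has been accumulated yet.
bool fitsImpliedVector(uint64_t offset, uint64_t eltBytes,
                       uint64_t allocaBytes, uint64_t existingEltBytes) {
  if (eltBytes == 0 || offset % eltBytes != 0 || allocaBytes % eltBytes != 0)
    return false;
  return existingEltBytes == 0 || existingEltBytes == eltBytes;
}

// Example: a 4-byte float stored at offset 8 of a 16-byte alloca is consistent
// with viewing the alloca as <4 x float>: fitsImpliedVector(8, 4, 16, 0).
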
-
-/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
-/// returning true if the type was successfully merged and false otherwise.
-bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
- uint64_t Offset) {
- if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
- // If we're storing/loading a vector of the right size, allow it as a
- // vector. If this is the first vector we see, remember the type so that
- // we know the element size. If this is a subsequent access, ignore it
- // even if it is a differing type but the same size. Worst case we can
- // bitcast the resultant vectors.
- if (!VectorTy)
- VectorTy = VInTy;
- ScalarKind = Vector;
- return true;
- }
-
- return false;
-}
-
-/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
-/// its accesses to a single vector type, return true and set VectorTy to
-/// the new type. If we could only convert the alloca into a single promotable
-/// integer, return true; ScalarKind will end up as Integer. Further, if a use
-/// is not a completely trivial use that mem2reg could promote, set
-/// IsNotTrivial. Offset is the current offset from the base of the alloca
-/// being analyzed.
-///
-/// If we see at least one access to the value using an actual vector type,
-/// ScalarKind is set to Vector.
-bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
- Value* NonConstantIdx) {
- for (User *U : V->users()) {
- Instruction *UI = cast<Instruction>(U);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
- // Don't break volatile loads.
- if (!LI->isSimple())
- return false;
- // Don't touch MMX operations.
- if (LI->getType()->isX86_MMXTy())
- return false;
- HadNonMemTransferAccess = true;
- MergeInTypeForLoadOrStore(LI->getType(), Offset);
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
- // Storing the pointer, not into the value?
- if (SI->getOperand(0) == V || !SI->isSimple()) return false;
- // Don't touch MMX operations.
- if (SI->getOperand(0)->getType()->isX86_MMXTy())
- return false;
- HadNonMemTransferAccess = true;
- MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
- continue;
- }
-
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(UI)) {
- if (!onlyUsedByLifetimeMarkers(BCI))
- IsNotTrivial = true; // Can't be mem2reg'd.
- if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
- return false;
- continue;
- }
-
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UI)) {
- // If this is a GEP with variable indices, we can't handle it.
- PointerType* PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
- if (!PtrTy)
- return false;
-
- // Compute the offset that this GEP adds to the pointer.
- SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- Value *GEPNonConstantIdx = nullptr;
- if (!GEP->hasAllConstantIndices()) {
- if (!isa<VectorType>(PtrTy->getElementType()))
- return false;
- if (NonConstantIdx)
- return false;
- GEPNonConstantIdx = Indices.pop_back_val();
- if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
- return false;
- HadDynamicAccess = true;
- } else
- GEPNonConstantIdx = NonConstantIdx;
- uint64_t GEPOffset = DL.getIndexedOffset(PtrTy,
- Indices);
- // See if all uses can be converted.
- if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
- return false;
- IsNotTrivial = true; // Can't be mem2reg'd.
- HadNonMemTransferAccess = true;
- continue;
- }
-
- // If this is a constant sized memset of a constant value (e.g. 0) we can
- // handle it.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(UI)) {
- // Store to dynamic index.
- if (NonConstantIdx)
- return false;
- // Store of constant value.
- if (!isa<ConstantInt>(MSI->getValue()))
- return false;
-
- // Store of constant size.
- ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
- if (!Len)
- return false;
-
- // If the size differs from the alloca, we can only convert the alloca to
- // an integer bag-of-bits.
- // FIXME: This should handle all of the cases that are currently accepted
- // as vector element insertions.
- if (Len->getZExtValue() != AllocaSize || Offset != 0)
- ScalarKind = Integer;
-
- IsNotTrivial = true; // Can't be mem2reg'd.
- HadNonMemTransferAccess = true;
- continue;
- }
-
- // If this is a memcpy or memmove into or out of the whole allocation, we
- // can handle it like a load or store of the scalar type.
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(UI)) {
- // Store to dynamic index.
- if (NonConstantIdx)
- return false;
- ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
- if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0)
- return false;
-
- IsNotTrivial = true; // Can't be mem2reg'd.
- continue;
- }
-
- // If this is a lifetime intrinsic, we can handle it.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- continue;
- }
- }
-
- // Otherwise, we cannot handle this!
- return false;
- }
-
- return true;
-}
-
-/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
-/// directly. This happens when we are converting an "integer union" to a
-/// single integer scalar, or when we are converting a "vector union" to a
-/// vector with insert/extractelement instructions.
-///
-/// Offset is an offset from the original alloca, in bits that need to be
-/// shifted to the right. By the end of this, there should be no uses of Ptr.
-void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
- uint64_t Offset,
- Value* NonConstantIdx) {
- while (!Ptr->use_empty()) {
- Instruction *User = cast<Instruction>(Ptr->user_back());
-
- if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
- ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
- CI->eraseFromParent();
- continue;
- }
-
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
- // Compute the offset that this GEP adds to the pointer.
- SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- Value* GEPNonConstantIdx = nullptr;
- if (!GEP->hasAllConstantIndices()) {
- assert(!NonConstantIdx &&
- "Dynamic GEP reading from dynamic GEP unsupported");
- GEPNonConstantIdx = Indices.pop_back_val();
- } else
- GEPNonConstantIdx = NonConstantIdx;
- uint64_t GEPOffset = DL.getIndexedOffset(GEP->getPointerOperandType(),
- Indices);
- ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx);
- GEP->eraseFromParent();
- continue;
- }
-
- IRBuilder<> Builder(User);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- // The load is a bit extract from NewAI shifted right by Offset bits.
- Value *LoadedVal = Builder.CreateLoad(NewAI);
- Value *NewLoadVal
- = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset,
- NonConstantIdx, Builder);
- LI->replaceAllUsesWith(NewLoadVal);
- LI->eraseFromParent();
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- assert(SI->getOperand(0) != Ptr && "Consistency error!");
- Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
- Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
- NonConstantIdx, Builder);
- Builder.CreateStore(New, NewAI);
- SI->eraseFromParent();
-
- // If the load we just inserted is now dead, then the inserted store
- // overwrote the entire thing.
- if (Old->use_empty())
- Old->eraseFromParent();
- continue;
- }
-
- // If this is a constant sized memset of a constant value (e.g. 0) we can
- // transform it into a store of the expanded constant value.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
- assert(MSI->getRawDest() == Ptr && "Consistency error!");
- assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
- int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
- if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
- unsigned NumBytes = static_cast<unsigned>(SNumBytes);
- unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
-
- // Compute the value replicated the right number of times.
- APInt APVal(NumBytes*8, Val);
-
- // Splat the value if non-zero.
- if (Val)
- for (unsigned i = 1; i != NumBytes; ++i)
- APVal |= APVal << 8;
-
- Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
- Value *New = ConvertScalar_InsertValue(
- ConstantInt::get(User->getContext(), APVal),
- Old, Offset, nullptr, Builder);
- Builder.CreateStore(New, NewAI);
-
- // If the load we just inserted is now dead, then the memset overwrote
- // the entire thing.
- if (Old->use_empty())
- Old->eraseFromParent();
- }
- MSI->eraseFromParent();
- continue;
- }
-
- // If this is a memcpy or memmove into or out of the whole allocation, we
- // can handle it like a load or store of the scalar type.
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
- assert(Offset == 0 && "must be store to start of alloca");
- assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");
-
- // If the source and destination both refer to the same alloca, then this is
- // a noop copy-to-self, just delete it. Otherwise, emit a load and store
- // as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, DL, 0));
-
- if (GetUnderlyingObject(MTI->getSource(), DL, 0) != OrigAI) {
- // Dest must be OrigAI, change this to be a load from the original
- // pointer (bitcasted), then a store to our new alloca.
- assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
- Value *SrcPtr = MTI->getSource();
- PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
- PointerType* AIPTy = cast<PointerType>(NewAI->getType());
- if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
- AIPTy = PointerType::get(AIPTy->getElementType(),
- SPTy->getAddressSpace());
- }
- SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
-
- LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
- SrcVal->setAlignment(MTI->getAlignment());
- Builder.CreateStore(SrcVal, NewAI);
- } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) {
- // Src must be OrigAI, change this to be a load from NewAI then a store
- // through the original dest pointer (bitcasted).
- assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
- LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
-
- PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
- PointerType* AIPTy = cast<PointerType>(NewAI->getType());
- if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
- AIPTy = PointerType::get(AIPTy->getElementType(),
- DPTy->getAddressSpace());
- }
- Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
-
- StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
- NewStore->setAlignment(MTI->getAlignment());
- } else {
- // Noop transfer. Src == Dst
- }
-
- MTI->eraseFromParent();
- continue;
- }
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- // There's no need to preserve these, as the resulting alloca will be
- // converted to a register anyways.
- II->eraseFromParent();
- continue;
- }
- }
-
- llvm_unreachable("Unsupported operation!");
- }
-}
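
One detail worth calling out from the memset case above: the (at most one byte) fill value is replicated across every byte of the store by repeated shift-and-or before being inserted. A plain-C++ model of that splat on a 64-bit value (the real code uses APInt, so any width works):

#include <cstdint>

// Replicate an 8-bit fill value across numBytes bytes (numBytes <= 8 here).
uint64_t splatByte(uint8_t fill, unsigned numBytes) {
  uint64_t v = fill;
  if (fill != 0)
    for (unsigned i = 1; i != numBytes; ++i)
      v |= v << 8;   // 0xAB -> 0xABAB -> 0xABABAB -> 0xABABABAB for numBytes=4
  return v;
}
// splatByte(0xAB, 4) == 0xABABABAB, splatByte(0, 4) == 0.
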
-
-/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
-/// or vector value FromVal, extracting the bits from the offset specified by
-/// Offset. This returns the value, which is of type ToType.
-///
-/// This happens when we are converting an "integer union" to a single
-/// integer scalar, or when we are converting a "vector union" to a vector with
-/// insert/extractelement instructions.
-///
-/// Offset is an offset from the original alloca, in bits that need to be
-/// shifted to the right.
-Value *ConvertToScalarInfo::
-ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
- uint64_t Offset, Value* NonConstantIdx,
- IRBuilder<> &Builder) {
- // If the load is of the whole new alloca, no conversion is needed.
- Type *FromType = FromVal->getType();
- if (FromType == ToType && Offset == 0)
- return FromVal;
-
- // If the result alloca is a vector type, this is either an element
- // access or a bitcast to another vector type of the same size.
- if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
- unsigned FromTypeSize = DL.getTypeAllocSize(FromType);
- unsigned ToTypeSize = DL.getTypeAllocSize(ToType);
- if (FromTypeSize == ToTypeSize)
- return Builder.CreateBitCast(FromVal, ToType);
-
- // Otherwise it must be an element access.
- unsigned Elt = 0;
- if (Offset) {
- unsigned EltSize = DL.getTypeAllocSizeInBits(VTy->getElementType());
- Elt = Offset/EltSize;
- assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
- }
- // Return the element extracted out of it.
- Value *Idx;
- if (NonConstantIdx) {
- if (Elt)
- Idx = Builder.CreateAdd(NonConstantIdx,
- Builder.getInt32(Elt),
- "dyn.offset");
- else
- Idx = NonConstantIdx;
- } else
- Idx = Builder.getInt32(Elt);
- Value *V = Builder.CreateExtractElement(FromVal, Idx);
- if (V->getType() != ToType)
- V = Builder.CreateBitCast(V, ToType);
- return V;
- }
-
- // If ToType is a first class aggregate, extract out each of the pieces and
- // use insertvalue's to form the FCA.
- if (StructType *ST = dyn_cast<StructType>(ToType)) {
- assert(!NonConstantIdx &&
- "Dynamic indexing into struct types not supported");
- const StructLayout &Layout = *DL.getStructLayout(ST);
- Value *Res = UndefValue::get(ST);
- for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
- Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
- Offset+Layout.getElementOffsetInBits(i),
- nullptr, Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i);
- }
- return Res;
- }
-
- if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
- assert(!NonConstantIdx &&
- "Dynamic indexing into array types not supported");
- uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
- Value *Res = UndefValue::get(AT);
- for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
- Offset+i*EltSize, nullptr,
- Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i);
- }
- return Res;
- }
-
- // Otherwise, this must be a union that was converted to an integer value.
- IntegerType *NTy = cast<IntegerType>(FromVal->getType());
-
- // If this is a big-endian system and the load is narrower than the
- // full alloca type, we need to do a shift to get the right bits.
- int ShAmt = 0;
- if (DL.isBigEndian()) {
- // On big-endian machines, the lowest bit is stored at the bit offset
- // from the pointer given by getTypeStoreSizeInBits. This matters for
- // integers with a bitwidth that is not a multiple of 8.
- ShAmt = DL.getTypeStoreSizeInBits(NTy) -
- DL.getTypeStoreSizeInBits(ToType) - Offset;
- } else {
- ShAmt = Offset;
- }
-
-  // Note: we tolerate negative shift amounts (handled with shl), which are
-  // otherwise not defined. We do this to support (for example) loads off the
-  // end of a structure where only some bits are used.
- if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
- FromVal = Builder.CreateLShr(FromVal,
- ConstantInt::get(FromVal->getType(), ShAmt));
- else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
- FromVal = Builder.CreateShl(FromVal,
- ConstantInt::get(FromVal->getType(), -ShAmt));
-
- // Finally, unconditionally truncate the integer to the right width.
- unsigned LIBitWidth = DL.getTypeSizeInBits(ToType);
- if (LIBitWidth < NTy->getBitWidth())
- FromVal =
- Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth));
- else if (LIBitWidth > NTy->getBitWidth())
- FromVal =
- Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth));
-
- // If the result is an integer, this is a trunc or bitcast.
- if (ToType->isIntegerTy()) {
- // Should be done.
- } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
- // Just do a bitcast, we know the sizes match up.
- FromVal = Builder.CreateBitCast(FromVal, ToType);
- } else {
- // Otherwise must be a pointer.
- FromVal = Builder.CreateIntToPtr(FromVal, ToType);
- }
- assert(FromVal->getType() == ToType && "Didn't convert right?");
- return FromVal;
-}
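
For the integer ("bag of bits") case, the extraction above is a shift followed by a truncate, with the shift amount measured from the low end on little-endian targets and from the high end on big-endian ones. A plain-C++ model of the in-range case (the real code also tolerates negative shift amounts for loads that run off the end of the value); all names are illustrative:

#include <cstdint>

// Extract a 'width'-bit field stored at bit offset 'offset' of a stored value
// that is 'totalWidth' bits wide (all byte-aligned and <= 64 for simplicity).
uint64_t extractBits(uint64_t stored, unsigned totalWidth, unsigned offset,
                     unsigned width, bool bigEndian) {
  unsigned shAmt = bigEndian ? totalWidth - width - offset : offset;
  uint64_t v = stored >> shAmt;                  // line the field up at bit 0
  if (width < 64)
    v &= (uint64_t(1) << width) - 1;             // truncate to 'width' bits
  return v;
}
// Little-endian: byte 1 of the 32-bit value 0x11223344 is
// extractBits(0x11223344, 32, 8, 8, false) == 0x33.
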
-
-/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
-/// or vector value "Old" at the offset specified by Offset.
-///
-/// This happens when we are converting an "integer union" to a
-/// single integer scalar, or when we are converting a "vector union" to a
-/// vector with insert/extractelement instructions.
-///
-/// Offset is an offset from the original alloca, in bits that need to be
-/// shifted to the right.
-///
-/// NonConstantIdx is an index value if there was a GEP with a non-constant
-/// index value. If this is 0 then all GEPs used to find this insert address
-/// are constant.
-Value *ConvertToScalarInfo::
-ConvertScalar_InsertValue(Value *SV, Value *Old,
- uint64_t Offset, Value* NonConstantIdx,
- IRBuilder<> &Builder) {
-  // Convert the stored type to the actual type, shift it left to insert it,
-  // and then 'or' it into place.
- Type *AllocaType = Old->getType();
- LLVMContext &Context = Old->getContext();
-
- if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
- uint64_t VecSize = DL.getTypeAllocSizeInBits(VTy);
- uint64_t ValSize = DL.getTypeAllocSizeInBits(SV->getType());
-
- // Changing the whole vector with memset or with an access of a different
- // vector type?
- if (ValSize == VecSize)
- return Builder.CreateBitCast(SV, AllocaType);
-
- // Must be an element insertion.
- Type *EltTy = VTy->getElementType();
- if (SV->getType() != EltTy)
- SV = Builder.CreateBitCast(SV, EltTy);
- uint64_t EltSize = DL.getTypeAllocSizeInBits(EltTy);
- unsigned Elt = Offset/EltSize;
- Value *Idx;
- if (NonConstantIdx) {
- if (Elt)
- Idx = Builder.CreateAdd(NonConstantIdx,
- Builder.getInt32(Elt),
- "dyn.offset");
- else
- Idx = NonConstantIdx;
- } else
- Idx = Builder.getInt32(Elt);
- return Builder.CreateInsertElement(Old, SV, Idx);
- }
-
- // If SV is a first-class aggregate value, insert each value recursively.
- if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
- assert(!NonConstantIdx &&
- "Dynamic indexing into struct types not supported");
- const StructLayout &Layout = *DL.getStructLayout(ST);
- for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i);
- Old = ConvertScalar_InsertValue(Elt, Old,
- Offset+Layout.getElementOffsetInBits(i),
- nullptr, Builder);
- }
- return Old;
- }
-
- if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
- assert(!NonConstantIdx &&
- "Dynamic indexing into array types not supported");
- uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
- for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i);
- Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, nullptr,
- Builder);
- }
- return Old;
- }
-
- // If SV is a float, convert it to the appropriate integer type.
- // If it is a pointer, do the same.
- unsigned SrcWidth = DL.getTypeSizeInBits(SV->getType());
- unsigned DestWidth = DL.getTypeSizeInBits(AllocaType);
- unsigned SrcStoreWidth = DL.getTypeStoreSizeInBits(SV->getType());
- unsigned DestStoreWidth = DL.getTypeStoreSizeInBits(AllocaType);
- if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
- SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
- else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, DL.getIntPtrType(SV->getType()));
-
- // Zero extend or truncate the value if needed.
- if (SV->getType() != AllocaType) {
- if (SV->getType()->getPrimitiveSizeInBits() <
- AllocaType->getPrimitiveSizeInBits())
- SV = Builder.CreateZExt(SV, AllocaType);
- else {
- // Truncation may be needed if storing more than the alloca can hold
- // (undefined behavior).
- SV = Builder.CreateTrunc(SV, AllocaType);
- SrcWidth = DestWidth;
- SrcStoreWidth = DestStoreWidth;
- }
- }
-
- // If this is a big-endian system and the store is narrower than the
- // full alloca type, we need to do a shift to get the right bits.
- int ShAmt = 0;
- if (DL.isBigEndian()) {
- // On big-endian machines, the lowest bit is stored at the bit offset
- // from the pointer given by getTypeStoreSizeInBits. This matters for
- // integers with a bitwidth that is not a multiple of 8.
- ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
- } else {
- ShAmt = Offset;
- }
-
-  // Note: we tolerate negative shift amounts (handled with shr), which are
-  // otherwise not defined. We do this to support (for example) stores off the
-  // end of a structure where only some bits in the structure are set.
- APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
- if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
- SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
- Mask <<= ShAmt;
- } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
- SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
- Mask = Mask.lshr(-ShAmt);
- }
-
- // Mask out the bits we are about to insert from the old value, and or
- // in the new bits.
- if (SrcWidth != DestWidth) {
- assert(DestWidth > SrcWidth);
- Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
- SV = Builder.CreateOr(Old, SV, "ins");
- }
- return SV;
-}
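
The store path mirrors the extraction: widen the incoming value, shift it into position, clear the destination bits with a mask, and OR in the new bits. A plain-C++ model of that mask-and-merge step, again for in-range shift amounts only and with illustrative names:

#include <cstdint>

// Insert the low 'width' bits of 'val' at bit offset 'offset' of 'old', where
// 'old' is 'totalWidth' bits wide (all byte-aligned and <= 64 for simplicity).
uint64_t insertBits(uint64_t old, uint64_t val, unsigned totalWidth,
                    unsigned offset, unsigned width, bool bigEndian) {
  unsigned shAmt = bigEndian ? totalWidth - width - offset : offset;
  uint64_t mask = width < 64 ? (uint64_t(1) << width) - 1 : ~uint64_t(0);
  mask <<= shAmt;                      // the bits being replaced
  val = (val << shAmt) & mask;         // the new bits, shifted into place
  return (old & ~mask) | val;          // clear the old bits, OR in the new ones
}
// insertBits(0x11223344, 0xEE, 32, 8, 8, false) == 0x1122EE44.
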
-
-
-//===----------------------------------------------------------------------===//
-// SRoA Driver
-//===----------------------------------------------------------------------===//
-
-
-bool SROA::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
- bool Changed = performPromotion(F);
-
- while (1) {
- bool LocalChange = performScalarRepl(F);
- if (!LocalChange) break; // No need to repromote if no scalarrepl
- Changed = true;
- LocalChange = performPromotion(F);
- if (!LocalChange) break; // No need to re-scalarrepl if no promotion
- }
-
- return Changed;
-}
-
-namespace {
-class AllocaPromoter : public LoadAndStorePromoter {
- AllocaInst *AI;
- DIBuilder *DIB;
- SmallVector<DbgDeclareInst *, 4> DDIs;
- SmallVector<DbgValueInst *, 4> DVIs;
-public:
- AllocaPromoter(ArrayRef<Instruction*> Insts, SSAUpdater &S,
- DIBuilder *DB)
- : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {}
-
- void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
- // Remember which alloca we're promoting (for isInstInList).
- this->AI = AI;
- if (auto *L = LocalAsMetadata::getIfExists(AI)) {
- if (auto *DINode = MetadataAsValue::getIfExists(AI->getContext(), L)) {
- for (User *U : DINode->users())
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
- DDIs.push_back(DDI);
- else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
- DVIs.push_back(DVI);
- }
- }
-
- LoadAndStorePromoter::run(Insts);
- AI->eraseFromParent();
- for (SmallVectorImpl<DbgDeclareInst *>::iterator I = DDIs.begin(),
- E = DDIs.end(); I != E; ++I) {
- DbgDeclareInst *DDI = *I;
- DDI->eraseFromParent();
- }
- for (SmallVectorImpl<DbgValueInst *>::iterator I = DVIs.begin(),
- E = DVIs.end(); I != E; ++I) {
- DbgValueInst *DVI = *I;
- DVI->eraseFromParent();
- }
- }
-
- bool isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction*> &Insts) const override {
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->getOperand(0) == AI;
- return cast<StoreInst>(I)->getPointerOperand() == AI;
- }
-
- void updateDebugInfo(Instruction *Inst) const override {
- for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
- E = DDIs.end(); I != E; ++I) {
- DbgDeclareInst *DDI = *I;
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
- }
- for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
- E = DVIs.end(); I != E; ++I) {
- DbgValueInst *DVI = *I;
- Value *Arg = nullptr;
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-        // If an argument is zero or sign extended then use the argument
-        // directly. The extension may be zapped by an optimization pass in the
-        // future.
- if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
- Arg = dyn_cast<Argument>(ZExt->getOperand(0));
- if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
- Arg = dyn_cast<Argument>(SExt->getOperand(0));
- if (!Arg)
- Arg = SI->getOperand(0);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Arg = LI->getOperand(0);
- } else {
- continue;
- }
- DIB->insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
- DVI->getExpression(), DVI->getDebugLoc(),
- Inst);
- }
- }
-};
-} // end anon namespace
-
-/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
-/// subsequently loaded can be rewritten to load both input pointers and then
-/// select between the result, allowing the load of the alloca to be promoted.
-/// From this:
-/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
-/// %V = load i32* %P2
-/// to:
-/// %V1 = load i32* %Alloca -> will be mem2reg'd
-/// %V2 = load i32* %Other
-/// %V = select i1 %cond, i32 %V1, i32 %V2
-///
-/// We can do this to a select if its only uses are loads and if both operands
-/// of the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst *SI) {
- const DataLayout &DL = SI->getModule()->getDataLayout();
- bool TDerefable = isDereferenceablePointer(SI->getTrueValue(), DL);
- bool FDerefable = isDereferenceablePointer(SI->getFalseValue(), DL);
-
- for (User *U : SI->users()) {
- LoadInst *LI = dyn_cast<LoadInst>(U);
- if (!LI || !LI->isSimple()) return false;
-
- // Both operands to the select need to be dereferencable, either absolutely
- // (e.g. allocas) or at this point because we can see other accesses to it.
- if (!TDerefable &&
- !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
- LI->getAlignment()))
- return false;
- if (!FDerefable &&
- !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
- LI->getAlignment()))
- return false;
- }
-
- return true;
-}
-
-/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
-/// subsequently loaded can be rewritten to load the incoming pointers in the pred
-/// blocks and then PHI the results, allowing the load of the alloca to be
-/// promoted.
-/// From this:
-/// %P2 = phi [i32* %Alloca, i32* %Other]
-/// %V = load i32* %P2
-/// to:
-/// %V1 = load i32* %Alloca -> will be mem2reg'd
-/// ...
-/// %V2 = load i32* %Other
-/// ...
-/// %V = phi [i32 %V1, i32 %V2]
-///
-/// We can do this to a PHI if its only uses are loads and if each incoming
-/// pointer can be loaded unconditionally.
-static bool isSafePHIToSpeculate(PHINode *PN) {
- // For now, we can only do this promotion if the load is in the same block as
- // the PHI, and if there are no stores between the phi and load.
- // TODO: Allow recursive phi users.
- // TODO: Allow stores.
- BasicBlock *BB = PN->getParent();
- unsigned MaxAlign = 0;
- for (User *U : PN->users()) {
- LoadInst *LI = dyn_cast<LoadInst>(U);
- if (!LI || !LI->isSimple()) return false;
-
- // For now we only allow loads in the same block as the PHI. This is a
- // common case that happens when instcombine merges two loads through a PHI.
- if (LI->getParent() != BB) return false;
-
- // Ensure that there are no instructions between the PHI and the load that
- // could store.
- for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
- if (BBI->mayWriteToMemory())
- return false;
-
- MaxAlign = std::max(MaxAlign, LI->getAlignment());
- }
-
- const DataLayout &DL = PN->getModule()->getDataLayout();
-
- // Okay, we know that we have one or more loads in the same block as the PHI.
- // We can transform this if it is safe to push the loads into the predecessor
- // blocks. The only thing to watch out for is that we can't put a possibly
- // trapping load in the predecessor if it is a critical edge.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *Pred = PN->getIncomingBlock(i);
- Value *InVal = PN->getIncomingValue(i);
-
- // If the terminator of the predecessor has side-effects (an invoke),
- // there is no safe place to put a load in the predecessor.
- if (Pred->getTerminator()->mayHaveSideEffects())
- return false;
-
- // If the value is produced by the terminator of the predecessor
- // (an invoke), there is no valid place to put a load in the predecessor.
- if (Pred->getTerminator() == InVal)
- return false;
-
- // If the predecessor has a single successor, then the edge isn't critical.
- if (Pred->getTerminator()->getNumSuccessors() == 1)
- continue;
-
- // If this pointer is always safe to load, or if we can prove that there is
- // already a load in the block, then we can move the load to the pred block.
- if (isDereferenceablePointer(InVal, DL) ||
- isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign))
- continue;
-
- return false;
- }
-
- return true;
-}
-
-
-/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
-/// direct (non-volatile) loads and stores to it. If the alloca is close but
-/// not quite there, this will transform the code to allow promotion. As such,
-/// it is a non-pure predicate.
-static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout &DL) {
- SetVector<Instruction*, SmallVector<Instruction*, 4>,
- SmallPtrSet<Instruction*, 4> > InstsToRewrite;
- for (User *U : AI->users()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- if (!LI->isSimple())
- return false;
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(0) == AI || !SI->isSimple())
- return false; // Don't allow a store OF the AI, only INTO the AI.
- continue;
- }
-
- if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
- // If the condition being selected on is a constant, fold the select, yes
- // this does (rarely) happen early on.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
- Value *Result = SI->getOperand(1+CI->isZero());
- SI->replaceAllUsesWith(Result);
- SI->eraseFromParent();
-
-        // This is very rare, and we just scrambled the use list of AI, so
-        // start over completely.
- return tryToMakeAllocaBePromotable(AI, DL);
- }
-
- // If it is safe to turn "load (select c, AI, ptr)" into a select of two
- // loads, then we can transform this by rewriting the select.
- if (!isSafeSelectToSpeculate(SI))
- return false;
-
- InstsToRewrite.insert(SI);
- continue;
- }
-
- if (PHINode *PN = dyn_cast<PHINode>(U)) {
- if (PN->use_empty()) { // Dead PHIs can be stripped.
- InstsToRewrite.insert(PN);
- continue;
- }
-
- // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
- // in the pred blocks, then we can transform this by rewriting the PHI.
- if (!isSafePHIToSpeculate(PN))
- return false;
-
- InstsToRewrite.insert(PN);
- continue;
- }
-
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- if (onlyUsedByLifetimeMarkers(BCI)) {
- InstsToRewrite.insert(BCI);
- continue;
- }
- }
-
- return false;
- }
-
- // If there are no instructions to rewrite, then all uses are load/stores and
- // we're done!
- if (InstsToRewrite.empty())
- return true;
-
- // If we have instructions that need to be rewritten for this to be promotable
- // take care of it now.
- for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
- // This could only be a bitcast used by nothing but lifetime intrinsics.
- for (BitCastInst::user_iterator I = BCI->user_begin(), E = BCI->user_end();
- I != E;)
- cast<Instruction>(*I++)->eraseFromParent();
- BCI->eraseFromParent();
- continue;
- }
-
- if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
- // Selects in InstsToRewrite only have load uses. Rewrite each as two
- // loads with a new select.
- while (!SI->use_empty()) {
- LoadInst *LI = cast<LoadInst>(SI->user_back());
-
- IRBuilder<> Builder(LI);
- LoadInst *TrueLoad =
- Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
- LoadInst *FalseLoad =
- Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
-
- // Transfer alignment and AA info if present.
- TrueLoad->setAlignment(LI->getAlignment());
- FalseLoad->setAlignment(LI->getAlignment());
-
- AAMDNodes Tags;
- LI->getAAMetadata(Tags);
- if (Tags) {
- TrueLoad->setAAMetadata(Tags);
- FalseLoad->setAAMetadata(Tags);
- }
-
- Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
- V->takeName(LI);
- LI->replaceAllUsesWith(V);
- LI->eraseFromParent();
- }
-
- // Now that all the loads are gone, the select is gone too.
- SI->eraseFromParent();
- continue;
- }
-
- // Otherwise, we have a PHI node which allows us to push the loads into the
- // predecessors.
- PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
- if (PN->use_empty()) {
- PN->eraseFromParent();
- continue;
- }
-
- Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
- PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
- PN->getName()+".ld", PN);
-
- // Get the AA tags and alignment to use from one of the loads. It doesn't
- // matter which one we get and if any differ, it doesn't matter.
- LoadInst *SomeLoad = cast<LoadInst>(PN->user_back());
-
- AAMDNodes AATags;
- SomeLoad->getAAMetadata(AATags);
- unsigned Align = SomeLoad->getAlignment();
-
- // Rewrite all loads of the PN to use the new PHI.
- while (!PN->use_empty()) {
- LoadInst *LI = cast<LoadInst>(PN->user_back());
- LI->replaceAllUsesWith(NewPN);
- LI->eraseFromParent();
- }
-
- // Inject loads into all of the pred blocks. Keep track of which blocks we
- // insert them into in case we have multiple edges from the same block.
- DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
-
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *Pred = PN->getIncomingBlock(i);
- LoadInst *&Load = InsertedLoads[Pred];
- if (!Load) {
- Load = new LoadInst(PN->getIncomingValue(i),
- PN->getName() + "." + Pred->getName(),
- Pred->getTerminator());
- Load->setAlignment(Align);
- if (AATags) Load->setAAMetadata(AATags);
- }
-
- NewPN->addIncoming(Load, Pred);
- }
-
- PN->eraseFromParent();
- }
-
- ++NumAdjusted;
- return true;
-}
-
-bool SROA::performPromotion(Function &F) {
- std::vector<AllocaInst*> Allocas;
- const DataLayout &DL = F.getParent()->getDataLayout();
- DominatorTree *DT = nullptr;
- if (HasDomTree)
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AssumptionCache &AC =
- getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-
- BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
- DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
- bool Changed = false;
- SmallVector<Instruction*, 64> Insts;
- while (1) {
- Allocas.clear();
-
- // Find allocas that are safe to promote, by looking at all instructions in
- // the entry node
- for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
- if (tryToMakeAllocaBePromotable(AI, DL))
- Allocas.push_back(AI);
-
- if (Allocas.empty()) break;
-
- if (HasDomTree)
- PromoteMemToReg(Allocas, *DT, nullptr, &AC);
- else {
- SSAUpdater SSA;
- for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
- AllocaInst *AI = Allocas[i];
-
- // Build list of instructions to promote.
- for (User *U : AI->users())
- Insts.push_back(cast<Instruction>(U));
- AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
- Insts.clear();
- }
- }
- NumPromoted += Allocas.size();
- Changed = true;
- }
-
- return Changed;
-}
-
-
-/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
-/// SROA. It must be a struct or array type with a small number of elements.
-bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
- Type *T = AI->getAllocatedType();
- // Do not promote any struct that has too many members.
- if (StructType *ST = dyn_cast<StructType>(T))
- return ST->getNumElements() <= StructMemberThreshold;
- // Do not promote any array that has too many elements.
- if (ArrayType *AT = dyn_cast<ArrayType>(T))
- return AT->getNumElements() <= ArrayElementThreshold;
- return false;
-}
-
-// performScalarRepl - A simple worklist-driven algorithm that runs over all
-// of the alloca instructions in the entry block, removing them if they are
-// only used by getelementptr instructions.
-//
-bool SROA::performScalarRepl(Function &F) {
- std::vector<AllocaInst*> WorkList;
- const DataLayout &DL = F.getParent()->getDataLayout();
-
- // Scan the entry basic block, adding allocas to the worklist.
- BasicBlock &BB = F.getEntryBlock();
- for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
- if (AllocaInst *A = dyn_cast<AllocaInst>(I))
- WorkList.push_back(A);
-
- // Process the worklist
- bool Changed = false;
- while (!WorkList.empty()) {
- AllocaInst *AI = WorkList.back();
- WorkList.pop_back();
-
- // Handle dead allocas trivially. These can be formed by SROA'ing arrays
- // with unused elements.
- if (AI->use_empty()) {
- AI->eraseFromParent();
- Changed = true;
- continue;
- }
-
- // If this alloca is impossible for us to promote, reject it early.
- if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
- continue;
-
- // Check to see if we can perform the core SROA transformation. We cannot
- // transform the allocation instruction if it is an array allocation
- // (allocations OF arrays are ok though), and an allocation of a scalar
- // value cannot be decomposed at all.
- uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType());
-
- // Do not promote [0 x %struct].
- if (AllocaSize == 0) continue;
-
- // Do not promote any struct whose size is too big.
- if (AllocaSize > SRThreshold) continue;
-
- // If the alloca looks like a good candidate for scalar replacement, and if
- // all its users can be transformed, then split up the aggregate into its
- // separate elements.
- if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
- DoScalarReplacement(AI, WorkList);
- Changed = true;
- continue;
- }
-
-    // See if we can turn this aggregate value (potentially with casts) into a
-    // simple scalar value that can be mem2reg'd into a register.
- // IsNotTrivial tracks whether this is something that mem2reg could have
- // promoted itself. If so, we don't want to transform it needlessly. Note
- // that we can't just check based on the type: the alloca may be of an i32
- // but that has pointer arithmetic to set byte 3 of it or something.
- if (AllocaInst *NewAI =
- ConvertToScalarInfo((unsigned)AllocaSize, DL, ScalarLoadThreshold)
- .TryConvert(AI)) {
- NewAI->takeName(AI);
- AI->eraseFromParent();
- ++NumConverted;
- Changed = true;
- continue;
- }
-
- // Otherwise, couldn't process this alloca.
- }
-
- return Changed;
-}
-
-/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
-/// predicate, do SROA now.
-void SROA::DoScalarReplacement(AllocaInst *AI,
- std::vector<AllocaInst*> &WorkList) {
- DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
- SmallVector<AllocaInst*, 32> ElementAllocas;
- if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
- ElementAllocas.reserve(ST->getNumContainedTypes());
- for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ST->getContainedType(i), nullptr,
- AI->getAlignment(),
- AI->getName() + "." + Twine(i), AI);
- ElementAllocas.push_back(NA);
- WorkList.push_back(NA); // Add to worklist for recursive processing
- }
- } else {
- ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
- ElementAllocas.reserve(AT->getNumElements());
- Type *ElTy = AT->getElementType();
- for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ElTy, nullptr, AI->getAlignment(),
- AI->getName() + "." + Twine(i), AI);
- ElementAllocas.push_back(NA);
- WorkList.push_back(NA); // Add to worklist for recursive processing
- }
- }
-
- // Now that we have created the new alloca instructions, rewrite all the
- // uses of the old alloca.
- RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
-
- // Now erase any instructions that were made dead while rewriting the alloca.
- DeleteDeadInstructions();
- AI->eraseFromParent();
-
- ++NumReplaced;
-}
-
-/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
-/// recursively including all their operands that become trivially dead.
-void SROA::DeleteDeadInstructions() {
- while (!DeadInsts.empty()) {
- Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
-
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
- if (Instruction *U = dyn_cast<Instruction>(*OI)) {
- // Zero out the operand and see if it becomes trivially dead.
- // (But, don't add allocas to the dead instruction list -- they are
- // already on the worklist and will be deleted separately.)
- *OI = nullptr;
- if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
- DeadInsts.push_back(U);
- }
-
- I->eraseFromParent();
- }
-}
-
-/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
-/// performing scalar replacement of alloca AI. The results are flagged in
-/// the Info parameter. Offset indicates the position within AI that is
-/// referenced by this instruction.
-void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
- AllocaInfo &Info) {
- const DataLayout &DL = I->getModule()->getDataLayout();
- for (Use &U : I->uses()) {
- Instruction *User = cast<Instruction>(U.getUser());
-
- if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
- isSafeForScalarRepl(BC, Offset, Info);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
- uint64_t GEPOffset = Offset;
- isSafeGEP(GEPI, GEPOffset, Info);
- if (!Info.isUnsafe)
- isSafeForScalarRepl(GEPI, GEPOffset, Info);
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
- ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
- if (!Length || Length->isNegative())
- return MarkUnsafe(Info, User);
-
- isSafeMemAccess(Offset, Length->getZExtValue(), nullptr,
- U.getOperandNo() == 0, Info, MI,
- true /*AllowWholeAccess*/);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (!LI->isSimple())
- return MarkUnsafe(Info, User);
- Type *LIType = LI->getType();
- isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
- LI, true /*AllowWholeAccess*/);
- Info.hasALoadOrStore = true;
-
- } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- // Store is ok if storing INTO the pointer, not storing the pointer
- if (!SI->isSimple() || SI->getOperand(0) == I)
- return MarkUnsafe(Info, User);
-
- Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
- SI, true /*AllowWholeAccess*/);
- Info.hasALoadOrStore = true;
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
- return MarkUnsafe(Info, User);
- } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
- isSafePHISelectUseForScalarRepl(User, Offset, Info);
- } else {
- return MarkUnsafe(Info, User);
- }
- if (Info.isUnsafe) return;
- }
-}
-
-
-/// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a
-/// pointer
-/// derived from the alloca, we can often still split the alloca into elements.
-/// This is useful if we have a large alloca where one element is phi'd
-/// together somewhere: we can SRoA and promote all the other elements even if
-/// we end up not being able to promote this one.
-///
-/// All we require is that the uses of the PHI do not index into other parts of
-/// the alloca. The most important use case for this is single loads and stores
-/// that are PHI'd together, which can happen due to code sinking.
-void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
- AllocaInfo &Info) {
- // If we've already checked this PHI, don't do it again.
- if (PHINode *PN = dyn_cast<PHINode>(I))
- if (!Info.CheckedPHIs.insert(PN).second)
- return;
-
- const DataLayout &DL = I->getModule()->getDataLayout();
- for (User *U : I->users()) {
- Instruction *UI = cast<Instruction>(U);
-
- if (BitCastInst *BC = dyn_cast<BitCastInst>(UI)) {
- isSafePHISelectUseForScalarRepl(BC, Offset, Info);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
- // Only allow "bitcast" GEPs for simplicity. We could generalize this,
- // but would have to prove that we're staying inside of an element being
- // promoted.
- if (!GEPI->hasAllZeroIndices())
- return MarkUnsafe(Info, UI);
- isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
- if (!LI->isSimple())
- return MarkUnsafe(Info, UI);
- Type *LIType = LI->getType();
- isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
- LI, false /*AllowWholeAccess*/);
- Info.hasALoadOrStore = true;
-
- } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
- // Store is ok if storing INTO the pointer, not storing the pointer
- if (!SI->isSimple() || SI->getOperand(0) == I)
- return MarkUnsafe(Info, UI);
-
- Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
- SI, false /*AllowWholeAccess*/);
- Info.hasALoadOrStore = true;
- } else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) {
- isSafePHISelectUseForScalarRepl(UI, Offset, Info);
- } else {
- return MarkUnsafe(Info, UI);
- }
- if (Info.isUnsafe) return;
- }
-}
-
-/// isSafeGEP - Check if a GEP instruction can be handled for scalar
-/// replacement. It is safe when all the indices are constant, in-bounds
-/// references, and when the resulting offset corresponds to an element within
-/// the alloca type. The results are flagged in the Info parameter. Upon
-/// return, Offset is adjusted as specified by the GEP indices.
-void SROA::isSafeGEP(GetElementPtrInst *GEPI,
- uint64_t &Offset, AllocaInfo &Info) {
- gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
- if (GEPIt == E)
- return;
- bool NonConstant = false;
- unsigned NonConstantIdxSize = 0;
-
- // Walk through the GEP type indices, checking the types that this indexes
- // into.
- for (; GEPIt != E; ++GEPIt) {
- // Ignore struct elements, no extra checking needed for these.
- if ((*GEPIt)->isStructTy())
- continue;
-
- ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
- if (!IdxVal)
- return MarkUnsafe(Info, GEPI);
- }
-
- // Compute the offset due to this GEP and check if the alloca has a
- // component element at that offset.
- SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
- // If this GEP is non-constant then the last operand must have been a
- // dynamic index into a vector. Pop this now as it has no impact on the
- // constant part of the offset.
- if (NonConstant)
- Indices.pop_back();
-
- const DataLayout &DL = GEPI->getModule()->getDataLayout();
- Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
- if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, NonConstantIdxSize,
- DL))
- MarkUnsafe(Info, GEPI);
-}
-
-/// isHomogeneousAggregate - Check if type T is a struct or array containing
-/// elements of the same type (which is always true for arrays). If so,
-/// return true with NumElts and EltTy set to the number of elements and the
-/// element type, respectively.
-static bool isHomogeneousAggregate(Type *T, unsigned &NumElts,
- Type *&EltTy) {
- if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
- NumElts = AT->getNumElements();
- EltTy = (NumElts == 0 ? nullptr : AT->getElementType());
- return true;
- }
- if (StructType *ST = dyn_cast<StructType>(T)) {
- NumElts = ST->getNumContainedTypes();
- EltTy = (NumElts == 0 ? nullptr : ST->getContainedType(0));
- for (unsigned n = 1; n < NumElts; ++n) {
- if (ST->getContainedType(n) != EltTy)
- return false;
- }
- return true;
- }
- return false;
-}
-
-/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
-/// "homogeneous" aggregates with the same element type and number of elements.
-static bool isCompatibleAggregate(Type *T1, Type *T2) {
- if (T1 == T2)
- return true;
-
- unsigned NumElts1, NumElts2;
- Type *EltTy1, *EltTy2;
- if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
- isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
- NumElts1 == NumElts2 &&
- EltTy1 == EltTy2)
- return true;
-
- return false;
-}
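
For concreteness, the same two predicates over a toy model where an aggregate is just the list of its member type names (these helpers are illustrative assumptions, not LLVM API):

#include <cstddef>
#include <string>
#include <utility>
#include <vector>

using ToyAggregate = std::vector<std::string>;

// Homogeneous: non-empty and every member shares one type. Reports the
// element count and type on success.
static bool isHomogeneous(const ToyAggregate &T,
                          std::pair<std::size_t, std::string> &Out) {
  if (T.empty())
    return false;
  for (const std::string &M : T)
    if (M != T.front())
      return false;
  Out = {T.size(), T.front()};
  return true;
}

// Compatible: identical member lists, or both homogeneous with the same
// element count and element type.
static bool isCompatible(const ToyAggregate &A, const ToyAggregate &B) {
  if (A == B)
    return true;
  std::pair<std::size_t, std::string> HA, HB;
  return isHomogeneous(A, HA) && isHomogeneous(B, HB) && HA == HB;
}

// isCompatible({"i32","i32"}, {"i32","i32"}) is true, and so is comparing a
// two-element i32 array against the struct { i32, i32 }; {"i32","float"} is
// only compatible with itself.
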
-
-/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
-/// alloca or has an offset and size that corresponds to a component element
-/// within it. The offset checked here may have been formed from a GEP with a
-/// pointer bitcasted to a different type.
-///
-/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
-/// unit. If false, it only allows accesses known to be in a single element.
-void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
- Type *MemOpType, bool isStore,
- AllocaInfo &Info, Instruction *TheAccess,
- bool AllowWholeAccess) {
- const DataLayout &DL = TheAccess->getModule()->getDataLayout();
- // Check if this is a load/store of the entire alloca.
- if (Offset == 0 && AllowWholeAccess &&
- MemSize == DL.getTypeAllocSize(Info.AI->getAllocatedType())) {
- // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
- // loads/stores (which are essentially the same as the MemIntrinsics with
- // regard to copying padding between elements). But, if an alloca is
- // flagged as both a source and destination of such operations, we'll need
- // to check later for padding between elements.
- if (!MemOpType || MemOpType->isIntegerTy()) {
- if (isStore)
- Info.isMemCpyDst = true;
- else
- Info.isMemCpySrc = true;
- return;
- }
- // This is also safe for references using a type that is compatible with
- // the type of the alloca, so that loads/stores can be rewritten using
- // insertvalue/extractvalue.
- if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) {
- Info.hasSubelementAccess = true;
- return;
- }
- }
- // Check if the offset/size correspond to a component within the alloca type.
- Type *T = Info.AI->getAllocatedType();
- if (TypeHasComponent(T, Offset, MemSize, DL)) {
- Info.hasSubelementAccess = true;
- return;
- }
-
- return MarkUnsafe(Info, TheAccess);
-}
-
-/// TypeHasComponent - Return true if T has a component type with the
-/// specified offset and size. If Size is zero, do not check the size.
-bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
- const DataLayout &DL) {
- Type *EltTy;
- uint64_t EltSize;
- if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = DL.getStructLayout(ST);
- unsigned EltIdx = Layout->getElementContainingOffset(Offset);
- EltTy = ST->getContainedType(EltIdx);
- EltSize = DL.getTypeAllocSize(EltTy);
- Offset -= Layout->getElementOffset(EltIdx);
- } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
- EltTy = AT->getElementType();
- EltSize = DL.getTypeAllocSize(EltTy);
- if (Offset >= AT->getNumElements() * EltSize)
- return false;
- Offset %= EltSize;
- } else if (VectorType *VT = dyn_cast<VectorType>(T)) {
- EltTy = VT->getElementType();
- EltSize = DL.getTypeAllocSize(EltTy);
- if (Offset >= VT->getNumElements() * EltSize)
- return false;
- Offset %= EltSize;
- } else {
- return false;
- }
- if (Offset == 0 && (Size == 0 || EltSize == Size))
- return true;
- // Check if the component spans multiple elements.
- if (Offset + Size > EltSize)
- return false;
- return TypeHasComponent(EltTy, Offset, Size, DL);
-}
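
The same containment test, sketched over a simplified type tree instead of the LLVM type system (ToyType and its fields are assumptions made for the example):

#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

struct ToyType {
  enum Kind { Scalar, Array, Struct } K;
  uint64_t Size = 0;                                  // total byte size
  std::shared_ptr<ToyType> ArrayElt;                  // Array: element type
  uint64_t NumElts = 0;                               // Array: element count
  std::vector<std::pair<uint64_t, std::shared_ptr<ToyType>>> Fields; // Struct
};

// True if (Offset, AccessSize) lines up with some (possibly nested)
// component of T. AccessSize == 0 means "do not check the size".
static bool hasComponent(const ToyType &T, uint64_t Offset,
                         uint64_t AccessSize) {
  const ToyType *Elt = nullptr;
  uint64_t EltSize = 0;
  if (T.K == ToyType::Struct) {
    for (const auto &F : T.Fields)
      if (Offset >= F.first && Offset < F.first + F.second->Size) {
        Elt = F.second.get();
        EltSize = Elt->Size;
        Offset -= F.first;
        break;
      }
    if (!Elt)
      return false;                 // the offset points into padding
  } else if (T.K == ToyType::Array) {
    Elt = T.ArrayElt.get();
    EltSize = Elt->Size;
    if (Offset >= T.NumElts * EltSize)
      return false;
    Offset %= EltSize;
  } else {
    return false;                   // scalars have no sub-components
  }
  if (Offset == 0 && (AccessSize == 0 || AccessSize == EltSize))
    return true;
  if (Offset + AccessSize > EltSize) // the access would span elements
    return false;
  return hasComponent(*Elt, Offset, AccessSize);
}
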
-
-/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
-/// the instruction I, which references it, to use the separate elements.
-/// Offset indicates the position within AI that is referenced by this
-/// instruction.
-void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
- SmallVectorImpl<AllocaInst *> &NewElts) {
- const DataLayout &DL = I->getModule()->getDataLayout();
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
- Use &TheUse = *UI++;
- Instruction *User = cast<Instruction>(TheUse.getUser());
-
- if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
- RewriteBitCast(BC, AI, Offset, NewElts);
- continue;
- }
-
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
- RewriteGEP(GEPI, AI, Offset, NewElts);
- continue;
- }
-
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
- ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
- uint64_t MemSize = Length->getZExtValue();
- if (Offset == 0 && MemSize == DL.getTypeAllocSize(AI->getAllocatedType()))
- RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
- // Otherwise the intrinsic can only touch a single element and the
- // address operand will be updated, so nothing else needs to be done.
- continue;
- }
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- RewriteLifetimeIntrinsic(II, AI, Offset, NewElts);
- }
- continue;
- }
-
- if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- Type *LIType = LI->getType();
-
- if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
- // Replace:
- // %res = load { i32, i32 }* %alloc
- // with:
- // %load.0 = load i32* %alloc.0
- // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
- // %load.1 = load i32* %alloc.1
- // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
- // (Also works for arrays instead of structs)
- Value *Insert = UndefValue::get(LIType);
- IRBuilder<> Builder(LI);
- for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
- Value *Load = Builder.CreateLoad(NewElts[i], "load");
- Insert = Builder.CreateInsertValue(Insert, Load, i, "insert");
- }
- LI->replaceAllUsesWith(Insert);
- DeadInsts.push_back(LI);
- } else if (LIType->isIntegerTy() &&
- DL.getTypeAllocSize(LIType) ==
- DL.getTypeAllocSize(AI->getAllocatedType())) {
- // If this is a load of the entire alloca to an integer, rewrite it.
- RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
- }
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- Value *Val = SI->getOperand(0);
- Type *SIType = Val->getType();
- if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
- // Replace:
- // store { i32, i32 } %val, { i32, i32 }* %alloc
- // with:
- // %val.0 = extractvalue { i32, i32 } %val, 0
- // store i32 %val.0, i32* %alloc.0
- // %val.1 = extractvalue { i32, i32 } %val, 1
- // store i32 %val.1, i32* %alloc.1
- // (Also works for arrays instead of structs)
- IRBuilder<> Builder(SI);
- for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
- Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName());
- Builder.CreateStore(Extract, NewElts[i]);
- }
- DeadInsts.push_back(SI);
- } else if (SIType->isIntegerTy() &&
- DL.getTypeAllocSize(SIType) ==
- DL.getTypeAllocSize(AI->getAllocatedType())) {
- // If this is a store of the entire alloca from an integer, rewrite it.
- RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
- }
- continue;
- }
-
- if (isa<SelectInst>(User) || isa<PHINode>(User)) {
- // If we have a PHI user of the alloca itself (as opposed to a GEP or
- // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to
- // the new pointer.
- if (!isa<AllocaInst>(I)) continue;
-
- assert(Offset == 0 && NewElts[0] &&
- "Direct alloca use should have a zero offset");
-
- // If we have a use of the alloca, we know the derived uses will be
- // utilizing just the first element of the scalarized result. Insert a
- // bitcast of the first alloca before the user as required.
- AllocaInst *NewAI = NewElts[0];
- BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
- NewAI->moveBefore(BCI);
- TheUse = BCI;
- continue;
- }
- }
-}
-
-/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
-/// and recursively continue updating all of its uses.
-void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
- SmallVectorImpl<AllocaInst *> &NewElts) {
- RewriteForScalarRepl(BC, AI, Offset, NewElts);
- if (BC->getOperand(0) != AI)
- return;
-
- // The bitcast references the original alloca. Replace its uses with
- // references to the alloca containing offset zero (which is normally at
- // index zero, but might not be in cases involving structs with elements
- // of size zero).
- Type *T = AI->getAllocatedType();
- uint64_t EltOffset = 0;
- Type *IdxTy;
- uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy,
- BC->getModule()->getDataLayout());
- Instruction *Val = NewElts[Idx];
- if (Val->getType() != BC->getDestTy()) {
- Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
- Val->takeName(BC);
- }
- BC->replaceAllUsesWith(Val);
- DeadInsts.push_back(BC);
-}
-
-/// FindElementAndOffset - Return the index of the element containing Offset
-/// within the specified type, which must be a struct, an array, or a vector.
-/// Sets T to the type of the element and Offset to the offset within that
-/// element. IdxTy is set to the type of the index result to be used in a
-/// GEP instruction.
-uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
- const DataLayout &DL) {
- uint64_t Idx = 0;
-
- if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = DL.getStructLayout(ST);
- Idx = Layout->getElementContainingOffset(Offset);
- T = ST->getContainedType(Idx);
- Offset -= Layout->getElementOffset(Idx);
- IdxTy = Type::getInt32Ty(T->getContext());
- return Idx;
- } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
- T = AT->getElementType();
- uint64_t EltSize = DL.getTypeAllocSize(T);
- Idx = Offset / EltSize;
- Offset -= Idx * EltSize;
- IdxTy = Type::getInt64Ty(T->getContext());
- return Idx;
- }
- VectorType *VT = cast<VectorType>(T);
- T = VT->getElementType();
- uint64_t EltSize = DL.getTypeAllocSize(T);
- Idx = Offset / EltSize;
- Offset -= Idx * EltSize;
- IdxTy = Type::getInt64Ty(T->getContext());
- return Idx;
-}
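
The array and vector cases above are plain division; a minimal sketch with raw sizes in place of DataLayout:

#include <cstdint>

// Given a byte Offset into an array whose elements are EltSize bytes each,
// return the element index and reduce Offset to the offset within that
// element -- the same arithmetic FindElementAndOffset performs.
static uint64_t findArrayElement(uint64_t &Offset, uint64_t EltSize) {
  uint64_t Idx = Offset / EltSize;
  Offset -= Idx * EltSize;
  return Idx;
}

// Example: for a [4 x i32] alloca and Offset = 10, findArrayElement(Offset, 4)
// returns 2 and leaves Offset == 2 (two bytes into the third element).
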
-
-/// RewriteGEP - Check if this GEP instruction moves the pointer across
-/// elements of the alloca that are being split apart, and if so, rewrite
-/// the GEP to be relative to the new element.
-void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
- SmallVectorImpl<AllocaInst *> &NewElts) {
- uint64_t OldOffset = Offset;
- const DataLayout &DL = GEPI->getModule()->getDataLayout();
- SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
- // If the GEP was dynamic then it must have been a dynamic vector lookup.
-  // In this case, it must be the last GEP operand which is dynamic, so keep it
-  // aside until we've found the constant GEP offset, then add it back in at the
-  // end.
- Value* NonConstantIdx = nullptr;
- if (!GEPI->hasAllConstantIndices())
- NonConstantIdx = Indices.pop_back_val();
- Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
-
- RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
-
- Type *T = AI->getAllocatedType();
- Type *IdxTy;
- uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy, DL);
- if (GEPI->getOperand(0) == AI)
- OldIdx = ~0ULL; // Force the GEP to be rewritten.
-
- T = AI->getAllocatedType();
- uint64_t EltOffset = Offset;
- uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
-
- // If this GEP does not move the pointer across elements of the alloca
-  // being split, then it does not need to be rewritten.
- if (Idx == OldIdx)
- return;
-
- Type *i32Ty = Type::getInt32Ty(AI->getContext());
- SmallVector<Value*, 8> NewArgs;
- NewArgs.push_back(Constant::getNullValue(i32Ty));
- while (EltOffset != 0) {
- uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
- NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
- }
- if (NonConstantIdx) {
- Type* GepTy = T;
- // This GEP has a dynamic index. We need to add "i32 0" to index through
- // any structs or arrays in the original type until we get to the vector
- // to index.
- while (!isa<VectorType>(GepTy)) {
- NewArgs.push_back(Constant::getNullValue(i32Ty));
- GepTy = cast<CompositeType>(GepTy)->getTypeAtIndex(0U);
- }
- NewArgs.push_back(NonConstantIdx);
- }
- Instruction *Val = NewElts[Idx];
- if (NewArgs.size() > 1) {
- Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
- Val->takeName(GEPI);
- }
- if (Val->getType() != GEPI->getType())
- Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
- GEPI->replaceAllUsesWith(Val);
- DeadInsts.push_back(GEPI);
-}
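
The index-rebuilding loop above keeps peeling elements off until the remaining offset reaches zero, which turns a byte offset into a chain of GEP indices. A self-contained sketch of that decomposition for nested arrays (the per-level sizes are assumed inputs):

#include <cstdint>
#include <vector>

// Decompose a byte offset within nested arrays into one index per level.
// Sizes lists the element size at each level, outermost first.
static std::vector<uint64_t>
offsetToIndices(uint64_t Offset, const std::vector<uint64_t> &Sizes) {
  std::vector<uint64_t> Indices;
  Indices.push_back(0);            // the leading zero index, as above
  for (uint64_t EltSize : Sizes) {
    if (Offset == 0)
      break;                       // mirrors "while (EltOffset != 0)"
    Indices.push_back(Offset / EltSize);
    Offset %= EltSize;
  }
  return Indices;
}

// offsetToIndices(10, {4, 2}) == {0, 2, 1}: ten bytes into [4 x [2 x i16]]
// is element 2 of the outer array, then element 1 of the inner one.
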
-
-/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it
-/// to mark the lifetime of the scalarized memory.
-void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
- uint64_t Offset,
- SmallVectorImpl<AllocaInst *> &NewElts) {
- ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0));
- // Put matching lifetime markers on everything from Offset up to
- // Offset+OldSize.
- Type *AIType = AI->getAllocatedType();
- const DataLayout &DL = II->getModule()->getDataLayout();
- uint64_t NewOffset = Offset;
- Type *IdxTy;
- uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy, DL);
-
- IRBuilder<> Builder(II);
- uint64_t Size = OldSize->getLimitedValue();
-
- if (NewOffset) {
- // Splice the first element and index 'NewOffset' bytes in. SROA will
- // split the alloca again later.
- unsigned AS = AI->getType()->getAddressSpace();
- Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS));
- V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset));
-
- IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset;
- if (EltSize > Size) {
- EltSize = Size;
- Size = 0;
- } else {
- Size -= EltSize;
- }
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize));
- else
- Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize));
- ++Idx;
- }
-
- for (; Idx != NewElts.size() && Size; ++Idx) {
- IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = DL.getTypeAllocSize(IdxTy);
- if (EltSize > Size) {
- EltSize = Size;
- Size = 0;
- } else {
- Size -= EltSize;
- }
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- Builder.CreateLifetimeStart(NewElts[Idx],
- Builder.getInt64(EltSize));
- else
- Builder.CreateLifetimeEnd(NewElts[Idx],
- Builder.getInt64(EltSize));
- }
- DeadInsts.push_back(II);
-}
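
The effect is to chop one lifetime region into per-element regions, consuming element sizes until the requested byte count runs out. A sketch of just that bookkeeping, leaving out the intrinsic emission (names are assumptions):

#include <cstdint>
#include <utility>
#include <vector>

// Split a lifetime region of Size bytes, beginning StartOffset bytes into
// element FirstElt, across elements of the given byte sizes. Returns
// (element index, bytes covered in that element) pairs.
static std::vector<std::pair<unsigned, uint64_t>>
splitLifetime(uint64_t Size, uint64_t StartOffset, unsigned FirstElt,
              const std::vector<uint64_t> &EltSizes) {
  std::vector<std::pair<unsigned, uint64_t>> Pieces;
  for (unsigned Idx = FirstElt; Idx < EltSizes.size() && Size; ++Idx) {
    uint64_t Avail = EltSizes[Idx] - (Idx == FirstElt ? StartOffset : 0);
    uint64_t Covered = Avail > Size ? Size : Avail;
    Pieces.push_back({Idx, Covered});
    Size -= Covered;
  }
  return Pieces;
}

// splitLifetime(10, 2, 0, {4, 4, 4, 4}) yields {{0, 2}, {1, 4}, {2, 4}}: the
// marker covers the tail of element 0 and all of elements 1 and 2.
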
-
-/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
-/// Rewrite it to copy or set the elements of the scalarized memory.
-void
-SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
- AllocaInst *AI,
- SmallVectorImpl<AllocaInst *> &NewElts) {
- // If this is a memcpy/memmove, construct the other pointer as the
- // appropriate type. The "Other" pointer is the pointer that goes to memory
- // that doesn't have anything to do with the alloca that we are promoting. For
- // memset, this Value* stays null.
- Value *OtherPtr = nullptr;
- unsigned MemAlignment = MI->getAlignment();
-  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
- if (Inst == MTI->getRawDest())
- OtherPtr = MTI->getRawSource();
- else {
- assert(Inst == MTI->getRawSource());
- OtherPtr = MTI->getRawDest();
- }
- }
-
- // If there is an other pointer, we want to convert it to the same pointer
- // type as AI has, so we can GEP through it safely.
- if (OtherPtr) {
- unsigned AddrSpace =
- cast<PointerType>(OtherPtr->getType())->getAddressSpace();
-
- // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
- // optimization, but it's also required to detect the corner case where
- // both pointer operands are referencing the same memory, and where
-    // OtherPtr may be a bitcast or GEP that is currently being rewritten. (This
- // function is only called for mem intrinsics that access the whole
- // aggregate, so non-zero GEPs are not an issue here.)
- OtherPtr = OtherPtr->stripPointerCasts();
-
- // Copying the alloca to itself is a no-op: just delete it.
- if (OtherPtr == AI || OtherPtr == NewElts[0]) {
- // This code will run twice for a no-op memcpy -- once for each operand.
- // Put only one reference to MI on the DeadInsts list.
- for (SmallVectorImpl<Value *>::const_iterator I = DeadInsts.begin(),
- E = DeadInsts.end(); I != E; ++I)
- if (*I == MI) return;
- DeadInsts.push_back(MI);
- return;
- }
-
- // If the pointer is not the right type, insert a bitcast to the right
- // type.
- Type *NewTy =
- PointerType::get(AI->getType()->getElementType(), AddrSpace);
-
- if (OtherPtr->getType() != NewTy)
- OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
- }
-
- // Process each element of the aggregate.
- bool SROADest = MI->getRawDest() == Inst;
-
- Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
- const DataLayout &DL = MI->getModule()->getDataLayout();
-
- for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
- // If this is a memcpy/memmove, emit a GEP of the other element address.
- Value *OtherElt = nullptr;
- unsigned OtherEltAlign = MemAlignment;
-
- if (OtherPtr) {
- Value *Idx[2] = { Zero,
- ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
- OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx,
- OtherPtr->getName()+"."+Twine(i),
- MI);
- uint64_t EltOffset;
- PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
- Type *OtherTy = OtherPtrTy->getElementType();
- if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
- EltOffset = DL.getStructLayout(ST)->getElementOffset(i);
- } else {
- Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
- EltOffset = DL.getTypeAllocSize(EltTy) * i;
- }
-
- // The alignment of the other pointer is the guaranteed alignment of the
- // element, which is affected by both the known alignment of the whole
- // mem intrinsic and the alignment of the element. If the alignment of
-      // the memcpy (e.g.) is 32 but the element is at a 4-byte offset, then the
- // known alignment is just 4 bytes.
- OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
- }
-
- Value *EltPtr = NewElts[i];
- Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
-
- // If we got down to a scalar, insert a load or store as appropriate.
- if (EltTy->isSingleValueType()) {
- if (isa<MemTransferInst>(MI)) {
- if (SROADest) {
- // From Other to Alloca.
- Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
- new StoreInst(Elt, EltPtr, MI);
- } else {
- // From Alloca to Other.
- Value *Elt = new LoadInst(EltPtr, "tmp", MI);
- new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
- }
- continue;
- }
- assert(isa<MemSetInst>(MI));
-
- // If the stored element is zero (common case), just store a null
- // constant.
- Constant *StoreVal;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
- if (CI->isZero()) {
- StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
- } else {
- // If EltTy is a vector type, get the element type.
- Type *ValTy = EltTy->getScalarType();
-
- // Construct an integer with the right value.
- unsigned EltSize = DL.getTypeSizeInBits(ValTy);
- APInt OneVal(EltSize, CI->getZExtValue());
- APInt TotalVal(OneVal);
- // Set each byte.
- for (unsigned i = 0; 8*i < EltSize; ++i) {
- TotalVal = TotalVal.shl(8);
- TotalVal |= OneVal;
- }
-
- // Convert the integer value to the appropriate type.
- StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
- if (ValTy->isPointerTy())
- StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
- else if (ValTy->isFloatingPointTy())
- StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
- assert(StoreVal->getType() == ValTy && "Type mismatch!");
-
- // If the requested value was a vector constant, create it.
- if (EltTy->isVectorTy()) {
- unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
- StoreVal = ConstantVector::getSplat(NumElts, StoreVal);
- }
- }
- new StoreInst(StoreVal, EltPtr, MI);
- continue;
- }
- // Otherwise, if we're storing a byte variable, use a memset call for
- // this element.
- }
-
- unsigned EltSize = DL.getTypeAllocSize(EltTy);
- if (!EltSize)
- continue;
-
- IRBuilder<> Builder(MI);
-
- // Finally, insert the meminst for this element.
- if (isa<MemSetInst>(MI)) {
- Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
- MI->isVolatile());
- } else {
- assert(isa<MemTransferInst>(MI));
- Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
- Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr
-
- if (isa<MemCpyInst>(MI))
- Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
- else
- Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
- }
- }
- DeadInsts.push_back(MI);
-}
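
The most intricate step above is manufacturing the store value for a memset of a non-zero byte: that byte has to be replicated into every byte of the element. The replication is plain shift-and-or arithmetic; a minimal sketch for element widths up to 64 bits (the code above builds an APInt, so it is not limited to 64):

#include <cstdint>

// Replicate Byte across an element of EltBits bits (EltBits is assumed to be
// a multiple of 8 and at most 64 in this sketch).
static uint64_t splatByte(uint8_t Byte, unsigned EltBits) {
  uint64_t Total = Byte;
  for (unsigned i = 1; 8 * i < EltBits; ++i)
    Total = (Total << 8) | Byte;
  return Total;
}

// splatByte(0xAB, 32) == 0xABABABABu, so memset'ting an i32 element with
// 0xAB becomes "store i32 0xABABABAB". A zero byte short-circuits to a null
// constant, and fp/vector elements get a bitcast or splat of this value.
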
-
-/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
-/// overwrites the entire allocation. Extract out the pieces of the stored
-/// integer and store them individually.
-void
-SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
- SmallVectorImpl<AllocaInst *> &NewElts) {
- // Extract each element out of the integer according to its structure offset
- // and store the element value to the individual alloca.
- Value *SrcVal = SI->getOperand(0);
- Type *AllocaEltTy = AI->getAllocatedType();
- const DataLayout &DL = SI->getModule()->getDataLayout();
- uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
-
- IRBuilder<> Builder(SI);
-
- // Handle tail padding by extending the operand
- if (DL.getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
- SrcVal = Builder.CreateZExt(SrcVal,
- IntegerType::get(SI->getContext(), AllocaSizeBits));
-
- DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
- << '\n');
-
- // There are two forms here: AI could be an array or struct. Both cases
- // have different ways to compute the element offset.
- if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- const StructLayout *Layout = DL.getStructLayout(EltSTy);
-
- for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
- // Get the number of bits to shift SrcVal to get the value.
- Type *FieldTy = EltSTy->getElementType(i);
- uint64_t Shift = Layout->getElementOffsetInBits(i);
-
- if (DL.isBigEndian())
- Shift = AllocaSizeBits - Shift - DL.getTypeAllocSizeInBits(FieldTy);
-
- Value *EltVal = SrcVal;
- if (Shift) {
- Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
- EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
- }
-
- // Truncate down to an integer of the right size.
- uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
-
- // Ignore zero sized fields like {}, they obviously contain no data.
- if (FieldSizeBits == 0) continue;
-
- if (FieldSizeBits != AllocaSizeBits)
- EltVal = Builder.CreateTrunc(EltVal,
- IntegerType::get(SI->getContext(), FieldSizeBits));
- Value *DestField = NewElts[i];
- if (EltVal->getType() == FieldTy) {
- // Storing to an integer field of this size, just do it.
- } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
- // Bitcast to the right element type (for fp/vector values).
- EltVal = Builder.CreateBitCast(EltVal, FieldTy);
- } else {
- // Otherwise, bitcast the dest pointer (for aggregates).
- DestField = Builder.CreateBitCast(DestField,
- PointerType::getUnqual(EltVal->getType()));
- }
- new StoreInst(EltVal, DestField, SI);
- }
-
- } else {
- ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
- Type *ArrayEltTy = ATy->getElementType();
- uint64_t ElementOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
- uint64_t ElementSizeBits = DL.getTypeSizeInBits(ArrayEltTy);
-
- uint64_t Shift;
-
- if (DL.isBigEndian())
- Shift = AllocaSizeBits-ElementOffset;
- else
- Shift = 0;
-
- for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
- // Ignore zero sized fields like {}, they obviously contain no data.
- if (ElementSizeBits == 0) continue;
-
- Value *EltVal = SrcVal;
- if (Shift) {
- Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
- EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
- }
-
- // Truncate down to an integer of the right size.
- if (ElementSizeBits != AllocaSizeBits)
- EltVal = Builder.CreateTrunc(EltVal,
- IntegerType::get(SI->getContext(),
- ElementSizeBits));
- Value *DestField = NewElts[i];
- if (EltVal->getType() == ArrayEltTy) {
- // Storing to an integer field of this size, just do it.
- } else if (ArrayEltTy->isFloatingPointTy() ||
- ArrayEltTy->isVectorTy()) {
- // Bitcast to the right element type (for fp/vector values).
- EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
- } else {
- // Otherwise, bitcast the dest pointer (for aggregates).
- DestField = Builder.CreateBitCast(DestField,
- PointerType::getUnqual(EltVal->getType()));
- }
- new StoreInst(EltVal, DestField, SI);
-
- if (DL.isBigEndian())
- Shift -= ElementOffset;
- else
- Shift += ElementOffset;
- }
- }
-
- DeadInsts.push_back(SI);
-}
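
Each field store is therefore a logical shift right followed by a truncate, with the shift amount mirrored for big-endian layouts. A standalone sketch of the extraction for allocas of at most 64 bits (extractField is an assumed helper, not part of the pass):

#include <cstdint>

// Extract the FieldBits-wide field that starts FieldOffsetBits from the
// beginning of an AllocaBits-wide alloca, out of the integer Src being
// stored over the whole alloca.
static uint64_t extractField(uint64_t Src, unsigned AllocaBits,
                             unsigned FieldOffsetBits, unsigned FieldBits,
                             bool BigEndian) {
  unsigned Shift = BigEndian ? AllocaBits - FieldOffsetBits - FieldBits
                             : FieldOffsetBits;
  uint64_t Piece = Src >> Shift;                  // the CreateLShr above
  if (FieldBits < 64)
    Piece &= (uint64_t(1) << FieldBits) - 1;      // the CreateTrunc above
  return Piece;
}

// Storing i64 0x1122334455667788 over { i32, i32 } on a little-endian target:
//   extractField(Src, 64,  0, 32, false) == 0x55667788   (first field)
//   extractField(Src, 64, 32, 32, false) == 0x11223344   (second field)
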
-
-/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
-/// an integer. Load the individual pieces to form the aggregate value.
-void
-SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
- SmallVectorImpl<AllocaInst *> &NewElts) {
- // Extract each element out of the NewElts according to its structure offset
- // and form the result value.
- Type *AllocaEltTy = AI->getAllocatedType();
- const DataLayout &DL = LI->getModule()->getDataLayout();
- uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
-
- DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
- << '\n');
-
- // There are two forms here: AI could be an array or struct. Both cases
- // have different ways to compute the element offset.
- const StructLayout *Layout = nullptr;
- uint64_t ArrayEltBitOffset = 0;
- if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- Layout = DL.getStructLayout(EltSTy);
- } else {
- Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
- ArrayEltBitOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
- }
-
- Value *ResultVal =
- Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
-
- for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
- // Load the value from the alloca. If the NewElt is an aggregate, cast
- // the pointer to an integer of the same size before doing the load.
- Value *SrcField = NewElts[i];
- Type *FieldTy =
- cast<PointerType>(SrcField->getType())->getElementType();
- uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
-
- // Ignore zero sized fields like {}, they obviously contain no data.
- if (FieldSizeBits == 0) continue;
-
- IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
- FieldSizeBits);
- if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
- !FieldTy->isVectorTy())
- SrcField = new BitCastInst(SrcField,
- PointerType::getUnqual(FieldIntTy),
- "", LI);
- SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
-
- // If SrcField is a fp or vector of the right size but that isn't an
- // integer type, bitcast to an integer so we can shift it.
- if (SrcField->getType() != FieldIntTy)
- SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
-
- // Zero extend the field to be the same size as the final alloca so that
- // we can shift and insert it.
- if (SrcField->getType() != ResultVal->getType())
- SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
-
- // Determine the number of bits to shift SrcField.
- uint64_t Shift;
- if (Layout) // Struct case.
- Shift = Layout->getElementOffsetInBits(i);
- else // Array case.
- Shift = i*ArrayEltBitOffset;
-
- if (DL.isBigEndian())
- Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
-
- if (Shift) {
- Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
- SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
- }
-
- // Don't create an 'or x, 0' on the first iteration.
- if (!isa<Constant>(ResultVal) ||
- !cast<Constant>(ResultVal)->isNullValue())
- ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
- else
- ResultVal = SrcField;
- }
-
- // Handle tail padding by truncating the result
- if (DL.getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
- ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
-
- LI->replaceAllUsesWith(ResultVal);
- DeadInsts.push_back(LI);
-}
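
Reassembly is the inverse of the store case: each piece is zero-extended, shifted into position, and OR'd into the accumulator. A short sketch mirroring the extractField example above (again limited to a 64-bit accumulator):

#include <cstdint>

// OR a FieldBits-wide Piece into Result at FieldOffsetBits, matching the
// zext + shl + or sequence above.
static uint64_t insertField(uint64_t Result, uint64_t Piece,
                            unsigned AllocaBits, unsigned FieldOffsetBits,
                            unsigned FieldBits, bool BigEndian) {
  unsigned Shift = BigEndian ? AllocaBits - FieldOffsetBits - FieldBits
                             : FieldOffsetBits;
  return Result | (Piece << Shift);
}

// Starting from 0 and inserting every field's load reproduces the value a
// single wide integer load of the alloca would have produced.
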
-
-/// HasPadding - Return true if the specified type has any structure or
-/// alignment padding in between the elements that would be split apart
-/// by SROA; return false otherwise.
-static bool HasPadding(Type *Ty, const DataLayout &DL) {
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Ty = ATy->getElementType();
- return DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty);
- }
-
- // SROA currently handles only Arrays and Structs.
- StructType *STy = cast<StructType>(Ty);
- const StructLayout *SL = DL.getStructLayout(STy);
- unsigned PrevFieldBitOffset = 0;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
-
- // Check to see if there is any padding between this element and the
- // previous one.
- if (i) {
- unsigned PrevFieldEnd =
- PrevFieldBitOffset+DL.getTypeSizeInBits(STy->getElementType(i-1));
- if (PrevFieldEnd < FieldBitOffset)
- return true;
- }
- PrevFieldBitOffset = FieldBitOffset;
- }
- // Check for tail padding.
- if (unsigned EltCount = STy->getNumElements()) {
- unsigned PrevFieldEnd = PrevFieldBitOffset +
- DL.getTypeSizeInBits(STy->getElementType(EltCount-1));
- if (PrevFieldEnd < SL->getSizeInBits())
- return true;
- }
- return false;
-}
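
The struct half of the check compares each field's end against the next field's start, and the last field's end against the total size for tail padding. A self-contained sketch over an explicit layout (FieldLayout stands in for StructLayout and is an assumption of the example):

#include <cstddef>
#include <cstdint>
#include <vector>

struct FieldLayout {
  uint64_t OffsetBits;   // where the field starts
  uint64_t SizeBits;     // how many bits the field actually uses
};

// True if any bits between consecutive fields, or after the last field,
// are padding that a field-by-field split would not copy.
static bool hasPadding(const std::vector<FieldLayout> &Fields,
                       uint64_t StructSizeBits) {
  uint64_t PrevEnd = 0;
  for (std::size_t i = 0; i < Fields.size(); ++i) {
    if (i != 0 && PrevEnd < Fields[i].OffsetBits)
      return true;                                  // gap before this field
    PrevEnd = Fields[i].OffsetBits + Fields[i].SizeBits;
  }
  return !Fields.empty() && PrevEnd < StructSizeBits; // tail padding
}

// { i8, i32 } laid out as fields {0, 8} and {32, 32} in a 64-bit struct has
// padding: 24 bits sit between the two fields.
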
-
-/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
-/// aggregate can be broken down into elements. Returns true if it is safe to
-/// do so, and false otherwise.
-bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
- // Loop over the use list of the alloca. We can only transform it if all of
- // the users are safe to transform.
- AllocaInfo Info(AI);
-
- isSafeForScalarRepl(AI, 0, Info);
- if (Info.isUnsafe) {
- DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
- return false;
- }
-
- const DataLayout &DL = AI->getModule()->getDataLayout();
-
- // Okay, we know all the users are promotable. If the aggregate is a memcpy
- // source and destination, we have to be careful. In particular, the memcpy
- // could be moving around elements that live in structure padding of the LLVM
- // types, but may actually be used. In these cases, we refuse to promote the
- // struct.
- if (Info.isMemCpySrc && Info.isMemCpyDst &&
- HasPadding(AI->getAllocatedType(), DL))
- return false;
-
- // If the alloca never has an access to just *part* of it, but is accessed
- // via loads and stores, then we should use ConvertToScalarInfo to promote
-  // the alloca instead of promoting one piece at a time and inserting fission
- // and fusion code.
- if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
- // If the struct/array just has one element, use basic SRoA.
- if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
- if (ST->getNumElements() > 1) return false;
- } else {
- if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
- return false;
- }
- }
-
- return true;
-}
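
Putting the pieces together, the per-alloca decision reduces to a handful of flags gathered during the safety walk. A condensed sketch of the final predicate (AllocaSummary is an assumed stand-in for the AllocaInfo flags used above):

struct AllocaSummary {
  bool IsUnsafe;
  bool IsMemCpySrc, IsMemCpyDst;
  bool HasSubelementAccess, HasALoadOrStore;
  bool TypeHasPadding;   // HasPadding(AI->getAllocatedType(), DL)
  unsigned NumElements;  // element count of the struct/array being split
};

static bool shouldScalarRepl(const AllocaSummary &S) {
  if (S.IsUnsafe)
    return false;
  // A memcpy that both reads and writes the alloca may move padding bytes
  // that a field-by-field rewrite would drop.
  if (S.IsMemCpySrc && S.IsMemCpyDst && S.TypeHasPadding)
    return false;
  // Whole-object loads/stores with no per-element access are left to the
  // convert-to-scalar path, unless the aggregate has a single element.
  if (!S.HasSubelementAccess && S.HasALoadOrStore && S.NumElements > 1)
    return false;
  return true;
}
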
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 054bacdc706ba..aed4a4ad4d26a 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -14,12 +14,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -253,6 +252,8 @@ bool Scalarizer::doInitialization(Module &M) {
}
bool Scalarizer::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
assert(Gathered.empty() && Scattered.empty());
for (BasicBlock &BB : F) {
for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
@@ -305,7 +306,11 @@ void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
ValueVector &SV = Scattered[Op];
if (!SV.empty()) {
for (unsigned I = 0, E = SV.size(); I != E; ++I) {
- Instruction *Old = cast<Instruction>(SV[I]);
+ Value *V = SV[I];
+ if (V == nullptr)
+ continue;
+
+ Instruction *Old = cast<Instruction>(V);
CV[I]->takeName(Old);
Old->replaceAllUsesWith(CV[I]);
Old->eraseFromParent();
@@ -334,13 +339,11 @@ void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
Op->getAllMetadataOtherThanDebugLoc(MDs);
for (unsigned I = 0, E = CV.size(); I != E; ++I) {
if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
- for (SmallVectorImpl<std::pair<unsigned, MDNode *>>::iterator
- MI = MDs.begin(),
- ME = MDs.end();
- MI != ME; ++MI)
- if (canTransferMetadata(MI->first))
- New->setMetadata(MI->first, MI->second);
- New->setDebugLoc(Op->getDebugLoc());
+ for (const auto &MD : MDs)
+ if (canTransferMetadata(MD.first))
+ New->setMetadata(MD.first, MD.second);
+ if (Op->getDebugLoc() && !New->getDebugLoc())
+ New->setDebugLoc(Op->getDebugLoc());
}
}
}
@@ -646,10 +649,9 @@ bool Scalarizer::finish() {
// made to the Function.
if (Gathered.empty() && Scattered.empty())
return false;
- for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
- GMI != GME; ++GMI) {
- Instruction *Op = GMI->first;
- ValueVector &CV = *GMI->second;
+ for (const auto &GMI : Gathered) {
+ Instruction *Op = GMI.first;
+ ValueVector &CV = *GMI.second;
if (!Op->use_empty()) {
// The value is still needed, so recreate it using a series of
// InsertElements.
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 86a10d2a16122..d6ae186698c7a 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -590,9 +590,9 @@ Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
distributeExtsAndCloneChain(UserChain.size() - 1);
// Remove all nullptrs (used to be s/zext) from UserChain.
unsigned NewSize = 0;
- for (auto I = UserChain.begin(), E = UserChain.end(); I != E; ++I) {
- if (*I != nullptr) {
- UserChain[NewSize] = *I;
+ for (User *I : UserChain) {
+ if (I != nullptr) {
+ UserChain[NewSize] = I;
NewSize++;
}
}
@@ -824,8 +824,8 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
// If we created a GEP with constant index, and the base is loop invariant,
// then we swap the first one with it, so LICM can move constant GEP out
// later.
- GetElementPtrInst *FirstGEP = dyn_cast<GetElementPtrInst>(FirstResult);
- GetElementPtrInst *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
+ GetElementPtrInst *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
+ GetElementPtrInst *SecondGEP = dyn_cast_or_null<GetElementPtrInst>(ResultPtr);
if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
swapGEPOperand(FirstGEP, SecondGEP);
@@ -911,7 +911,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*GEP->getParent()->getParent());
unsigned AddrSpace = GEP->getPointerAddressSpace();
- if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
+ if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
/*BaseGV=*/nullptr, AccumulativeByteOffset,
/*HasBaseReg=*/true, /*Scale=*/0,
AddrSpace)) {
@@ -1018,7 +1018,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// unsigned.. Therefore, we cast ElementTypeSizeOfGEP to signed because it is
// used with unsigned integers later.
int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
- DL->getTypeAllocSize(GEP->getType()->getElementType()));
+ DL->getTypeAllocSize(GEP->getResultElementType()));
Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
     // Very likely. As long as %gep is naturally aligned, the byte offset we
@@ -1064,7 +1064,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
}
bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
+ if (skipFunction(F))
return false;
if (DisableSeparateConstOffsetFromGEP)
@@ -1075,8 +1075,8 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
- for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
- for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE;)
+ for (BasicBlock &B : F) {
+ for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
Changed |= splitGEP(GEP);
// No need to split GEP ConstantExprs because all its indices are constant
@@ -1162,8 +1162,8 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
}
void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
- for (auto &B : F) {
- for (auto &I : B) {
+ for (BasicBlock &B : F) {
+ for (Instruction &I : B) {
if (isInstructionTriviallyDead(&I)) {
std::string ErrMessage;
raw_string_ostream RSO(ErrMessage);
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 63c8836bf3810..2d0a21d2c518a 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -21,12 +21,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
@@ -37,8 +37,10 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "simplifycfg"
@@ -131,12 +133,19 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
unsigned BonusInstThreshold) {
bool Changed = false;
bool LocalChange = true;
+
+ SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges;
+ FindFunctionBackedges(F, Edges);
+ SmallPtrSet<BasicBlock *, 16> LoopHeaders;
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i)
+ LoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second));
+
while (LocalChange) {
LocalChange = false;
// Loop over all of the basic blocks and remove them if they are unneeded.
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC)) {
+ if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders)) {
LocalChange = true;
++NumSimpl;
}
@@ -178,14 +187,15 @@ SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold)
: BonusInstThreshold(BonusInstThreshold) {}
PreservedAnalyses SimplifyCFGPass::run(Function &F,
- AnalysisManager<Function> *AM) {
- auto &TTI = AM->getResult<TargetIRAnalysis>(F);
- auto &AC = AM->getResult<AssumptionAnalysis>(F);
+ AnalysisManager<Function> &AM) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold))
- return PreservedAnalyses::none();
-
- return PreservedAnalyses::all();
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ return PA;
}
namespace {
@@ -196,15 +206,12 @@ struct CFGSimplifyPass : public FunctionPass {
CFGSimplifyPass(int T = -1,
std::function<bool(const Function &)> Ftor = nullptr)
- : FunctionPass(ID), PredicateFtor(Ftor) {
+ : FunctionPass(ID), PredicateFtor(std::move(Ftor)) {
BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T);
initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
- if (PredicateFtor && !PredicateFtor(F))
- return false;
-
- if (skipOptnoneFunction(F))
+ if (skipFunction(F) || (PredicateFtor && !PredicateFtor(F)))
return false;
AssumptionCache *AC =
@@ -234,6 +241,5 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
FunctionPass *
llvm::createCFGSimplificationPass(int Threshold,
std::function<bool(const Function &)> Ftor) {
- return new CFGSimplifyPass(Threshold, Ftor);
+ return new CFGSimplifyPass(Threshold, std::move(Ftor));
}
-
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 64109b2df1173..d9a296c631221 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -24,6 +24,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "sink"
@@ -31,50 +32,10 @@ using namespace llvm;
STATISTIC(NumSunk, "Number of instructions sunk");
STATISTIC(NumSinkIter, "Number of sinking iterations");
-namespace {
- class Sinking : public FunctionPass {
- DominatorTree *DT;
- LoopInfo *LI;
- AliasAnalysis *AA;
-
- public:
- static char ID; // Pass identification
- Sinking() : FunctionPass(ID) {
- initializeSinkingPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- FunctionPass::getAnalysisUsage(AU);
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- }
- private:
- bool ProcessBlock(BasicBlock &BB);
- bool SinkInstruction(Instruction *I, SmallPtrSetImpl<Instruction*> &Stores);
- bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const;
- bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const;
- };
-} // end anonymous namespace
-
-char Sinking::ID = 0;
-INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
-
-FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
-
/// AllUsesDominatedByBlock - Return true if all uses of the specified value
/// occur in blocks dominated by the specified block.
-bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
- BasicBlock *BB) const {
+static bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB,
+ DominatorTree &DT) {
// Ignoring debug uses is necessary so debug info doesn't affect the code.
// This may leave a referencing dbg_value in the original block, before
// the definition of the vreg. Dwarf generator handles this although the
@@ -90,71 +51,13 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
UseBlock = PN->getIncomingBlock(Num);
}
// Check that it dominates.
- if (!DT->dominates(BB, UseBlock))
+ if (!DT.dominates(BB, UseBlock))
return false;
}
return true;
}
-bool Sinking::runOnFunction(Function &F) {
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-
- bool MadeChange, EverMadeChange = false;
-
- do {
- MadeChange = false;
- DEBUG(dbgs() << "Sinking iteration " << NumSinkIter << "\n");
- // Process all basic blocks.
- for (Function::iterator I = F.begin(), E = F.end();
- I != E; ++I)
- MadeChange |= ProcessBlock(*I);
- EverMadeChange |= MadeChange;
- NumSinkIter++;
- } while (MadeChange);
-
- return EverMadeChange;
-}
-
-bool Sinking::ProcessBlock(BasicBlock &BB) {
- // Can't sink anything out of a block that has less than two successors.
- if (BB.getTerminator()->getNumSuccessors() <= 1) return false;
-
- // Don't bother sinking code out of unreachable blocks. In addition to being
- // unprofitable, it can also lead to infinite looping, because in an
- // unreachable loop there may be nowhere to stop.
- if (!DT->isReachableFromEntry(&BB)) return false;
-
- bool MadeChange = false;
-
- // Walk the basic block bottom-up. Remember if we saw a store.
- BasicBlock::iterator I = BB.end();
- --I;
- bool ProcessedBegin = false;
- SmallPtrSet<Instruction *, 8> Stores;
- do {
- Instruction *Inst = &*I; // The instruction to sink.
-
- // Predecrement I (if it's not begin) so that it isn't invalidated by
- // sinking.
- ProcessedBegin = I == BB.begin();
- if (!ProcessedBegin)
- --I;
-
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
-
- if (SinkInstruction(Inst, Stores))
- ++NumSunk, MadeChange = true;
-
- // If we just processed the first instruction in the block, we're done.
- } while (!ProcessedBegin);
-
- return MadeChange;
-}
-
-static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
+static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA,
SmallPtrSetImpl<Instruction *> &Stores) {
if (Inst->mayWriteToMemory()) {
@@ -165,7 +68,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
MemoryLocation Loc = MemoryLocation::get(L);
for (Instruction *S : Stores)
- if (AA->getModRefInfo(S, Loc) & MRI_Mod)
+ if (AA.getModRefInfo(S, Loc) & MRI_Mod)
return false;
}
@@ -173,11 +76,15 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
Inst->mayThrow())
return false;
- // Convergent operations cannot be made control-dependent on additional
- // values.
if (auto CS = CallSite(Inst)) {
+ // Convergent operations cannot be made control-dependent on additional
+ // values.
if (CS.hasFnAttr(Attribute::Convergent))
return false;
+
+ for (Instruction *S : Stores)
+ if (AA.getModRefInfo(S, CS) & MRI_Mod)
+ return false;
}
return true;
@@ -185,8 +92,8 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
/// IsAcceptableTarget - Return true if it is possible to sink the instruction
/// in the specified basic block.
-bool Sinking::IsAcceptableTarget(Instruction *Inst,
- BasicBlock *SuccToSinkTo) const {
+static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo,
+ DominatorTree &DT, LoopInfo &LI) {
assert(Inst && "Instruction to be sunk is null");
assert(SuccToSinkTo && "Candidate sink target is null");
@@ -212,25 +119,26 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
// We don't want to sink across a critical edge if we don't dominate the
// successor. We could be introducing calculations to new code paths.
- if (!DT->dominates(Inst->getParent(), SuccToSinkTo))
+ if (!DT.dominates(Inst->getParent(), SuccToSinkTo))
return false;
// Don't sink instructions into a loop.
- Loop *succ = LI->getLoopFor(SuccToSinkTo);
- Loop *cur = LI->getLoopFor(Inst->getParent());
+ Loop *succ = LI.getLoopFor(SuccToSinkTo);
+ Loop *cur = LI.getLoopFor(Inst->getParent());
if (succ != nullptr && succ != cur)
return false;
}
// Finally, check that all the uses of the instruction are actually
// dominated by the candidate
- return AllUsesDominatedByBlock(Inst, SuccToSinkTo);
+ return AllUsesDominatedByBlock(Inst, SuccToSinkTo, DT);
}
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
-bool Sinking::SinkInstruction(Instruction *Inst,
- SmallPtrSetImpl<Instruction *> &Stores) {
+static bool SinkInstruction(Instruction *Inst,
+ SmallPtrSetImpl<Instruction *> &Stores,
+ DominatorTree &DT, LoopInfo &LI, AAResults &AA) {
// Don't sink static alloca instructions. CodeGen assumes allocas outside the
// entry block are dynamically sized stack objects.
@@ -257,12 +165,12 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// Instructions can only be sunk if all their uses are in blocks
// dominated by one of the successors.
// Look at all the postdominators and see if we can sink it in one.
- DomTreeNode *DTN = DT->getNode(Inst->getParent());
+ DomTreeNode *DTN = DT.getNode(Inst->getParent());
for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
I != E && SuccToSinkTo == nullptr; ++I) {
BasicBlock *Candidate = (*I)->getBlock();
if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
- IsAcceptableTarget(Inst, Candidate))
+ IsAcceptableTarget(Inst, Candidate, DT, LI))
SuccToSinkTo = Candidate;
}
@@ -270,7 +178,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// decide which one we should sink to, if any.
for (succ_iterator I = succ_begin(Inst->getParent()),
E = succ_end(Inst->getParent()); I != E && !SuccToSinkTo; ++I) {
- if (IsAcceptableTarget(Inst, *I))
+ if (IsAcceptableTarget(Inst, *I, DT, LI))
SuccToSinkTo = *I;
}
@@ -288,3 +196,111 @@ bool Sinking::SinkInstruction(Instruction *Inst,
Inst->moveBefore(&*SuccToSinkTo->getFirstInsertionPt());
return true;
}
+
+static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI,
+ AAResults &AA) {
+ // Can't sink anything out of a block that has less than two successors.
+ if (BB.getTerminator()->getNumSuccessors() <= 1) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable loop there may be nowhere to stop.
+ if (!DT.isReachableFromEntry(&BB)) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ BasicBlock::iterator I = BB.end();
+ --I;
+ bool ProcessedBegin = false;
+ SmallPtrSet<Instruction *, 8> Stores;
+ do {
+ Instruction *Inst = &*I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == BB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ if (SinkInstruction(Inst, Stores, DT, LI, AA)) {
+ ++NumSunk;
+ MadeChange = true;
+ }
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
+static bool iterativelySinkInstructions(Function &F, DominatorTree &DT,
+ LoopInfo &LI, AAResults &AA) {
+ bool MadeChange, EverMadeChange = false;
+
+ do {
+ MadeChange = false;
+ DEBUG(dbgs() << "Sinking iteration " << NumSinkIter << "\n");
+ // Process all basic blocks.
+ for (BasicBlock &I : F)
+ MadeChange |= ProcessBlock(I, DT, LI, AA);
+ EverMadeChange |= MadeChange;
+ NumSinkIter++;
+ } while (MadeChange);
+
+ return EverMadeChange;
+}
+
+PreservedAnalyses SinkingPass::run(Function &F, AnalysisManager<Function> &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+
+ if (!iterativelySinkInstructions(F, DT, LI, AA))
+ return PreservedAnalyses::all();
+
+ auto PA = PreservedAnalyses();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
+
+namespace {
+ class SinkingLegacyPass : public FunctionPass {
+ public:
+ static char ID; // Pass identification
+ SinkingLegacyPass() : FunctionPass(ID) {
+ initializeSinkingLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ return iterativelySinkInstructions(F, DT, LI, AA);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ }
+ };
+} // end anonymous namespace
+
+char SinkingLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SinkingLegacyPass, "sink", "Code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(SinkingLegacyPass, "sink", "Code sinking", false, false)
+
+FunctionPass *llvm::createSinkingPass() { return new SinkingLegacyPass(); }
diff --git a/lib/Transforms/Scalar/SpeculativeExecution.cpp b/lib/Transforms/Scalar/SpeculativeExecution.cpp
index 147d615488ffe..9bf2d62068194 100644
--- a/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -50,9 +50,19 @@
// aggressive speculation while counting on later passes to either capitalize on
// that or clean it up.
//
+// If the pass was created by calling
+// createSpeculativeExecutionIfHasBranchDivergencePass or the
+// -spec-exec-only-if-divergent-target option is present, this pass only has an
+// effect on targets where TargetTransformInfo::hasBranchDivergence() is true;
+// on other targets, it is a nop.
+//
+// This lets you include this pass unconditionally in the IR pass pipeline, but
+// only enable it for relevant targets.
+//
//===----------------------------------------------------------------------===//
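
A hedged illustration of what the new comment describes: the divergence-gated factory added at the bottom of this file can be dropped into a pipeline unconditionally. The snippet assumes the declaration lives in llvm/Transforms/Scalar.h next to createSpeculativeExecutionPass.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"

static void addSpeculation(llvm::legacy::FunctionPassManager &FPM) {
  // Safe to add for every target: where TTI reports no branch divergence,
  // the pass simply returns without changing the function.
  FPM.add(llvm::createSpeculativeExecutionIfHasBranchDivergencePass());
}
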
#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
@@ -83,19 +93,39 @@ static cl::opt<unsigned> SpecExecMaxNotHoisted(
"number of instructions that would not be speculatively executed "
"exceeds this limit."));
+static cl::opt<bool> SpecExecOnlyIfDivergentTarget(
+ "spec-exec-only-if-divergent-target", cl::init(false), cl::Hidden,
+ cl::desc("Speculative execution is applied only to targets with divergent "
+             "branches, even if the pass was configured to apply to all "
+             "targets."));
+
namespace {
+
class SpeculativeExecution : public FunctionPass {
public:
- static char ID;
- SpeculativeExecution(): FunctionPass(ID) {}
+ static char ID;
+ explicit SpeculativeExecution(bool OnlyIfDivergentTarget = false)
+ : FunctionPass(ID),
+ OnlyIfDivergentTarget(OnlyIfDivergentTarget ||
+ SpecExecOnlyIfDivergentTarget) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
+ const char *getPassName() const override {
+ if (OnlyIfDivergentTarget)
+ return "Speculatively execute instructions if target has divergent "
+ "branches";
+ return "Speculatively execute instructions";
+ }
private:
bool runOnBasicBlock(BasicBlock &B);
bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock);
+ // If true, this pass is a nop unless the target architecture has branch
+ // divergence.
+ const bool OnlyIfDivergentTarget;
const TargetTransformInfo *TTI = nullptr;
};
} // namespace
@@ -105,17 +135,23 @@ INITIALIZE_PASS_BEGIN(SpeculativeExecution, "speculative-execution",
"Speculatively execute instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution",
- "Speculatively execute instructions", false, false)
+ "Speculatively execute instructions", false, false)
void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
bool SpeculativeExecution::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
+ if (skipFunction(F))
return false;
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence()) {
+ DEBUG(dbgs() << "Not running SpeculativeExecution because "
+ "TTI->hasBranchDivergence() is false.\n");
+ return false;
+ }
bool Changed = false;
for (auto& B : F) {
@@ -240,4 +276,8 @@ FunctionPass *createSpeculativeExecutionPass() {
return new SpeculativeExecution();
}
+FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
+ return new SpeculativeExecution(/* OnlyIfDivergentTarget = */ true);
+}
+
} // namespace llvm
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 1faa65eb34175..292d0400a516b 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -57,8 +57,6 @@
// SLSR.
#include <vector>
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/FoldingSet.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -76,6 +74,8 @@ using namespace PatternMatch;
namespace {
+static const unsigned UnknownAddressSpace = ~0u;
+
class StraightLineStrengthReduce : public FunctionPass {
public:
// SLSR candidate. Such a candidate must be in one of the forms described in
@@ -234,51 +234,22 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
Basis.CandidateKind == C.CandidateKind);
}
-// TODO: use TTI->getGEPCost.
static bool isGEPFoldable(GetElementPtrInst *GEP,
- const TargetTransformInfo *TTI,
- const DataLayout *DL) {
- GlobalVariable *BaseGV = nullptr;
- int64_t BaseOffset = 0;
- bool HasBaseReg = false;
- int64_t Scale = 0;
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand()))
- BaseGV = GV;
- else
- HasBaseReg = true;
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) {
- if (isa<SequentialType>(*GTI)) {
- int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
- if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
- BaseOffset += ConstIdx->getSExtValue() * ElementSize;
- } else {
- // Needs scale register.
- if (Scale != 0) {
- // No addressing mode takes two scale registers.
- return false;
- }
- Scale = ElementSize;
- }
- } else {
- StructType *STy = cast<StructType>(*GTI);
- uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
- BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
- }
- }
-
- unsigned AddrSpace = GEP->getPointerAddressSpace();
- return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
- BaseOffset, HasBaseReg, Scale, AddrSpace);
+ const TargetTransformInfo *TTI) {
+ SmallVector<const Value*, 4> Indices;
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
+ Indices.push_back(*I);
+ return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
+ Indices) == TargetTransformInfo::TCC_Free;
}
// Returns whether (Base + Index * Stride) can be folded to an addressing mode.
static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride,
TargetTransformInfo *TTI) {
- return TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true,
- Index->getSExtValue());
+  // Index->getSExtValue() may crash if Index is wider than 64 bits.
+ return Index->getBitWidth() <= 64 &&
+ TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true,
+ Index->getSExtValue(), UnknownAddressSpace);
}
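The comment above explains why isAddFoldable now guards Index->getSExtValue() behind a width check: APInt::getSExtValue() asserts when the value needs more than 64 bits, and the patch conservatively requires Index->getBitWidth() <= 64 before asking for it. A standalone sketch of the same guard (illustrative, not code from the patch):

  #include "llvm/ADT/APInt.h"
  #include <cstdio>
  using namespace llvm;

  // Prints the sign-extended value only when it can be materialized as int64_t.
  static void printIfNarrow(const APInt &V) {
    if (V.getMinSignedBits() <= 64)
      std::printf("fits in 64 bits: %lld\n", (long long)V.getSExtValue());
    else
      std::printf("needs more than 64 bits; skipping getSExtValue()\n");
  }

  int main() {
    printIfNarrow(APInt(32, -5, /*isSigned=*/true));  // fits
    printIfNarrow(APInt::getSignedMinValue(128));     // would assert if queried
    return 0;
  }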
bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
@@ -287,7 +258,7 @@ bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
if (C.CandidateKind == Candidate::Add)
return isAddFoldable(C.Base, C.Index, C.Stride, TTI);
if (C.CandidateKind == Candidate::GEP)
- return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI, DL);
+ return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI);
return false;
}
@@ -533,13 +504,23 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
IndexExprs, GEP->isInBounds());
Value *ArrayIdx = GEP->getOperand(I);
uint64_t ElementSize = DL->getTypeAllocSize(*GTI);
- factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP);
+ if (ArrayIdx->getType()->getIntegerBitWidth() <=
+ DL->getPointerSizeInBits(GEP->getAddressSpace())) {
+      // Only factor ArrayIdx when it is no wider than the pointer size; a
+      // wider ArrayIdx is implicitly truncated to the pointer size.
+ factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP);
+ }
// When ArrayIdx is the sext of a value, we try to factor that value as
// well. Handling this case is important because array indices are
// typically sign-extended to the pointer size.
Value *TruncatedArrayIdx = nullptr;
- if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))))
+ if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))) &&
+ TruncatedArrayIdx->getType()->getIntegerBitWidth() <=
+ DL->getPointerSizeInBits(GEP->getAddressSpace())) {
+        // Only factor TruncatedArrayIdx when it is no wider than the pointer
+        // size; a wider value is implicitly truncated to the pointer size.
factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP);
+ }
IndexExprs[I - 1] = OrigIndexExpr;
}
@@ -567,10 +548,10 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
APInt ElementSize(
IndexOffset.getBitWidth(),
DL->getTypeAllocSize(
- cast<GetElementPtrInst>(Basis.Ins)->getType()->getElementType()));
+ cast<GetElementPtrInst>(Basis.Ins)->getResultElementType()));
APInt Q, R;
APInt::sdivrem(IndexOffset, ElementSize, Q, R);
- if (R.getSExtValue() == 0)
+ if (R == 0)
IndexOffset = Q;
else
BumpWithUglyGEP = true;
@@ -578,10 +559,10 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
// Compute Bump = C - Basis = (i' - i) * S.
// Common case 1: if (i' - i) is 1, Bump = S.
- if (IndexOffset.getSExtValue() == 1)
+ if (IndexOffset == 1)
return C.Stride;
// Common case 2: if (i' - i) is -1, Bump = -S.
- if (IndexOffset.getSExtValue() == -1)
+ if (IndexOffset.isAllOnesValue())
return Builder.CreateNeg(C.Stride);
// Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
@@ -685,7 +666,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
}
bool StraightLineStrengthReduce::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
+ if (skipFunction(F))
return false;
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
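The skipOptnoneFunction() to skipFunction() switch here (and in the other passes touched by this diff) moves to the hook that, in addition to honoring the optnone attribute, also respects the opt-bisect machinery. A minimal sketch of the idiom; HypotheticalPass is illustrative and not part of the patch:

  #include "llvm/IR/Function.h"
  #include "llvm/Pass.h"
  using namespace llvm;

  namespace {
  struct HypotheticalPass : public FunctionPass {
    static char ID;
    HypotheticalPass() : FunctionPass(ID) {}

    bool runOnFunction(Function &F) override {
      // Bail out early for optnone functions and when opt-bisect disables us.
      if (skipFunction(F))
        return false;
      return false; // no transformation in this sketch
    }
  };
  } // namespace
  char HypotheticalPass::ID = 0;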
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 662513c7d8ae0..e9ac39beae5a7 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
@@ -161,6 +162,9 @@ public:
/// consist of a network of PHI nodes where the true incoming values express
/// breaks and the false values express continue states.
class StructurizeCFG : public RegionPass {
+ bool SkipUniformRegions;
+ DivergenceAnalysis *DA;
+
Type *Boolean;
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
@@ -232,11 +236,18 @@ class StructurizeCFG : public RegionPass {
void rebuildSSA();
+ bool hasOnlyUniformBranches(const Region *R);
+
public:
static char ID;
StructurizeCFG() :
- RegionPass(ID) {
+ RegionPass(ID), SkipUniformRegions(false) {
+ initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
+ }
+
+ StructurizeCFG(bool SkipUniformRegions) :
+ RegionPass(ID), SkipUniformRegions(SkipUniformRegions) {
initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
}
@@ -250,6 +261,8 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ if (SkipUniformRegions)
+ AU.addRequired<DivergenceAnalysis>();
AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
@@ -264,6 +277,7 @@ char StructurizeCFG::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
false, false)
+INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
@@ -297,11 +311,7 @@ void StructurizeCFG::orderNodes() {
for (RegionNode *RN : TempOrder) {
BasicBlock *BB = RN->getEntry();
Loop *Loop = LI->getLoopFor(BB);
- if (!LoopBlocks.count(Loop)) {
- LoopBlocks[Loop] = 1;
- continue;
- }
- LoopBlocks[Loop]++;
+ ++LoopBlocks[Loop];
}
unsigned CurrentLoopDepth = 0;
@@ -319,11 +329,11 @@ void StructurizeCFG::orderNodes() {
// the outer loop.
RNVector::iterator LoopI = I;
- while(LoopBlocks[CurrentLoop]) {
+ while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
LoopI++;
BasicBlock *LoopBB = (*LoopI)->getEntry();
if (LI->getLoopFor(LoopBB) == CurrentLoop) {
- LoopBlocks[CurrentLoop]--;
+ --BlockCount;
Order.push_back(*LoopI);
}
}
@@ -367,14 +377,8 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
/// \brief Invert the given condition
Value *StructurizeCFG::invert(Value *Condition) {
// First: Check if it's a constant
- if (Condition == BoolTrue)
- return BoolFalse;
-
- if (Condition == BoolFalse)
- return BoolTrue;
-
- if (Condition == BoolUndef)
- return BoolUndef;
+ if (Constant *C = dyn_cast<Constant>(Condition))
+ return ConstantExpr::getNot(C);
// Second: If the condition is already inverted, return the original value
if (match(Condition, m_Not(m_Value(Condition))))
@@ -491,21 +495,21 @@ void StructurizeCFG::collectInfos() {
// Reset the visited nodes
Visited.clear();
- for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
- OI != OE; ++OI) {
+ for (RegionNode *RN : reverse(Order)) {
- DEBUG(dbgs() << "Visiting: " <<
- ((*OI)->isSubRegion() ? "SubRegion with entry: " : "") <<
- (*OI)->getEntry()->getName() << " Loop Depth: " << LI->getLoopDepth((*OI)->getEntry()) << "\n");
+ DEBUG(dbgs() << "Visiting: "
+ << (RN->isSubRegion() ? "SubRegion with entry: " : "")
+ << RN->getEntry()->getName() << " Loop Depth: "
+ << LI->getLoopDepth(RN->getEntry()) << "\n");
// Analyze all the conditions leading to a node
- gatherPredicates(*OI);
+ gatherPredicates(RN);
// Remember that we've seen this node
- Visited.insert((*OI)->getEntry());
+ Visited.insert(RN->getEntry());
// Find the last back edges
- analyzeLoops(*OI);
+ analyzeLoops(RN);
}
}
@@ -584,20 +588,18 @@ void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
/// \brief Add the real PHI value as soon as everything is set up
void StructurizeCFG::setPhiValues() {
SSAUpdater Updater;
- for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end();
- AI != AE; ++AI) {
+ for (const auto &AddedPhi : AddedPhis) {
- BasicBlock *To = AI->first;
- BBVector &From = AI->second;
+ BasicBlock *To = AddedPhi.first;
+ const BBVector &From = AddedPhi.second;
if (!DeletedPhis.count(To))
continue;
PhiMap &Map = DeletedPhis[To];
- for (PhiMap::iterator PI = Map.begin(), PE = Map.end();
- PI != PE; ++PI) {
+ for (const auto &PI : Map) {
- PHINode *Phi = PI->first;
+ PHINode *Phi = PI.first;
Value *Undef = UndefValue::get(Phi->getType());
Updater.Initialize(Phi->getType(), "");
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
@@ -605,22 +607,20 @@ void StructurizeCFG::setPhiValues() {
NearestCommonDominator Dominator(DT);
Dominator.addBlock(To, false);
- for (BBValueVector::iterator VI = PI->second.begin(),
- VE = PI->second.end(); VI != VE; ++VI) {
+ for (const auto &VI : PI.second) {
- Updater.AddAvailableValue(VI->first, VI->second);
- Dominator.addBlock(VI->first);
+ Updater.AddAvailableValue(VI.first, VI.second);
+ Dominator.addBlock(VI.first);
}
if (!Dominator.wasResultExplicitMentioned())
Updater.AddAvailableValue(Dominator.getResult(), Undef);
- for (BBVector::iterator FI = From.begin(), FE = From.end();
- FI != FE; ++FI) {
+ for (BasicBlock *FI : From) {
- int Idx = Phi->getBasicBlockIndex(*FI);
+ int Idx = Phi->getBasicBlockIndex(FI);
assert(Idx != -1);
- Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI));
+ Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(FI));
}
}
@@ -914,11 +914,48 @@ void StructurizeCFG::rebuildSSA() {
}
}
+bool StructurizeCFG::hasOnlyUniformBranches(const Region *R) {
+ for (const BasicBlock *BB : R->blocks()) {
+ const BranchInst *Br = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!Br || !Br->isConditional())
+ continue;
+
+ if (!DA->isUniform(Br->getCondition()))
+ return false;
+ DEBUG(dbgs() << "BB: " << BB->getName() << " has uniform terminator\n");
+ }
+ return true;
+}
+
/// \brief Run the transformation for each region found
bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
if (R->isTopLevelRegion())
return false;
+ if (SkipUniformRegions) {
+ DA = &getAnalysis<DivergenceAnalysis>();
+ // TODO: We could probably be smarter here with how we handle sub-regions.
+ if (hasOnlyUniformBranches(R)) {
+ DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R << '\n');
+
+ // Mark all direct child block terminators as having been treated as
+ // uniform. To account for a possible future in which non-uniform
+ // sub-regions are treated more cleverly, indirect children are not
+ // marked as uniform.
+ MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
+ Region::element_iterator E = R->element_end();
+ for (Region::element_iterator I = R->element_begin(); I != E; ++I) {
+ if (I->isSubRegion())
+ continue;
+
+ if (Instruction *Term = I->getEntry()->getTerminator())
+ Term->setMetadata("structurizecfg.uniform", MD);
+ }
+
+ return false;
+ }
+ }
+
Func = R->getEntry()->getParent();
ParentRegion = R;
@@ -947,7 +984,6 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
return true;
}
-/// \brief Create the pass
-Pass *llvm::createStructurizeCFGPass() {
- return new StructurizeCFG();
+Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
+ return new StructurizeCFG(SkipUniformRegions);
}
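The runOnRegion() change above tags the terminators of skipped, uniformly branching regions with "structurizecfg.uniform" metadata. A hedged sketch of how a downstream consumer might query that marker (the helper below is hypothetical, not part of the patch):

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // Returns true if StructurizeCFG marked this block's terminator as having
  // been treated as uniform and therefore left its region unstructurized.
  static bool wasTreatedAsUniform(const BasicBlock &BB) {
    if (const Instruction *Term = BB.getTerminator())
      return Term->getMetadata("structurizecfg.uniform") != nullptr;
    return false;
  }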
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 4e84d72ae7bdd..d5ff997503703 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -50,6 +50,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -85,64 +86,9 @@ STATISTIC(NumEliminated, "Number of tail calls removed");
STATISTIC(NumRetDuped, "Number of returns duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
-namespace {
- struct TailCallElim : public FunctionPass {
- const TargetTransformInfo *TTI;
-
- static char ID; // Pass identification, replacement for typeid
- TailCallElim() : FunctionPass(ID) {
- initializeTailCallElimPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- bool runOnFunction(Function &F) override;
-
- private:
- bool runTRE(Function &F);
- bool markTails(Function &F, bool &AllCallsAreTailCalls);
-
- CallInst *FindTRECandidate(Instruction *I,
- bool CannotTailCallElimCallsMarkedTail);
- bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
- BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail);
- bool FoldReturnAndProcessPred(BasicBlock *BB,
- ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail);
- bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail);
- bool CanMoveAboveCall(Instruction *I, CallInst *CI);
- Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
- };
-}
-
-char TailCallElim::ID = 0;
-INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",
- "Tail Call Elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(TailCallElim, "tailcallelim",
- "Tail Call Elimination", false, false)
-
-// Public interface to the TailCallElimination pass
-FunctionPass *llvm::createTailCallEliminationPass() {
- return new TailCallElim();
-}
-
-void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
-}
-
/// \brief Scan the specified function for alloca instructions.
/// If it contains any dynamic allocas, returns false.
-static bool CanTRE(Function &F) {
+static bool canTRE(Function &F) {
// Because of PR962, we don't TRE dynamic allocas.
for (auto &BB : F) {
for (auto &I : BB) {
@@ -156,20 +102,6 @@ static bool CanTRE(Function &F) {
return true;
}
-bool TailCallElim::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
- if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
- return false;
-
- bool AllCallsAreTailCalls = false;
- bool Modified = markTails(F, AllCallsAreTailCalls);
- if (AllCallsAreTailCalls)
- Modified |= runTRE(F);
- return Modified;
-}
-
namespace {
struct AllocaDerivedValueTracker {
// Start at a root value and walk its use-def chain to mark calls that use the
@@ -250,7 +182,7 @@ struct AllocaDerivedValueTracker {
};
}
-bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
+static bool markTails(Function &F, bool &AllCallsAreTailCalls) {
if (F.callsFunctionThatReturnsTwice())
return false;
AllCallsAreTailCalls = true;
@@ -385,63 +317,11 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
return Modified;
}
-bool TailCallElim::runTRE(Function &F) {
- // If this function is a varargs function, we won't be able to PHI the args
- // right, so don't even try to convert it...
- if (F.getFunctionType()->isVarArg()) return false;
-
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- BasicBlock *OldEntry = nullptr;
- bool TailCallsAreMarkedTail = false;
- SmallVector<PHINode*, 8> ArgumentPHIs;
- bool MadeChange = false;
-
- // If false, we cannot perform TRE on tail calls marked with the 'tail'
- // attribute, because doing so would cause the stack size to increase (real
- // TRE would deallocate variable sized allocas, TRE doesn't).
- bool CanTRETailMarkedCall = CanTRE(F);
-
- // Change any tail recursive calls to loops.
- //
- // FIXME: The code generator produces really bad code when an 'escaping
- // alloca' is changed from being a static alloca to being a dynamic alloca.
- // Until this is resolved, disable this transformation if that would ever
- // happen. This bug is PR962.
- for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
- BasicBlock *BB = &*BBI++; // FoldReturnAndProcessPred may delete BB.
- if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
- bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, !CanTRETailMarkedCall);
- if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
- TailCallsAreMarkedTail, ArgumentPHIs,
- !CanTRETailMarkedCall);
- MadeChange |= Change;
- }
- }
-
- // If we eliminated any tail recursions, it's possible that we inserted some
- // silly PHI nodes which just merge an initial value (the incoming operand)
- // with themselves. Check to see if we did and clean up our mess if so. This
- // occurs when a function passes an argument straight through to its tail
- // call.
- for (PHINode *PN : ArgumentPHIs) {
- // If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
- PN->replaceAllUsesWith(PNV);
- PN->eraseFromParent();
- }
- }
-
- return MadeChange;
-}
-
-
/// Return true if it is safe to move the specified
/// instruction from after the call to before the call, assuming that all
/// instructions between the call and this instruction are movable.
///
-bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
+static bool canMoveAboveCall(Instruction *I, CallInst *CI) {
// FIXME: We can move load/store/call/free instructions above the call if the
// call does not mod/ref the memory location being processed.
if (I->mayHaveSideEffects()) // This also handles volatile loads.
@@ -454,9 +334,10 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// does not write to memory and the load provably won't trap.
// FIXME: Writes to memory only matter if they may alias the pointer
// being loaded from.
+ const DataLayout &DL = L->getModule()->getDataLayout();
if (CI->mayWriteToMemory() ||
- !isSafeToLoadUnconditionally(L->getPointerOperand(), L,
- L->getAlignment()))
+ !isSafeToLoadUnconditionally(L->getPointerOperand(),
+ L->getAlignment(), DL, L))
return false;
}
}
@@ -512,8 +393,8 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
Function *F = CI->getParent()->getParent();
Value *ReturnedValue = nullptr;
- for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
- ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
+ for (BasicBlock &BBI : *F) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BBI.getTerminator());
if (RI == nullptr || RI == IgnoreRI) continue;
// We can only perform this transformation if the value returned is
@@ -534,8 +415,7 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
/// If the specified instruction can be transformed using accumulator recursion
/// elimination, return the constant which is the start of the accumulator
/// value. Otherwise return null.
-Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
- CallInst *CI) {
+static Value *canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) {
if (!I->isAssociative() || !I->isCommutative()) return nullptr;
assert(I->getNumOperands() == 2 &&
"Associative/commutative operations should have 2 args!");
@@ -555,15 +435,15 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
return getCommonReturnValue(cast<ReturnInst>(I->user_back()), CI);
}
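For orientation, canTransformAccumulatorRecursion() is what lets TRE handle a call whose result is combined with an associative, commutative operation before being returned. A source-level picture of the effect (illustrative only; the pass operates on LLVM IR, not C++ source):

  // Before: the multiply sits between the recursive call and the return.
  static unsigned facRecursive(unsigned N) {
    if (N <= 1)
      return 1;
    return N * facRecursive(N - 1);
  }

  // After accumulator recursion elimination, conceptually: a loop carrying an
  // accumulator seeded with 1, the value returned by the base case.
  static unsigned facIterative(unsigned N) {
    unsigned Acc = 1;
    while (N > 1) {
      Acc *= N;
      --N;
    }
    return Acc;
  }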
-static Instruction *FirstNonDbg(BasicBlock::iterator I) {
+static Instruction *firstNonDbg(BasicBlock::iterator I) {
while (isa<DbgInfoIntrinsic>(I))
++I;
return &*I;
}
-CallInst*
-TailCallElim::FindTRECandidate(Instruction *TI,
- bool CannotTailCallElimCallsMarkedTail) {
+static CallInst *findTRECandidate(Instruction *TI,
+ bool CannotTailCallElimCallsMarkedTail,
+ const TargetTransformInfo *TTI) {
BasicBlock *BB = TI->getParent();
Function *F = BB->getParent();
@@ -594,8 +474,8 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// and disable this xform in this case, because the code generator will
// lower the call to fabs into inline code.
if (BB == &F->getEntryBlock() &&
- FirstNonDbg(BB->front().getIterator()) == CI &&
- FirstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() &&
+ firstNonDbg(BB->front().getIterator()) == CI &&
+ firstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() &&
!TTI->isLoweredToCall(CI->getCalledFunction())) {
// A single-block function with just a call and a return. Check that
// the arguments match.
@@ -612,7 +492,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
return CI;
}
-bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
SmallVectorImpl<PHINode *> &ArgumentPHIs,
@@ -636,14 +516,14 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// Check that this is the case now.
BasicBlock::iterator BBI(CI);
for (++BBI; &*BBI != Ret; ++BBI) {
- if (CanMoveAboveCall(&*BBI, CI)) continue;
+ if (canMoveAboveCall(&*BBI, CI)) continue;
// If we can't move the instruction above the call, it might be because it
// is an associative and commutative operation that could be transformed
// using accumulator recursion elimination. Check to see if this is the
// case, and if so, remember the initial accumulator value for later.
if ((AccumulatorRecursionEliminationInitVal =
- CanTransformAccumulatorRecursion(&*BBI, CI))) {
+ canTransformAccumulatorRecursion(&*BBI, CI))) {
// Yes, this is accumulator recursion. Remember which instruction
// accumulates.
AccumulatorRecursionInstr = &*BBI;
@@ -773,8 +653,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// Finally, rewrite any return instructions in the program to return the PHI
// node instead of the "initval" that they do currently. This loop will
// actually rewrite the return value we are destroying, but that's ok.
- for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
+ for (BasicBlock &BBI : *F)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI.getTerminator()))
RI->setOperand(0, AccPN);
++NumAccumAdded;
}
@@ -790,11 +670,12 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
return true;
}
-bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
- ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail) {
+static bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail,
+ const TargetTransformInfo *TTI) {
bool Change = false;
// If the return block contains nothing but the return and PHI's,
@@ -813,7 +694,7 @@ bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
while (!UncondBranchPreds.empty()) {
BranchInst *BI = UncondBranchPreds.pop_back_val();
BasicBlock *Pred = BI->getParent();
- if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){
+    if (CallInst *CI = findTRECandidate(BI, CannotTailCallElimCallsMarkedTail, TTI)) {
DEBUG(dbgs() << "FOLDING: " << *BB
<< "INTO UNCOND BRANCH PRED: " << *Pred);
ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred);
@@ -821,11 +702,11 @@ bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
// Cleanup: if all predecessors of BB have been eliminated by
// FoldReturnIntoUncondBranch, delete it. It is important to empty it,
// because the ret instruction in there is still using a value which
- // EliminateRecursiveTailCall will attempt to remove.
+ // eliminateRecursiveTailCall will attempt to remove.
if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
BB->eraseFromParent();
- EliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
+ eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs,
CannotTailCallElimCallsMarkedTail);
++NumRetDuped;
@@ -836,16 +717,124 @@ bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
return Change;
}
-bool
-TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail) {
- CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
+static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail,
+ const TargetTransformInfo *TTI) {
+ CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI);
if (!CI)
return false;
- return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
+ return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs,
CannotTailCallElimCallsMarkedTail);
}
+
+static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI) {
+ if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+ return false;
+
+ bool MadeChange = false;
+ bool AllCallsAreTailCalls = false;
+ MadeChange |= markTails(F, AllCallsAreTailCalls);
+ if (!AllCallsAreTailCalls)
+ return MadeChange;
+
+ // If this function is a varargs function, we won't be able to PHI the args
+ // right, so don't even try to convert it...
+ if (F.getFunctionType()->isVarArg())
+ return false;
+
+ BasicBlock *OldEntry = nullptr;
+ bool TailCallsAreMarkedTail = false;
+ SmallVector<PHINode*, 8> ArgumentPHIs;
+
+ // If false, we cannot perform TRE on tail calls marked with the 'tail'
+ // attribute, because doing so would cause the stack size to increase (real
+ // TRE would deallocate variable sized allocas, TRE doesn't).
+ bool CanTRETailMarkedCall = canTRE(F);
+
+ // Change any tail recursive calls to loops.
+ //
+ // FIXME: The code generator produces really bad code when an 'escaping
+ // alloca' is changed from being a static alloca to being a dynamic alloca.
+ // Until this is resolved, disable this transformation if that would ever
+ // happen. This bug is PR962.
+ for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
+ BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB.
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change =
+ processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs, !CanTRETailMarkedCall, TTI);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change =
+ foldReturnAndProcessPred(BB, Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs, !CanTRETailMarkedCall, TTI);
+ MadeChange |= Change;
+ }
+ }
+
+ // If we eliminated any tail recursions, it's possible that we inserted some
+ // silly PHI nodes which just merge an initial value (the incoming operand)
+ // with themselves. Check to see if we did and clean up our mess if so. This
+ // occurs when a function passes an argument straight through to its tail
+ // call.
+ for (PHINode *PN : ArgumentPHIs) {
+ // If the PHI Node is a dynamic constant, replace it with the value it is.
+ if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
+ }
+
+ return MadeChange;
+}
+
+namespace {
+struct TailCallElim : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ TailCallElim() : FunctionPass(ID) {
+ initializeTailCallElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ return eliminateTailRecursion(
+ F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F));
+ }
+};
+}
+
+char TailCallElim::ID = 0;
+INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(TailCallElim, "tailcallelim", "Tail Call Elimination",
+ false, false)
+
+// Public interface to the TailCallElimination pass
+FunctionPass *llvm::createTailCallEliminationPass() {
+ return new TailCallElim();
+}
+
+PreservedAnalyses TailCallElimPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+
+ TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+
+ bool Changed = eliminateTailRecursion(F, &TTI);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
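With the legacy TailCallElim wrapper retained and TailCallElimPass added for the new pass manager, the transformation is reachable under either manager; assuming the usual "tailcallelim" entry in PassRegistry.def, opt -passes=tailcallelim exercises the new-PM path. A small sketch of driving it programmatically (illustrative, not code from the patch):

  #include "llvm/IR/Function.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
  using namespace llvm;

  static PreservedAnalyses runTRE(Function &F) {
    FunctionAnalysisManager FAM;
    PassBuilder PB;
    PB.registerFunctionAnalyses(FAM); // provides TargetIRAnalysis, among others
    return TailCallElimPass().run(F, FAM);
  }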