author    Dimitry Andric <dim@FreeBSD.org>  2017-12-24 01:00:08 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2017-12-24 01:00:08 +0000
commit    c7dac04c3480f3c20487f912f77343139fce2d99
tree      21a09bce0171e27bd1e92649db9df797fa097cea /lib/Transforms/Scalar
parent    044eb2f6afba375a914ac9d8024f8f5142bb912e
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/CallSiteSplitting.cpp           46
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp               19
-rw-r--r--  lib/Transforms/Scalar/LoopSink.cpp                     2
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp               2
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp             56
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp                        29
-rw-r--r--  lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp   2
7 files changed, 110 insertions, 46 deletions
diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp
index d8c408035038..207243231aad 100644
--- a/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -13,10 +13,11 @@
// threading, or IPA-CP based function cloning, etc.).
// As of now we support two cases :
//
-// 1) If a call site is dominated by an OR condition and if any of its arguments
-// are predicated on this OR condition, try to split the condition with more
-// constrained arguments. For example, in the code below, we try to split the
-// call site since we can predicate the argument(ptr) based on the OR condition.
+// 1) Try to split a call-site with constrained arguments, if any constraints
+// on any argument can be found by following the single predecessors of the
+// call-site's predecessors. Currently this pass only handles call-sites with
+// two predecessors. For example, in the code below, we try to split the
+// call-site since we can predicate the argument (ptr) based on the OR condition.
//
// Split from :
// if (!ptr || c)
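The transformation the new comment describes, sketched end to end at the source level (a hedged illustration of what the pass documents; the pass itself operates on LLVM IR, and callee/ptr are illustrative names):

    extern void callee(int *);

    // Before: one call-site; all we know about ptr is (!ptr || c).
    void before(int *ptr, bool c) {
      if (!ptr || c)
        callee(ptr);
    }

    // After splitting: each duplicated call-site sees a tighter fact,
    // which later passes (inlining, IPA-CP) can exploit per copy.
    void after(int *ptr, bool c) {
      if (!ptr)
        callee(nullptr);   // ptr known null on this edge
      else if (c)
        callee(ptr);       // ptr known non-null on this edge
    }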
@@ -200,16 +201,15 @@ static bool canSplitCallSite(CallSite CS) {
}
/// Return true if the CS is split into its new predecessors which are directly
-/// hooked to each of its orignial predecessors pointed by PredBB1 and PredBB2.
-/// In OR predicated case, PredBB1 will point the header, and PredBB2 will point
-/// to the second compare block. CallInst1 and CallInst2 will be the new
-/// call-sites placed in the new predecessors split for PredBB1 and PredBB2,
-/// repectively. Therefore, CallInst1 will be the call-site placed
-/// between Header and Tail, and CallInst2 will be the call-site between TBB and
-/// Tail. For example, in the IR below with an OR condition, the call-site can
-/// be split
+/// hooked to each of its original predecessors pointed to by PredBB1 and PredBB2.
+/// CallInst1 and CallInst2 will be the new call-sites placed in the new
+/// predecessors split for PredBB1 and PredBB2, respectively.
+/// For example, in the IR below with an OR condition, the call-site can
+/// be split. Assuming PredBB1=Header and PredBB2=TBB, CallInst1 will be the
+/// call-site placed between Header and Tail, and CallInst2 will be the
+/// call-site between TBB and Tail.
///
-/// from :
+/// From :
///
/// Header:
/// %c = icmp eq i32* %a, null
@@ -237,9 +237,9 @@ static bool canSplitCallSite(CallSite CS) {
/// Tail:
/// %p = phi i1 [%ca1, %Tail-split1],[%ca2, %Tail-split2]
///
-/// Note that for an OR predicated case, CallInst1 and CallInst2 should be
-/// created with more constrained arguments in
-/// createCallSitesOnOrPredicatedArgument().
+/// Note that if any argument at the call-site is constrained by its
+/// predecessors, new call-sites with more constrained arguments will be
+/// created in createCallSitesOnPredicatedArgument().
static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
Instruction *CallInst1, Instruction *CallInst2) {
Instruction *Instr = CS.getInstruction();
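To make the Header/TBB/Tail description concrete, here is a hedged reconstruction of the splitting mechanics as a free function (not a quote of splitCallSite's body; it assumes a CallInst with a non-void result and omits the attribute and constraint bookkeeping):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    // Each predecessor gets its own Tail-split block holding a clone of the
    // call; the original call in Tail is replaced by a phi over the clones.
    static void sketchSplit(Instruction *Call, BasicBlock *PredBB1,
                            BasicBlock *PredBB2) {
      BasicBlock *TailBB = Call->getParent();
      BasicBlock *Split1 = SplitBlockPredecessors(TailBB, PredBB1, ".split1");
      BasicBlock *Split2 = SplitBlockPredecessors(TailBB, PredBB2, ".split2");
      Instruction *CallInst1 = Call->clone();  // call-site on the PredBB1 path
      CallInst1->insertBefore(Split1->getTerminator());
      Instruction *CallInst2 = Call->clone();  // call-site on the PredBB2 path
      CallInst2->insertBefore(Split2->getTerminator());
      PHINode *P = PHINode::Create(Call->getType(), 2, "p", &TailBB->front());
      P->addIncoming(CallInst1, Split1);
      P->addIncoming(CallInst2, Split2);
      Call->replaceAllUsesWith(P);
      Call->eraseFromParent();
    }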
@@ -332,18 +332,10 @@ static bool tryToSplitOnPHIPredicatedArgument(CallSite CS) {
splitCallSite(CS, Preds[0], Preds[1], nullptr, nullptr);
return true;
}
-// Check if one of the predecessors is a single predecessors of the other.
-// This is a requirement for control flow modeling an OR. HeaderBB points to
-// the single predecessor and OrBB points to other node. HeaderBB potentially
-// contains the first compare of the OR and OrBB the second.
-static bool isOrHeader(BasicBlock *HeaderBB, BasicBlock *OrBB) {
- return OrBB->getSinglePredecessor() == HeaderBB &&
- HeaderBB->getTerminator()->getNumSuccessors() == 2;
-}
-static bool tryToSplitOnOrPredicatedArgument(CallSite CS) {
+static bool tryToSplitOnPredicatedArgument(CallSite CS) {
auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
- if (!isOrHeader(Preds[0], Preds[1]) && !isOrHeader(Preds[1], Preds[0]))
+ if (Preds[0] == Preds[1])
return false;
SmallVector<std::pair<ICmpInst *, unsigned>, 2> C1, C2;
@@ -362,7 +354,7 @@ static bool tryToSplitOnOrPredicatedArgument(CallSite CS) {
static bool tryToSplitCallSite(CallSite CS) {
if (!CS.arg_size() || !canSplitCallSite(CS))
return false;
- return tryToSplitOnOrPredicatedArgument(CS) ||
+ return tryToSplitOnPredicatedArgument(CS) ||
tryToSplitOnPHIPredicatedArgument(CS);
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 6b0377e0ecb3..1476f7850cf0 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -282,7 +282,7 @@ bool JumpThreading::runOnFunction(Function &F) {
auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
- bool HasProfileData = F.getEntryCount().hasValue();
+ bool HasProfileData = F.hasProfileData();
if (HasProfileData) {
LoopInfo LI{DominatorTree(F)};
BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
@@ -307,8 +307,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
- bool HasProfileData = F.getEntryCount().hasValue();
- if (HasProfileData) {
+ if (F.hasProfileData()) {
LoopInfo LI{DominatorTree(F)};
BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
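For context on this and the two analogous call sites below (LoopSink, LoopUnrollPass): the new helper is assumed here to be a thin wrapper over the PGO entry count; a free-function sketch of those semantics (the real member lives in Function.h of this tree):

    #include "llvm/IR/Function.h"

    // Assumed semantics of Function::hasProfileData(): a function "has
    // profile data" when PGO attached an entry count to it.
    static bool hasProfileLike(const llvm::Function &F) {
      return F.getEntryCount().hasValue();
    }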
@@ -1333,6 +1332,20 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// code size.
BasicBlock *UnavailablePred = nullptr;
+  // If the value is unavailable in one of the predecessors, we will end up
+  // inserting a new instruction into that predecessor. This is only valid if
+  // all the instructions before LI are guaranteed to pass execution to their
+  // successor, or if LI is safe to speculate.
+  // TODO: If this logic becomes more complex and we need to perform PRE
+  // insertion farther away than a single predecessor, we should reuse the
+  // code from GVN's PRE. That requires dominator tree analysis, which would
+  // be overkill for this simple case.
+ if (PredsScanned.size() != AvailablePreds.size() &&
+ !isSafeToSpeculativelyExecute(LI))
+ for (auto I = LoadBB->begin(); &*I != LI; ++I)
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+ return false;
+
// If there is exactly one predecessor where the value is unavailable, the
// already computed 'OneUnavailablePred' block is it. If it ends in an
// unconditional branch, we know that it isn't a critical edge.
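A source-level picture of the hazard the new check rejects (hypothetical names; the actual check walks the IR instructions preceding LI):

    extern void mayThrowOrHang();   // not guaranteed to reach its successor

    // PRE would insert the load into a predecessor, which is equivalent to
    // executing it before the call below. If mayThrowOrHang() never returns
    // on some path, the original program never dereferenced p there, so the
    // early load is invalid unless the load is safe to speculate.
    int use(int *p) {
      mayThrowOrHang();
      return *p;                    // the partially redundant load
    }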
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index c9d55b4594fe..430a7085d93f 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -247,7 +247,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// Enable LoopSink only when runtime profile is available.
// With static profile, the sinking decision may be sub-optimal.
- if (!Preheader->getParent()->getEntryCount())
+ if (!Preheader->getParent()->hasProfileData())
return false;
const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 7b1d6446a24a..15e7da5e1a7a 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -882,7 +882,7 @@ static bool computeUnrollCount(
}
// Check if the runtime trip count is too small when profile is available.
- if (L->getHeader()->getParent()->getEntryCount()) {
+ if (L->getHeader()->getParent()->hasProfileData()) {
if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
if (*ProfileTripCount < FlatLoopTripCountThreshold)
return false;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9c870b42a747..6af3fef963dc 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -476,22 +476,33 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
Alignment = DL.getABITypeAlignment(EltType);
}
- AMemSet =
- Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+ // Remember the debug location.
+ DebugLoc Loc;
+ if (!Range.TheStores.empty())
+ Loc = Range.TheStores[0]->getDebugLoc();
DEBUG(dbgs() << "Replace stores:\n";
for (Instruction *SI : Range.TheStores)
- dbgs() << *SI << '\n';
- dbgs() << "With: " << *AMemSet << '\n');
-
- if (!Range.TheStores.empty())
- AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
+ dbgs() << *SI << '\n');
// Zap all the stores.
for (Instruction *SI : Range.TheStores) {
MD->removeInstruction(SI);
SI->eraseFromParent();
}
+
+  // Create the memset after removing the stores, so that if there are any
+  // cached non-local dependencies on the removed instructions in
+  // MemoryDependenceAnalysis, the cache entries are updated to "dirty"
+  // entries pointing below the memset, and subsequent queries include the
+  // memset.
+ AMemSet =
+ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+ if (!Range.TheStores.empty())
+ AMemSet->setDebugLoc(Loc);
+
+ DEBUG(dbgs() << "With: " << *AMemSet << '\n');
+
++NumMemSetInfer;
}
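For readers new to this function: tryMergingIntoMemset collapses a run of adjacent stores of one repeated byte value into a single memset; the hunk above only changes when that memset is created, namely after the dead stores are erased. A source-level analogue of the merge itself (illustrative; the pass works on IR store ranges):

    #include <cstring>

    void zero4_stores(int *p) {
      p[0] = 0; p[1] = 0; p[2] = 0; p[3] = 0;  // adjacent byte-equal stores
    }
    void zero4_merged(int *p) {
      std::memset(p, 0, 4 * sizeof(int));      // the merged form
    }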
@@ -1031,9 +1042,22 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- M->getIterator(), M->getParent());
+ MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep);
+ MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false,
+ M->getIterator(), M->getParent());
+
+ if (SourceDep.isNonLocal()) {
+ SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
+ MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false,
+ NonLocalDepResults);
+ if (NonLocalDepResults.size() == 1) {
+ SourceDep = NonLocalDepResults[0].getResult();
+ assert((!SourceDep.getInst() ||
+ LookupDomTree().dominates(SourceDep.getInst(), M)) &&
+ "when memdep returns exactly one result, it should dominate");
+ }
+ }
+
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
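What accepting a unique non-local result buys, at source level (hypothetical shape; the actual rewrite happens further down in processMemCpyMemCpyDependence):

    #include <cstring>

    // Previously the producing memcpy had to sit in the same basic block as
    // the consumer for the dependence query to see it. With the non-local
    // query, a unique cross-block dependence also qualifies:
    void forward(char *dst, char *tmp, const char *src,
                 std::size_t n, bool flag) {
      std::memcpy(tmp, src, n);     // producer
      if (flag) { /* intervening control flow, separate blocks */ }
      std::memcpy(dst, tmp, n);     // consumer: may now copy from src directly
    }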
@@ -1235,6 +1259,18 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
SrcLoc, true, M->getIterator(), M->getParent());
+ if (SrcDepInfo.isNonLocal()) {
+ SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
+ MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true,
+ NonLocalDepResults);
+ if (NonLocalDepResults.size() == 1) {
+ SrcDepInfo = NonLocalDepResults[0].getResult();
+ assert((!SrcDepInfo.getInst() ||
+ LookupDomTree().dominates(SrcDepInfo.getInst(), M)) &&
+ "when memdep returns exactly one result, it should dominate");
+ }
+ }
+
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
return processMemCpyMemCpyDependence(M, MDep);
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index e5866b4718da..66608ec631f6 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1929,9 +1929,32 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
if (!I) continue;
bool Folded = ConstantFoldTerminator(I->getParent());
- assert(Folded &&
- "Expect TermInst on constantint or blockaddress to be folded");
- (void) Folded;
+ if (!Folded) {
+ // The constant folder may not have been able to fold the terminator
+ // if this is a branch or switch on undef. Fold it manually as a
+ // branch to the first successor.
+#ifndef NDEBUG
+ if (auto *BI = dyn_cast<BranchInst>(I)) {
+ assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
+ "Branch should be foldable!");
+ } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
+ assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
+ } else {
+ llvm_unreachable("Didn't fold away reference to block!");
+ }
+#endif
+
+ // Make this an uncond branch to the first successor.
+ TerminatorInst *TI = I->getParent()->getTerminator();
+ BranchInst::Create(TI->getSuccessor(0), TI);
+
+ // Remove entries in successor phi nodes to remove edges.
+ for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
+ TI->getSuccessor(i)->removePredecessor(TI->getParent());
+
+ // Remove the old terminator.
+ TI->eraseFromParent();
+ }
}
// Finally, delete the basic block.
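The manual fold the new branch performs, pulled out as a hedged sketch (same calls as the hunk above; TerminatorInst is the class this tree still uses):

    #include "llvm/IR/Instructions.h"

    // Fold a branch/switch whose condition is undef: any successor choice is
    // a legal refinement of undef, so branch unconditionally to successor #0
    // and detach the remaining edges from their phi nodes.
    static void foldUndefTerminator(llvm::TerminatorInst *TI) {
      llvm::BranchInst::Create(TI->getSuccessor(0), TI); // uncond br before TI
      for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
        TI->getSuccessor(i)->removePredecessor(TI->getParent()); // fix phis
      TI->eraseFromParent();
    }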
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 209821ff21d7..8fa9ffb6d014 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -97,7 +97,7 @@
// load %p2
// ...
//
-// We can not do CSE for to the common part related to index "i64 %i". Lowering
+// We cannot apply CSE to the common part related to index "i64 %i". Lowering
// GEPs can achieve such goals.
// If the target does not use alias analysis in codegen, this pass will
// lower a GEP with multiple indices into arithmetic operations:
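(The colon above introduces the file's own IR example, which falls outside this excerpt.) At the C++ level, the lowering separates the variadic part of an address computation, which nearby GEPs can then share via CSE, from the constant part, which folds away; an illustrative sketch:

    #include <cstddef>

    struct S { int a; int f; };

    // &p[i].f split into a shared variadic base plus a constant offset.
    static int *addrOfF(S *p, std::ptrdiff_t i) {
      char *base = reinterpret_cast<char *>(p) + i * sizeof(S); // CSE-able
      return reinterpret_cast<int *>(base + offsetof(S, f));    // constant
    }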