| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-24 01:00:08 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-24 01:00:08 +0000 |
| commit | c7dac04c3480f3c20487f912f77343139fce2d99 (patch) | |
| tree | 21a09bce0171e27bd1e92649db9df797fa097cea /lib/Transforms/Scalar | |
| parent | 044eb2f6afba375a914ac9d8024f8f5142bb912e (diff) | |
Diffstat (limited to 'lib/Transforms/Scalar')
| -rw-r--r-- | lib/Transforms/Scalar/CallSiteSplitting.cpp | 46 |
| -rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 19 |
| -rw-r--r-- | lib/Transforms/Scalar/LoopSink.cpp | 2 |
| -rw-r--r-- | lib/Transforms/Scalar/LoopUnrollPass.cpp | 2 |
| -rw-r--r-- | lib/Transforms/Scalar/MemCpyOptimizer.cpp | 56 |
| -rw-r--r-- | lib/Transforms/Scalar/SCCP.cpp | 29 |
| -rw-r--r-- | lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 2 |
7 files changed, 110 insertions, 46 deletions
diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp
index d8c408035038..207243231aad 100644
--- a/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -13,10 +13,11 @@
 // threading, or IPA-CP based function cloning, etc.).
 // As of now we support two cases :
 //
-// 1) If a call site is dominated by an OR condition and if any of its arguments
-// are predicated on this OR condition, try to split the condition with more
-// constrained arguments. For example, in the code below, we try to split the
-// call site since we can predicate the argument(ptr) based on the OR condition.
+// 1) Try to a split call-site with constrained arguments, if any constraints
+// on any argument can be found by following the single predecessors of the
+// all site's predecessors. Currently this pass only handles call-sites with 2
+// predecessors. For example, in the code below, we try to split the call-site
+// since we can predicate the argument(ptr) based on the OR condition.
 //
 // Split from :
 //   if (!ptr || c)
@@ -200,16 +201,15 @@ static bool canSplitCallSite(CallSite CS) {
 }
 
 /// Return true if the CS is split into its new predecessors which are directly
-/// hooked to each of its orignial predecessors pointed by PredBB1 and PredBB2.
-/// In OR predicated case, PredBB1 will point the header, and PredBB2 will point
-/// to the second compare block. CallInst1 and CallInst2 will be the new
-/// call-sites placed in the new predecessors split for PredBB1 and PredBB2,
-/// repectively. Therefore, CallInst1 will be the call-site placed
-/// between Header and Tail, and CallInst2 will be the call-site between TBB and
-/// Tail. For example, in the IR below with an OR condition, the call-site can
-/// be split
+/// hooked to each of its original predecessors pointed by PredBB1 and PredBB2.
+/// CallInst1 and CallInst2 will be the new call-sites placed in the new
+/// predecessors split for PredBB1 and PredBB2, respectively.
+/// For example, in the IR below with an OR condition, the call-site can
+/// be split. Assuming PredBB1=Header and PredBB2=TBB, CallInst1 will be the
+/// call-site placed between Header and Tail, and CallInst2 will be the
+/// call-site between TBB and Tail.
 ///
-/// from :
+/// From :
 ///
 ///   Header:
 ///     %c = icmp eq i32* %a, null
@@ -237,9 +237,9 @@ static bool canSplitCallSite(CallSite CS) {
 ///   Tail:
 ///    %p = phi i1 [%ca1, %Tail-split1],[%ca2, %Tail-split2]
 ///
-/// Note that for an OR predicated case, CallInst1 and CallInst2 should be
-/// created with more constrained arguments in
-/// createCallSitesOnOrPredicatedArgument().
+/// Note that in case any arguments at the call-site are constrained by its
+/// predecessors, new call-sites with more constrained arguments will be
+/// created in createCallSitesOnPredicatedArgument().
 static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
                           Instruction *CallInst1, Instruction *CallInst2) {
   Instruction *Instr = CS.getInstruction();
@@ -332,18 +332,10 @@ static bool tryToSplitOnPHIPredicatedArgument(CallSite CS) {
   splitCallSite(CS, Preds[0], Preds[1], nullptr, nullptr);
   return true;
 }
 
-// Check if one of the predecessors is a single predecessors of the other.
-// This is a requirement for control flow modeling an OR. HeaderBB points to
-// the single predecessor and OrBB points to other node. HeaderBB potentially
-// contains the first compare of the OR and OrBB the second.
-static bool isOrHeader(BasicBlock *HeaderBB, BasicBlock *OrBB) {
-  return OrBB->getSinglePredecessor() == HeaderBB &&
-         HeaderBB->getTerminator()->getNumSuccessors() == 2;
-}
-static bool tryToSplitOnOrPredicatedArgument(CallSite CS) {
+static bool tryToSplitOnPredicatedArgument(CallSite CS) {
   auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
-  if (!isOrHeader(Preds[0], Preds[1]) && !isOrHeader(Preds[1], Preds[0]))
+  if (Preds[0] == Preds[1])
    return false;
 
   SmallVector<std::pair<ICmpInst *, unsigned>, 2> C1, C2;
@@ -362,7 +354,7 @@ static bool tryToSplitOnOrPredicatedArgument(CallSite CS) {
 static bool tryToSplitCallSite(CallSite CS) {
   if (!CS.arg_size() || !canSplitCallSite(CS))
     return false;
-  return tryToSplitOnOrPredicatedArgument(CS) ||
+  return tryToSplitOnPredicatedArgument(CS) ||
          tryToSplitOnPHIPredicatedArgument(CS);
 }
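The CallSiteSplitting hunks above relax the old requirement that the two predecessors form an OR pattern (the deleted `isOrHeader` check) to merely requiring two distinct predecessors. As a reader aid, here is a minimal C++ sketch of the source shape the pass header describes; `caller` and `callee` are invented names, and the pattern mirrors the `if (!ptr || c)` example in the pass comments:

```cpp
// Hypothetical caller/callee, mirroring the example in the pass header.
static int callee(int *ptr, int v) { return ptr ? *ptr + v : v; }

int caller(int *ptr, bool c) {
  // The call below is reached over two predecessors: the edge where `!ptr`
  // held (ptr known null) and the edge where `c` held after `!ptr` failed
  // (ptr known non-null). Call-site splitting duplicates the call so each
  // copy can use the constrained argument.
  if (!ptr || c)
    return callee(ptr, 42);
  return 0;
}
```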
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 6b0377e0ecb3..1476f7850cf0 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -282,7 +282,7 @@ bool JumpThreading::runOnFunction(Function &F) {
   auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   std::unique_ptr<BlockFrequencyInfo> BFI;
   std::unique_ptr<BranchProbabilityInfo> BPI;
-  bool HasProfileData = F.getEntryCount().hasValue();
+  bool HasProfileData = F.hasProfileData();
   if (HasProfileData) {
     LoopInfo LI{DominatorTree(F)};
     BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
@@ -307,8 +307,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
 
   std::unique_ptr<BlockFrequencyInfo> BFI;
   std::unique_ptr<BranchProbabilityInfo> BPI;
-  bool HasProfileData = F.getEntryCount().hasValue();
-  if (HasProfileData) {
+  if (F.hasProfileData()) {
     LoopInfo LI{DominatorTree(F)};
     BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
     BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
@@ -1333,6 +1332,20 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   // code size.
   BasicBlock *UnavailablePred = nullptr;
 
+  // If the value is unavailable in one of predecessors, we will end up
+  // inserting a new instruction into them. It is only valid if all the
+  // instructions before LI are guaranteed to pass execution to its successor,
+  // or if LI is safe to speculate.
+  // TODO: If this logic becomes more complex, and we will perform PRE insertion
+  // farther than to a predecessor, we need to reuse the code from GVN's PRE.
+  // It requires domination tree analysis, so for this simple case it is an
+  // overkill.
+  if (PredsScanned.size() != AvailablePreds.size() &&
+      !isSafeToSpeculativelyExecute(LI))
+    for (auto I = LoadBB->begin(); &*I != LI; ++I)
+      if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+        return false;
+
   // If there is exactly one predecessor where the value is unavailable, the
   // already computed 'OneUnavailablePred' block is it. If it ends in an
   // unconditional branch, we know that it isn't a critical edge.
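The interesting part of the JumpThreading diff is the new legality gate before load PRE. A standalone restatement of that gate, as a sketch rather than the patch itself (`safeToPerformLoadPRE` is an invented name; the two analysis helpers are exactly the ones the patch calls):

```cpp
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Inserting the load into a predecessor is only sound if every instruction
// above LI in its block is guaranteed to reach LI (no throwing calls, no
// other early exits), or if LI may be speculated anyway.
static bool safeToPerformLoadPRE(LoadInst *LI) {
  if (isSafeToSpeculativelyExecute(LI))
    return true;
  for (Instruction &I : *LI->getParent()) {
    if (&I == LI)
      break; // only the instructions before LI matter
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      return false;
  }
  return true;
}
```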
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index c9d55b4594fe..430a7085d93f 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -247,7 +247,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
 
   // Enable LoopSink only when runtime profile is available.
   // With static profile, the sinking decision may be sub-optimal.
-  if (!Preheader->getParent()->getEntryCount())
+  if (!Preheader->getParent()->hasProfileData())
     return false;
 
   const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 7b1d6446a24a..15e7da5e1a7a 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -882,7 +882,7 @@ static bool computeUnrollCount(
   }
 
   // Check if the runtime trip count is too small when profile is available.
-  if (L->getHeader()->getParent()->getEntryCount()) {
+  if (L->getHeader()->getParent()->hasProfileData()) {
     if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
       if (*ProfileTripCount < FlatLoopTripCountThreshold)
         return false;
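LoopSink, LoopUnrollPass, and the JumpThreading hunks above all swap a raw entry-count test for `Function::hasProfileData()`. Judging only from how the call sites are rewritten in this patch, the helper appears to be a readability wrapper over the old idiom; the following is a sketch of that assumed equivalence, not the actual definition:

```cpp
#include "llvm/IR/Function.h"

// Assumption based on the converted call sites: true iff PGO attached an
// entry count to the function.
static bool hasProfileDataSketch(const llvm::Function &F) {
  return F.getEntryCount().hasValue();
}
```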
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9c870b42a747..6af3fef963dc 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -476,22 +476,33 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
       Alignment = DL.getABITypeAlignment(EltType);
     }
 
-    AMemSet =
-      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+    // Remember the debug location.
+    DebugLoc Loc;
+    if (!Range.TheStores.empty())
+      Loc = Range.TheStores[0]->getDebugLoc();
 
     DEBUG(dbgs() << "Replace stores:\n";
           for (Instruction *SI : Range.TheStores)
-            dbgs() << *SI << '\n';
-          dbgs() << "With: " << *AMemSet << '\n');
-
-    if (!Range.TheStores.empty())
-      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
+            dbgs() << *SI << '\n');
 
     // Zap all the stores.
     for (Instruction *SI : Range.TheStores) {
       MD->removeInstruction(SI);
       SI->eraseFromParent();
     }
+
+    // Create the memset after removing the stores, so that if there any cached
+    // non-local dependencies on the removed instructions in
+    // MemoryDependenceAnalysis, the cache entries are updated to "dirty"
+    // entries pointing below the memset, so subsequent queries include the
+    // memset.
+    AMemSet =
+      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+    if (!Range.TheStores.empty())
+      AMemSet->setDebugLoc(Loc);
+
+    DEBUG(dbgs() << "With: " << *AMemSet << '\n');
+
     ++NumMemSetInfer;
   }
@@ -1031,9 +1042,22 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
   //
   // NOTE: This is conservative, it will stop on any read from the source loc,
   // not just the defining memcpy.
-  MemDepResult SourceDep =
-      MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
-                                   M->getIterator(), M->getParent());
+  MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep);
+  MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false,
+                                  M->getIterator(), M->getParent());
+
+  if (SourceDep.isNonLocal()) {
+    SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
+    MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false,
+                                         NonLocalDepResults);
+    if (NonLocalDepResults.size() == 1) {
+      SourceDep = NonLocalDepResults[0].getResult();
+      assert((!SourceDep.getInst() ||
+              LookupDomTree().dominates(SourceDep.getInst(), M)) &&
+             "when memdep returns exactly one result, it should dominate");
+    }
+  }
+
   if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
     return false;
@@ -1235,6 +1259,18 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
   MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
       SrcLoc, true, M->getIterator(), M->getParent());
 
+  if (SrcDepInfo.isNonLocal()) {
+    SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
+    MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true,
+                                         NonLocalDepResults);
+    if (NonLocalDepResults.size() == 1) {
+      SrcDepInfo = NonLocalDepResults[0].getResult();
+      assert((!SrcDepInfo.getInst() ||
+              LookupDomTree().dominates(SrcDepInfo.getInst(), M)) &&
+             "when memdep returns exactly one result, it should dominate");
+    }
+  }
+
   if (SrcDepInfo.isClobber()) {
     if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
       return processMemCpyMemCpyDependence(M, MDep);
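Both MemCpyOptimizer hunks teach the pass to follow a source dependence across a block boundary, accepting the non-local answer only when memdep returns exactly one (dominating) result. A hypothetical user-level shape that benefits, with all names invented:

```cpp
#include <cstring>

// The copy into `out` sits in a different basic block than the copy that
// defines `tmp`, so its dependence on the defining memcpy is non-local.
// With this change MemCpyOpt can still find that single dominating copy and
// forward `src` into the second memcpy; whether the rewrite actually fires
// still depends on the usual alias checks.
void forwardCopy(char *out, const char *src, bool take) {
  char tmp[64];
  std::memcpy(tmp, src, sizeof(tmp));   // defining copy, entry block
  if (take)
    std::memcpy(out, tmp, sizeof(tmp)); // non-local dep on the copy above
}
```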
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index e5866b4718da..66608ec631f6 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1929,9 +1929,32 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
       if (!I) continue;
 
       bool Folded = ConstantFoldTerminator(I->getParent());
-      assert(Folded &&
-             "Expect TermInst on constantint or blockaddress to be folded");
-      (void) Folded;
+      if (!Folded) {
+        // The constant folder may not have been able to fold the terminator
+        // if this is a branch or switch on undef. Fold it manually as a
+        // branch to the first successor.
+#ifndef NDEBUG
+        if (auto *BI = dyn_cast<BranchInst>(I)) {
+          assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
+                 "Branch should be foldable!");
+        } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
+          assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
+        } else {
+          llvm_unreachable("Didn't fold away reference to block!");
+        }
+#endif
+
+        // Make this an uncond branch to the first successor.
+        TerminatorInst *TI = I->getParent()->getTerminator();
+        BranchInst::Create(TI->getSuccessor(0), TI);
+
+        // Remove entries in successor phi nodes to remove edges.
+        for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
+          TI->getSuccessor(i)->removePredecessor(TI->getParent());
+
+        // Remove the old terminator.
+        TI->eraseFromParent();
+      }
     }
 
     // Finally, delete the basic block.
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 209821ff21d7..8fa9ffb6d014 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -97,7 +97,7 @@
 //   load %p2
 //   ...
 //
-// We can not do CSE for to the common part related to index "i64 %i". Lowering
+// We can not do CSE to the common part related to index "i64 %i". Lowering
 // GEPs can achieve such goals.
 // If the target does not use alias analysis in codegen, this pass will
 // lower a GEP with multiple indices into arithmetic operations:
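The SeparateConstOffsetFromGEP context cuts off at the colon above. For reference, "lowering a GEP with multiple indices into arithmetic operations" means rewriting structured indexing as explicit address arithmetic, so the index-dependent part can be CSE'd across GEPs that differ only in their constant offsets. An illustrative C++ analogue, with a made-up struct layout and names:

```cpp
#include <cstddef>

struct S { float a[32]; float b[32]; };

// &base[i].b[5] computed by hand: the `i * sizeof(S)` term is the common
// part shared by every access indexed by `i`; the rest is a constant offset.
float *elemOfB(S *base, long i) {
  char *p = reinterpret_cast<char *>(base);
  long common = i * static_cast<long>(sizeof(S)); // CSE-able across GEPs
  return reinterpret_cast<float *>(p + common + offsetof(S, b) +
                                   5 * sizeof(float));
}
```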

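One more note, on the SCCP hunk above: the old code asserted that ConstantFoldTerminator always succeeds, but the folder declines branches and switches on undef, which IPSCCP can produce. Condensed from the added code into a helper with an invented name, the manual fold is:

```cpp
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Rewrite `br i1 undef, %t, %f` (or a switch on undef) into an unconditional
// branch to successor 0, unhooking phi entries on the dropped edges.
static void foldUndefTerminator(TerminatorInst *TI) {
  BranchInst::Create(TI->getSuccessor(0), TI); // new uncond br before TI
  for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
    TI->getSuccessor(i)->removePredecessor(TI->getParent());
  TI->eraseFromParent(); // drop the old conditional terminator
}
```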