aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-12-29 00:56:15 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-12-29 00:56:15 +0000
commitfe4fed2e4d17945c38474cf0746792d04bf84b7d (patch)
treef82cc30abef889351b2dbe8d8aa2874056dbebbd /contrib/llvm/lib/Transforms
parentbbd32193a0463b1c7383443a45b774a2fe4d3430 (diff)
parent55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff)
downloadsrc-fe4fed2e4d17945c38474cf0746792d04bf84b7d.tar.gz
src-fe4fed2e4d17945c38474cf0746792d04bf84b7d.zip
Notes
Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp11
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp36
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp56
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp179
12 files changed, 47 insertions, 282 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 7086c2eb52c4..a69c009e1a54 100644
--- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -181,8 +181,9 @@ public:
StringRef Name, bool IsThinLTOPreLink,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
- : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo),
- Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {}
+ : GetAC(std::move(GetAssumptionCache)),
+ GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
+ IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
bool runOnModule(Module &M, ModuleAnalysisManager *AM);
@@ -1547,14 +1548,14 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) {
// Populate the symbol map.
for (const auto &N_F : M.getValueSymbolTable()) {
- std::string OrigName = N_F.getKey();
+ StringRef OrigName = N_F.getKey();
Function *F = dyn_cast<Function>(N_F.getValue());
if (F == nullptr)
continue;
SymbolMap[OrigName] = F;
auto pos = OrigName.find('.');
- if (pos != std::string::npos) {
- std::string NewName = OrigName.substr(0, pos);
+ if (pos != StringRef::npos) {
+ StringRef NewName = OrigName.substr(0, pos);
auto r = SymbolMap.insert(std::make_pair(NewName, F));
// Failiing to insert means there is already an entry in SymbolMap,
// thus there are multiple functions that are mapped to the same
diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 945133074059..caffc03339c4 100644
--- a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -90,8 +90,7 @@ void promoteTypeIds(Module &M, StringRef ModuleId) {
if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
Metadata *&GlobalMD = LocalToGlobal[MD];
if (!GlobalMD) {
- std::string NewName =
- (to_string(LocalToGlobal.size()) + ModuleId).str();
+ std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str();
GlobalMD = MDString::get(M.getContext(), NewName);
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a088d447337f..40e52ee755e5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1802,9 +1802,7 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
/// instructions. For normal calls, it allows visitCallSite to do the heavy
/// lifting.
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
- auto Args = CI.arg_operands();
- if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(),
- Args.end(), SQ.getWithInstruction(&CI)))
+ if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
return replaceInstUsesWith(CI, V);
if (isFreeCall(&CI, &TLI))
@@ -1903,16 +1901,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
return replaceInstUsesWith(CI, N);
return nullptr;
-
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
Value *X = nullptr;
- // TODO should this be in InstSimplify?
- // bswap(bswap(x)) -> x
- if (match(IIOperand, m_BSwap(m_Value(X))))
- return replaceInstUsesWith(CI, X);
-
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
unsigned C = X->getType()->getPrimitiveSizeInBits() -
@@ -1923,18 +1915,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
}
-
- case Intrinsic::bitreverse: {
- Value *IIOperand = II->getArgOperand(0);
- Value *X = nullptr;
-
- // TODO should this be in InstSimplify?
- // bitreverse(bitreverse(x)) -> x
- if (match(IIOperand, m_BitReverse(m_Value(X))))
- return replaceInstUsesWith(CI, X);
- break;
- }
-
case Intrinsic::masked_load:
if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
return replaceInstUsesWith(CI, SimplifiedMaskedOp);
@@ -1948,16 +1928,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // powi(x, 0) -> 1.0
- if (Power->isZero())
- return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
- // powi(x, 1) -> x
- if (Power->isOne())
- return replaceInstUsesWith(CI, II->getArgOperand(0));
+ // 0 and 1 are handled in instsimplify
+
// powi(x, -1) -> 1/x
if (Power->isMinusOne())
return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
II->getArgOperand(0));
+ // powi(x, 2) -> x*x
+ if (Power->equalsInt(2))
+ return BinaryOperator::CreateFMul(II->getArgOperand(0),
+ II->getArgOperand(0));
}
break;
@@ -2396,7 +2376,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// The compare intrinsic uses the above assumptions and therefore
// doesn't require additional flags.
if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
- match(Arg1, m_Zero()) &&
+ match(Arg1, m_Zero()) && isa<Instruction>(Arg0) &&
cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
if (Arg0IsZero)
std::swap(A, B);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 87666360c1a0..541dde6c47d2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1631,9 +1631,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- // Handle cases involving: rem X, (select Cond, Y, Z)
- if (simplifyDivRemOfSelectWithZeroOp(I))
- return &I;
-
return nullptr;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 65a96b965227..aeac8910af6b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -181,11 +181,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
- unsigned IndexVal = IdxC->getZExtValue();
unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
- // InstSimplify handles cases where the index is invalid.
- assert(IndexVal < VectorWidth);
+ // InstSimplify should handle cases where the index is invalid.
+ if (!IdxC->getValue().ule(VectorWidth))
+ return nullptr;
+
+ unsigned IndexVal = IdxC->getZExtValue();
// This instruction only demands the single element from the input vector.
// If the input vector has a single use, simplify it based on this use
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 8328d4031941..8e39f24d819c 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2702,9 +2702,10 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() {
unsigned Align = Arg.getParamAlignment();
if (Align == 0) Align = DL.getABITypeAlignment(Ty);
- const std::string &Name = Arg.hasName() ? Arg.getName().str() :
- "Arg" + llvm::to_string(Arg.getArgNo());
- AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval");
+ AllocaInst *AI = IRB.CreateAlloca(
+ Ty, nullptr,
+ (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) +
+ ".byval");
AI->setAlignment(Align);
Arg.replaceAllUsesWith(AI);
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 814a62cd7d65..bf92e43c4715 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -641,7 +641,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
DenseMap<uint32_t, unsigned> VNums;
for (auto *I : Insts) {
uint32_t N = VN.lookupOrAdd(I);
- DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
+ DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
if (N == ~0U)
return None;
VNums[N]++;
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 6af3fef963dc..9c870b42a747 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -476,33 +476,22 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
Alignment = DL.getABITypeAlignment(EltType);
}
- // Remember the debug location.
- DebugLoc Loc;
- if (!Range.TheStores.empty())
- Loc = Range.TheStores[0]->getDebugLoc();
+ AMemSet =
+ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
DEBUG(dbgs() << "Replace stores:\n";
for (Instruction *SI : Range.TheStores)
- dbgs() << *SI << '\n');
+ dbgs() << *SI << '\n';
+ dbgs() << "With: " << *AMemSet << '\n');
+
+ if (!Range.TheStores.empty())
+ AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
// Zap all the stores.
for (Instruction *SI : Range.TheStores) {
MD->removeInstruction(SI);
SI->eraseFromParent();
}
-
- // Create the memset after removing the stores, so that if there any cached
- // non-local dependencies on the removed instructions in
- // MemoryDependenceAnalysis, the cache entries are updated to "dirty"
- // entries pointing below the memset, so subsequent queries include the
- // memset.
- AMemSet =
- Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
- if (!Range.TheStores.empty())
- AMemSet->setDebugLoc(Loc);
-
- DEBUG(dbgs() << "With: " << *AMemSet << '\n');
-
++NumMemSetInfer;
}
@@ -1042,22 +1031,9 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep);
- MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false,
- M->getIterator(), M->getParent());
-
- if (SourceDep.isNonLocal()) {
- SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
- MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false,
- NonLocalDepResults);
- if (NonLocalDepResults.size() == 1) {
- SourceDep = NonLocalDepResults[0].getResult();
- assert((!SourceDep.getInst() ||
- LookupDomTree().dominates(SourceDep.getInst(), M)) &&
- "when memdep returns exactly one result, it should dominate");
- }
- }
-
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+ M->getIterator(), M->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
@@ -1259,18 +1235,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
SrcLoc, true, M->getIterator(), M->getParent());
- if (SrcDepInfo.isNonLocal()) {
- SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
- MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true,
- NonLocalDepResults);
- if (NonLocalDepResults.size() == 1) {
- SrcDepInfo = NonLocalDepResults[0].getResult();
- assert((!SrcDepInfo.getInst() ||
- LookupDomTree().dominates(SrcDepInfo.getInst(), M)) &&
- "when memdep returns exactly one result, it should dominate");
- }
- }
-
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
return processMemCpyMemCpyDependence(M, MDep);
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 3b45cfa482e6..c44edbed8ed9 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -2796,17 +2796,12 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
StatepointLiveSetTy Updated;
findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
-#ifndef NDEBUG
- DenseSet<Value *> Bases;
- for (auto KVPair : Info.PointerToBase)
- Bases.insert(KVPair.second);
-#endif
-
// We may have base pointers which are now live that weren't before. We need
// to update the PointerToBase structure to reflect this.
for (auto V : Updated)
if (Info.PointerToBase.insert({V, V}).second) {
- assert(Bases.count(V) && "Can't find base for unexpected live value!");
+ assert(isKnownBaseResult(V) &&
+ "Can't find base for unexpected live value!");
continue;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index efff06f79cb7..e00541d3c812 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -648,8 +648,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
DT, LI, PreserveLCSSA);
+ // NewExit gets its DebugLoc from LatchExit, which is not part of the
+ // original Loop.
+ // Fix this by setting Loop's DebugLoc to NewExit.
+ auto *NewExitTerminator = NewExit->getTerminator();
+ NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
// Split NewExit to insert epilog remainder loop.
- EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
+ EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
} else {
// If prolog remainder
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index c3fa05a11a24..fe106e33bca1 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -880,9 +880,10 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
/// If we are able to find such sequence, we return the instructions
/// we found, namely %casted_phi and the instructions on its use-def chain up
/// to the phi (not including the phi).
-bool getCastsForInductionPHI(
- PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev,
- const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) {
+static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
+ const SCEVUnknown *PhiScev,
+ const SCEVAddRecExpr *AR,
+ SmallVectorImpl<Instruction *> &CastInsts) {
assert(CastInsts.empty() && "CastInsts is expected to be empty.");
auto *PN = cast<PHINode>(PhiScev->getValue());
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b3c80424c8b9..e7358dbcb624 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -127,16 +127,6 @@ static cl::opt<unsigned> MaxSpeculationDepth(
cl::desc("Limit maximum recursion depth when calculating costs of "
"speculatively executed instructions"));
-static cl::opt<unsigned> DependenceChainLatency(
- "dependence-chain-latency", cl::Hidden, cl::init(8),
- cl::desc("Limit the maximum latency of dependence chain containing cmp "
- "for if conversion"));
-
-static cl::opt<unsigned> SmallBBSize(
- "small-bb-size", cl::Hidden, cl::init(40),
- cl::desc("Check dependence chain latency only in basic block smaller than "
- "this number"));
-
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -405,166 +395,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
return true;
}
-/// Estimate the code size of the specified BB.
-static unsigned CountBBCodeSize(BasicBlock *BB,
- const TargetTransformInfo &TTI) {
- unsigned Size = 0;
- for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II)
- Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize);
- return Size;
-}
-
-/// Find out the latency of the longest dependence chain in the BB if
-/// LongestChain is true, or the dependence chain containing the compare
-/// instruction feeding the block's conditional branch.
-static unsigned FindDependenceChainLatency(BasicBlock *BB,
- DenseMap<Instruction *, unsigned> &Instructions,
- const TargetTransformInfo &TTI,
- bool LongestChain) {
- unsigned MaxLatency = 0;
-
- BasicBlock::iterator II;
- for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) {
- unsigned Latency = 0;
- for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) {
- Instruction *Op = dyn_cast<Instruction>(II->getOperand(O));
- if (Op && Instructions.count(Op)) {
- auto OpLatency = Instructions[Op];
- if (OpLatency > Latency)
- Latency = OpLatency;
- }
- }
- Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency);
- Instructions[&(*II)] = Latency;
-
- if (Latency > MaxLatency)
- MaxLatency = Latency;
- }
-
- if (LongestChain)
- return MaxLatency;
-
- // The length of the dependence chain containing the compare instruction is
- // wanted, so the terminator must be a BranchInst.
- assert(isa<BranchInst>(II));
- BranchInst* Br = cast<BranchInst>(II);
- Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition());
- if (Cmp && Instructions.count(Cmp))
- return Instructions[Cmp];
- else
- return 0;
-}
-
-/// Instructions in BB2 may depend on instructions in BB1, and instructions
-/// in BB1 may have users in BB2. If the last (in terms of latency) such kind
-/// of instruction in BB1 is I, then the instructions after I can be executed
-/// in parallel with instructions in BB2.
-/// This function returns the latency of I.
-static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2,
- BasicBlock *IfBlock1, BasicBlock *IfBlock2,
- DenseMap<Instruction *, unsigned> &BB1Instructions) {
- unsigned LastLatency = 0;
- SmallVector<Instruction *, 16> Worklist;
- BasicBlock::iterator II;
- for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) {
- if (PHINode *PN = dyn_cast<PHINode>(II)) {
- // Look for users in BB2.
- bool InBBUser = false;
- for (User *U : PN->users()) {
- if (cast<Instruction>(U)->getParent() == BB2) {
- InBBUser = true;
- break;
- }
- }
- // No such user, we don't care about this instruction and its operands.
- if (!InBBUser)
- break;
- }
- Worklist.push_back(&(*II));
- }
-
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
- for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) {
- if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) {
- if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2)
- Worklist.push_back(Op);
- else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) {
- if (BB1Instructions[Op] > LastLatency)
- LastLatency = BB1Instructions[Op];
- }
- }
- }
- }
-
- return LastLatency;
-}
-
-/// If after if conversion, most of the instructions in this new BB construct a
-/// long and slow dependence chain, it may be slower than cmp/branch, even
-/// if the branch has a high miss rate, because the control dependence is
-/// transformed into data dependence, and control dependence can be speculated,
-/// and thus, the second part can execute in parallel with the first part on
-/// modern OOO processor.
-///
-/// To check this condition, this function finds the length of the dependence
-/// chain in BB1 (only the part that can be executed in parallel with code after
-/// branch in BB2) containing cmp, and if the length is longer than a threshold,
-/// don't perform if conversion.
-///
-/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion.
-/// SpeculationSize contains the code size of IfBlock1 and IfBlock2.
-static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2,
- BasicBlock *IfBlock1, BasicBlock *IfBlock2,
- unsigned SpeculationSize,
- const TargetTransformInfo &TTI) {
- // Accumulated latency of each instruction in their BBs.
- DenseMap<Instruction *, unsigned> BB1Instructions;
- DenseMap<Instruction *, unsigned> BB2Instructions;
-
- if (!TTI.isOutOfOrder())
- return false;
-
- unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI)
- + SpeculationSize;
-
- // We check small BB only since it is more difficult to find unrelated
- // instructions to fill functional units in a small BB.
- if (NewBBSize > SmallBBSize)
- return false;
-
- auto BB1Chain =
- FindDependenceChainLatency(BB1, BB1Instructions, TTI, false);
- auto BB2Chain =
- FindDependenceChainLatency(BB2, BB2Instructions, TTI, true);
-
- // If there are many unrelated instructions in the new BB, there will be
- // other instructions for the processor to issue regardless of the length
- // of this new dependence chain.
- // Modern processors can issue 3 or more instructions in each cycle. But in
- // real world applications, an IPC of 2 is already very good for non-loop
- // code with small basic blocks. Higher IPC is usually found in programs with
- // small kernel. So IPC of 2 is more reasonable for most applications.
- if ((BB1Chain + BB2Chain) * 2 <= NewBBSize)
- return false;
-
- // We only care about part of the dependence chain in BB1 that can be
- // executed in parallel with BB2, so adjust the latency.
- BB1Chain -=
- LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions);
-
- // Correctly predicted branch instruction can skip the dependence chain in
- // BB1, but misprediction has a penalty, so only when the dependence chain is
- // longer than DependenceChainLatency, then branch is better than select.
- // Besides misprediction penalty, the threshold value DependenceChainLatency
- // also depends on branch misprediction rate, taken branch latency and cmov
- // latency.
- if (BB1Chain >= DependenceChainLatency)
- return true;
-
- return false;
-}
-
/// Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
@@ -2214,11 +2044,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
return false;
- // Don't do if conversion for long dependence chain.
- if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr,
- CountBBCodeSize(ThenBB, TTI), TTI))
- return false;
-
// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
@@ -2526,10 +2351,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
}
}
- if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2,
- AggressiveInsts.size(), TTI))
- return false;
-
DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
<< IfTrue->getName() << " F: " << IfFalse->getName() << "\n");