src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2017-12-29 00:56:15 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-12-29 00:56:15 +0000
commit	fe4fed2e4d17945c38474cf0746792d04bf84b7d (patch)
tree	f82cc30abef889351b2dbe8d8aa2874056dbebbd /contrib/llvm/lib/Transforms
parent	bbd32193a0463b1c7383443a45b774a2fe4d3430 (diff)
parent	55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff)

Notes

Diffstat (limited to 'contrib/llvm/lib/Transforms')

-rw-r--r--

contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp

-rw-r--r--

contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp

-rw-r--r--

contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

-rw-r--r--

contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

-rw-r--r--

contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

-rw-r--r--

contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

-rw-r--r--

contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp

-rw-r--r--

contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

-rw-r--r--

contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp

-rw-r--r--

contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp

-rw-r--r--

contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp

-rw-r--r--

contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp

179

12 files changed, 47 insertions, 282 deletions

diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 7086c2eb52c4..a69c009e1a54 100644
--- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp

@@ -181,8 +181,9 @@ public:

StringRef Name, bool IsThinLTOPreLink,

std::function<AssumptionCache &(Function &)> GetAssumptionCache,

std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)

- : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo),

- Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {}

+ : GetAC(std::move(GetAssumptionCache)),

+ GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),

+ IsThinLTOPreLink(IsThinLTOPreLink) {}

bool doInitialization(Module &M);

bool runOnModule(Module &M, ModuleAnalysisManager *AM);

@@ -1547,14 +1548,14 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) {

// Populate the symbol map.

for (const auto &N_F : M.getValueSymbolTable()) {

- std::string OrigName = N_F.getKey();

+ StringRef OrigName = N_F.getKey();

Function *F = dyn_cast<Function>(N_F.getValue());

if (F == nullptr)

continue;

SymbolMap[OrigName] = F;

auto pos = OrigName.find('.');

- if (pos != std::string::npos) {

- std::string NewName = OrigName.substr(0, pos);

+ if (pos != StringRef::npos) {

+ StringRef NewName = OrigName.substr(0, pos);

auto r = SymbolMap.insert(std::make_pair(NewName, F));

// Failiing to insert means there is already an entry in SymbolMap,

// thus there are multiple functions that are mapped to the same

diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 945133074059..caffc03339c4 100644
--- a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp

@@ -90,8 +90,7 @@ void promoteTypeIds(Module &M, StringRef ModuleId) {

if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {

Metadata *&GlobalMD = LocalToGlobal[MD];

if (!GlobalMD) {

- std::string NewName =

- (to_string(LocalToGlobal.size()) + ModuleId).str();

+ std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str();

GlobalMD = MDString::get(M.getContext(), NewName);

}

diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a088d447337f..40e52ee755e5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

@@ -1802,9 +1802,7 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {

/// instructions. For normal calls, it allows visitCallSite to do the heavy

/// lifting.

Instruction *InstCombiner::visitCallInst(CallInst &CI) {

- auto Args = CI.arg_operands();

- if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(),

- Args.end(), SQ.getWithInstruction(&CI)))

+ if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))

return replaceInstUsesWith(CI, V);

if (isFreeCall(&CI, &TLI))

@@ -1903,16 +1901,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {

lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))

return replaceInstUsesWith(CI, N);

return nullptr;

case Intrinsic::bswap: {

Value *IIOperand = II->getArgOperand(0);

Value *X = nullptr;

- // TODO should this be in InstSimplify?

- // bswap(bswap(x)) -> x

- if (match(IIOperand, m_BSwap(m_Value(X))))

- return replaceInstUsesWith(CI, X);

// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))

if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {

unsigned C = X->getType()->getPrimitiveSizeInBits() -

@@ -1923,18 +1915,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {

}

break;

}

- case Intrinsic::bitreverse: {

- Value *IIOperand = II->getArgOperand(0);

- Value *X = nullptr;

- // TODO should this be in InstSimplify?

- // bitreverse(bitreverse(x)) -> x

- if (match(IIOperand, m_BitReverse(m_Value(X))))

- return replaceInstUsesWith(CI, X);

- break;

- }

case Intrinsic::masked_load:

if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))

return replaceInstUsesWith(CI, SimplifiedMaskedOp);

@@ -1948,16 +1928,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {

case Intrinsic::powi:

if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {

- // powi(x, 0) -> 1.0

- if (Power->isZero())

- return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));

- // powi(x, 1) -> x

- if (Power->isOne())

- return replaceInstUsesWith(CI, II->getArgOperand(0));

+ // 0 and 1 are handled in instsimplify

// powi(x, -1) -> 1/x

if (Power->isMinusOne())

return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),

II->getArgOperand(0));

+ // powi(x, 2) -> x*x

+ if (Power->equalsInt(2))

+ return BinaryOperator::CreateFMul(II->getArgOperand(0),

+ II->getArgOperand(0));

}

break;

@@ -2396,7 +2376,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {

// The compare intrinsic uses the above assumptions and therefore

// doesn't require additional flags.

if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&

- match(Arg1, m_Zero()) &&

+ match(Arg1, m_Zero()) && isa<Instruction>(Arg0) &&

cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {

if (Arg0IsZero)

std::swap(A, B);

diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 87666360c1a0..541dde6c47d2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

@@ -1631,9 +1631,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {

SQ.getWithInstruction(&I)))

return replaceInstUsesWith(I, V);

- // Handle cases involving: rem X, (select Cond, Y, Z)

- if (simplifyDivRemOfSelectWithZeroOp(I))

- return &I;

return nullptr;

}

diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 65a96b965227..aeac8910af6b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

@@ -181,11 +181,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {

// If extracting a specified index from the vector, see if we can recursively

// find a previously computed scalar that was inserted into the vector.

if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {

- unsigned IndexVal = IdxC->getZExtValue();

unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();

- // InstSimplify handles cases where the index is invalid.

- assert(IndexVal < VectorWidth);

+ // InstSimplify should handle cases where the index is invalid.

+ if (!IdxC->getValue().ule(VectorWidth))

+ return nullptr;

+ unsigned IndexVal = IdxC->getZExtValue();

// This instruction only demands the single element from the input vector.

// If the input vector has a single use, simplify it based on this use

diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 8328d4031941..8e39f24d819c 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

@@ -2702,9 +2702,10 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() {

unsigned Align = Arg.getParamAlignment();

if (Align == 0) Align = DL.getABITypeAlignment(Ty);

- const std::string &Name = Arg.hasName() ? Arg.getName().str() :

- "Arg" + llvm::to_string(Arg.getArgNo());

- AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval");

+ AllocaInst *AI = IRB.CreateAlloca(

+ Ty, nullptr,

+ (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) +

+ ".byval");

AI->setAlignment(Align);

Arg.replaceAllUsesWith(AI);

diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 814a62cd7d65..bf92e43c4715 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp

@@ -641,7 +641,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(

DenseMap<uint32_t, unsigned> VNums;

for (auto *I : Insts) {

uint32_t N = VN.lookupOrAdd(I);

- DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");

+ DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");

if (N == ~0U)

return None;

VNums[N]++;

diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 6af3fef963dc..9c870b42a747 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

@@ -476,33 +476,22 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,

Alignment = DL.getABITypeAlignment(EltType);

}

- // Remember the debug location.

- DebugLoc Loc;

- if (!Range.TheStores.empty())

- Loc = Range.TheStores[0]->getDebugLoc();

+ AMemSet =

+ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);

DEBUG(dbgs() << "Replace stores:\n";

for (Instruction *SI : Range.TheStores)

- dbgs() << *SI << '\n');

+ dbgs() << *SI << '\n';

+ dbgs() << "With: " << *AMemSet << '\n');

+ if (!Range.TheStores.empty())

+ AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

// Zap all the stores.

for (Instruction *SI : Range.TheStores) {

MD->removeInstruction(SI);

SI->eraseFromParent();

}

- // Create the memset after removing the stores, so that if there any cached

- // non-local dependencies on the removed instructions in

- // MemoryDependenceAnalysis, the cache entries are updated to "dirty"

- // entries pointing below the memset, so subsequent queries include the

- // memset.

- AMemSet =

- Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);

- if (!Range.TheStores.empty())

- AMemSet->setDebugLoc(Loc);

- DEBUG(dbgs() << "With: " << *AMemSet << '\n');

++NumMemSetInfer;

}

@@ -1042,22 +1031,9 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,

// NOTE: This is conservative, it will stop on any read from the source loc,

// not just the defining memcpy.

- MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep);

- MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false,

- M->getIterator(), M->getParent());

- if (SourceDep.isNonLocal()) {

- SmallVector<NonLocalDepResult, 2> NonLocalDepResults;

- MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false,

- NonLocalDepResults);

- if (NonLocalDepResults.size() == 1) {

- SourceDep = NonLocalDepResults[0].getResult();

- assert((!SourceDep.getInst() ||

- LookupDomTree().dominates(SourceDep.getInst(), M)) &&

- "when memdep returns exactly one result, it should dominate");

- }

+ MemDepResult SourceDep =

+ MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,

+ M->getIterator(), M->getParent());

if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)

return false;

@@ -1259,18 +1235,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {

MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(

SrcLoc, true, M->getIterator(), M->getParent());

- if (SrcDepInfo.isNonLocal()) {

- SmallVector<NonLocalDepResult, 2> NonLocalDepResults;

- MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true,

- NonLocalDepResults);

- if (NonLocalDepResults.size() == 1) {

- SrcDepInfo = NonLocalDepResults[0].getResult();

- assert((!SrcDepInfo.getInst() ||

- LookupDomTree().dominates(SrcDepInfo.getInst(), M)) &&

- "when memdep returns exactly one result, it should dominate");

- }

if (SrcDepInfo.isClobber()) {

if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))

return processMemCpyMemCpyDependence(M, MDep);

diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 3b45cfa482e6..c44edbed8ed9 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp

@@ -2796,17 +2796,12 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,

StatepointLiveSetTy Updated;

findLiveSetAtInst(Inst, RevisedLivenessData, Updated);

-#ifndef NDEBUG

- DenseSet<Value *> Bases;

- for (auto KVPair : Info.PointerToBase)

- Bases.insert(KVPair.second);

-#endif

// We may have base pointers which are now live that weren't before. We need

// to update the PointerToBase structure to reflect this.

for (auto V : Updated)

if (Info.PointerToBase.insert({V, V}).second) {

- assert(Bases.count(V) && "Can't find base for unexpected live value!");

+ assert(isKnownBaseResult(V) &&

+ "Can't find base for unexpected live value!");

continue;

}

diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index efff06f79cb7..e00541d3c812 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp

@@ -648,8 +648,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,

SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));

NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",

DT, LI, PreserveLCSSA);

+ // NewExit gets its DebugLoc from LatchExit, which is not part of the

+ // original Loop.

+ // Fix this by setting Loop's DebugLoc to NewExit.

+ auto *NewExitTerminator = NewExit->getTerminator();

+ NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());

// Split NewExit to insert epilog remainder loop.

- EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);

+ EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);

EpilogPreHeader->setName(Header->getName() + ".epil.preheader");

} else {

// If prolog remainder

diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index c3fa05a11a24..fe106e33bca1 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp

@@ -880,9 +880,10 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,

/// If we are able to find such sequence, we return the instructions

/// we found, namely %casted_phi and the instructions on its use-def chain up

/// to the phi (not including the phi).

-bool getCastsForInductionPHI(

- PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev,

- const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) {

+static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,

+ const SCEVUnknown *PhiScev,

+ const SCEVAddRecExpr *AR,

+ SmallVectorImpl<Instruction *> &CastInsts) {

assert(CastInsts.empty() && "CastInsts is expected to be empty.");

auto *PN = cast<PHINode>(PhiScev->getValue());

diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b3c80424c8b9..e7358dbcb624 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp

@@ -127,16 +127,6 @@ static cl::opt<unsigned> MaxSpeculationDepth(

cl::desc("Limit maximum recursion depth when calculating costs of "

"speculatively executed instructions"));

-static cl::opt<unsigned> DependenceChainLatency(

- "dependence-chain-latency", cl::Hidden, cl::init(8),

- cl::desc("Limit the maximum latency of dependence chain containing cmp "

- "for if conversion"));

-static cl::opt<unsigned> SmallBBSize(

- "small-bb-size", cl::Hidden, cl::init(40),

- cl::desc("Check dependence chain latency only in basic block smaller than "

- "this number"));

STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");

STATISTIC(NumLinearMaps,

"Number of switch instructions turned into linear mapping");

@@ -405,166 +395,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,

return true;

}

-/// Estimate the code size of the specified BB.

-static unsigned CountBBCodeSize(BasicBlock *BB,

- const TargetTransformInfo &TTI) {

- unsigned Size = 0;

- for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II)

- Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize);

- return Size;

-/// Find out the latency of the longest dependence chain in the BB if

-/// LongestChain is true, or the dependence chain containing the compare

-/// instruction feeding the block's conditional branch.

-static unsigned FindDependenceChainLatency(BasicBlock *BB,

- DenseMap<Instruction *, unsigned> &Instructions,

- const TargetTransformInfo &TTI,

- bool LongestChain) {

- unsigned MaxLatency = 0;

- BasicBlock::iterator II;

- for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) {

- unsigned Latency = 0;

- for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) {

- Instruction *Op = dyn_cast<Instruction>(II->getOperand(O));

- if (Op && Instructions.count(Op)) {

- auto OpLatency = Instructions[Op];

- if (OpLatency > Latency)

- Latency = OpLatency;

- }

- Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency);

- Instructions[&(*II)] = Latency;

- if (Latency > MaxLatency)

- MaxLatency = Latency;

- }

- if (LongestChain)

- return MaxLatency;

- // The length of the dependence chain containing the compare instruction is

- // wanted, so the terminator must be a BranchInst.

- assert(isa<BranchInst>(II));

- BranchInst* Br = cast<BranchInst>(II);

- Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition());

- if (Cmp && Instructions.count(Cmp))

- return Instructions[Cmp];

- else

- return 0;

-/// Instructions in BB2 may depend on instructions in BB1, and instructions

-/// in BB1 may have users in BB2. If the last (in terms of latency) such kind

-/// of instruction in BB1 is I, then the instructions after I can be executed

-/// in parallel with instructions in BB2.

-/// This function returns the latency of I.

-static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2,

- BasicBlock *IfBlock1, BasicBlock *IfBlock2,

- DenseMap<Instruction *, unsigned> &BB1Instructions) {

- unsigned LastLatency = 0;

- SmallVector<Instruction *, 16> Worklist;

- BasicBlock::iterator II;

- for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) {

- if (PHINode *PN = dyn_cast<PHINode>(II)) {

- // Look for users in BB2.

- bool InBBUser = false;

- for (User *U : PN->users()) {

- if (cast<Instruction>(U)->getParent() == BB2) {

- InBBUser = true;

- break;

- }

- // No such user, we don't care about this instruction and its operands.

- if (!InBBUser)

- break;

- }

- Worklist.push_back(&(*II));

- }

- while (!Worklist.empty()) {

- Instruction *I = Worklist.pop_back_val();

- for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) {

- if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) {

- if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2)

- Worklist.push_back(Op);

- else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) {

- if (BB1Instructions[Op] > LastLatency)

- LastLatency = BB1Instructions[Op];

- }

- return LastLatency;

-/// If after if conversion, most of the instructions in this new BB construct a

-/// long and slow dependence chain, it may be slower than cmp/branch, even

-/// if the branch has a high miss rate, because the control dependence is

-/// transformed into data dependence, and control dependence can be speculated,

-/// and thus, the second part can execute in parallel with the first part on

-/// modern OOO processor.

-///

-/// To check this condition, this function finds the length of the dependence

-/// chain in BB1 (only the part that can be executed in parallel with code after

-/// branch in BB2) containing cmp, and if the length is longer than a threshold,

-/// don't perform if conversion.

-///

-/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion.

-/// SpeculationSize contains the code size of IfBlock1 and IfBlock2.

-static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2,

- BasicBlock *IfBlock1, BasicBlock *IfBlock2,

- unsigned SpeculationSize,

- const TargetTransformInfo &TTI) {

- // Accumulated latency of each instruction in their BBs.

- DenseMap<Instruction *, unsigned> BB1Instructions;

- DenseMap<Instruction *, unsigned> BB2Instructions;

- if (!TTI.isOutOfOrder())

- return false;

- unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI)

- + SpeculationSize;

- // We check small BB only since it is more difficult to find unrelated

- // instructions to fill functional units in a small BB.

- if (NewBBSize > SmallBBSize)

- return false;

- auto BB1Chain =

- FindDependenceChainLatency(BB1, BB1Instructions, TTI, false);

- auto BB2Chain =

- FindDependenceChainLatency(BB2, BB2Instructions, TTI, true);

- // If there are many unrelated instructions in the new BB, there will be

- // other instructions for the processor to issue regardless of the length

- // of this new dependence chain.

- // Modern processors can issue 3 or more instructions in each cycle. But in

- // real world applications, an IPC of 2 is already very good for non-loop

- // code with small basic blocks. Higher IPC is usually found in programs with

- // small kernel. So IPC of 2 is more reasonable for most applications.

- if ((BB1Chain + BB2Chain) * 2 <= NewBBSize)

- return false;

- // We only care about part of the dependence chain in BB1 that can be

- // executed in parallel with BB2, so adjust the latency.

- BB1Chain -=

- LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions);

- // Correctly predicted branch instruction can skip the dependence chain in

- // BB1, but misprediction has a penalty, so only when the dependence chain is

- // longer than DependenceChainLatency, then branch is better than select.

- // Besides misprediction penalty, the threshold value DependenceChainLatency

- // also depends on branch misprediction rate, taken branch latency and cmov

- // latency.

- if (BB1Chain >= DependenceChainLatency)

- return true;

- return false;

/// Extract ConstantInt from value, looking through IntToPtr

/// and PointerNullValue. Return NULL if value is not a constant int.

static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {

@@ -2214,11 +2044,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,

if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))

return false;

- // Don't do if conversion for long dependence chain.

- if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr,

- CountBBCodeSize(ThenBB, TTI), TTI))

- return false;

// If we get here, we can hoist the instruction and if-convert.

DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);

@@ -2526,10 +2351,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,

}

- if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2,

- AggressiveInsts.size(), TTI))

- return false;

DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "

<< IfTrue->getName() << " F: " << IfFalse->getName() << "\n");