diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-29 00:56:15 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-29 00:56:15 +0000 |
commit | fe4fed2e4d17945c38474cf0746792d04bf84b7d (patch) | |
tree | f82cc30abef889351b2dbe8d8aa2874056dbebbd /contrib/llvm/lib/Transforms | |
parent | bbd32193a0463b1c7383443a45b774a2fe4d3430 (diff) | |
parent | 55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff) | |
download | src-fe4fed2e4d17945c38474cf0746792d04bf84b7d.tar.gz src-fe4fed2e4d17945c38474cf0746792d04bf84b7d.zip |
Notes
Diffstat (limited to 'contrib/llvm/lib/Transforms')
12 files changed, 47 insertions, 282 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp index 7086c2eb52c4..a69c009e1a54 100644 --- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -181,8 +181,9 @@ public: StringRef Name, bool IsThinLTOPreLink, std::function<AssumptionCache &(Function &)> GetAssumptionCache, std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo) - : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo), - Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {} + : GetAC(std::move(GetAssumptionCache)), + GetTTI(std::move(GetTargetTransformInfo)), Filename(Name), + IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM); @@ -1547,14 +1548,14 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) { // Populate the symbol map. for (const auto &N_F : M.getValueSymbolTable()) { - std::string OrigName = N_F.getKey(); + StringRef OrigName = N_F.getKey(); Function *F = dyn_cast<Function>(N_F.getValue()); if (F == nullptr) continue; SymbolMap[OrigName] = F; auto pos = OrigName.find('.'); - if (pos != std::string::npos) { - std::string NewName = OrigName.substr(0, pos); + if (pos != StringRef::npos) { + StringRef NewName = OrigName.substr(0, pos); auto r = SymbolMap.insert(std::make_pair(NewName, F)); // Failiing to insert means there is already an entry in SymbolMap, // thus there are multiple functions that are mapped to the same diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 945133074059..caffc03339c4 100644 --- a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -90,8 +90,7 @@ void promoteTypeIds(Module &M, StringRef ModuleId) { if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { Metadata *&GlobalMD = LocalToGlobal[MD]; if (!GlobalMD) { - std::string NewName = - (to_string(LocalToGlobal.size()) + ModuleId).str(); + std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str(); GlobalMD = MDString::get(M.getContext(), NewName); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index a088d447337f..40e52ee755e5 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1802,9 +1802,7 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { /// instructions. For normal calls, it allows visitCallSite to do the heavy /// lifting. Instruction *InstCombiner::visitCallInst(CallInst &CI) { - auto Args = CI.arg_operands(); - if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(), - Args.end(), SQ.getWithInstruction(&CI))) + if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); if (isFreeCall(&CI, &TLI)) @@ -1903,16 +1901,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false)) return replaceInstUsesWith(CI, N); return nullptr; - case Intrinsic::bswap: { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; - // TODO should this be in InstSimplify? - // bswap(bswap(x)) -> x - if (match(IIOperand, m_BSwap(m_Value(X)))) - return replaceInstUsesWith(CI, X); - // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { unsigned C = X->getType()->getPrimitiveSizeInBits() - @@ -1923,18 +1915,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; } - - case Intrinsic::bitreverse: { - Value *IIOperand = II->getArgOperand(0); - Value *X = nullptr; - - // TODO should this be in InstSimplify? - // bitreverse(bitreverse(x)) -> x - if (match(IIOperand, m_BitReverse(m_Value(X)))) - return replaceInstUsesWith(CI, X); - break; - } - case Intrinsic::masked_load: if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder)) return replaceInstUsesWith(CI, SimplifiedMaskedOp); @@ -1948,16 +1928,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::powi: if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - // powi(x, 0) -> 1.0 - if (Power->isZero()) - return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); - // powi(x, 1) -> x - if (Power->isOne()) - return replaceInstUsesWith(CI, II->getArgOperand(0)); + // 0 and 1 are handled in instsimplify + // powi(x, -1) -> 1/x if (Power->isMinusOne()) return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), II->getArgOperand(0)); + // powi(x, 2) -> x*x + if (Power->equalsInt(2)) + return BinaryOperator::CreateFMul(II->getArgOperand(0), + II->getArgOperand(0)); } break; @@ -2396,7 +2376,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // The compare intrinsic uses the above assumptions and therefore // doesn't require additional flags. if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) && - match(Arg1, m_Zero()) && + match(Arg1, m_Zero()) && isa<Instruction>(Arg0) && cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) { if (Arg0IsZero) std::swap(A, B); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 87666360c1a0..541dde6c47d2 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1631,9 +1631,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); - // Handle cases involving: rem X, (select Cond, Y, Z) - if (simplifyDivRemOfSelectWithZeroOp(I)) - return &I; - return nullptr; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 65a96b965227..aeac8910af6b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -181,11 +181,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { - unsigned IndexVal = IdxC->getZExtValue(); unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); - // InstSimplify handles cases where the index is invalid. - assert(IndexVal < VectorWidth); + // InstSimplify should handle cases where the index is invalid. + if (!IdxC->getValue().ule(VectorWidth)) + return nullptr; + + unsigned IndexVal = IdxC->getZExtValue(); // This instruction only demands the single element from the input vector. // If the input vector has a single use, simplify it based on this use diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8328d4031941..8e39f24d819c 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2702,9 +2702,10 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { unsigned Align = Arg.getParamAlignment(); if (Align == 0) Align = DL.getABITypeAlignment(Ty); - const std::string &Name = Arg.hasName() ? Arg.getName().str() : - "Arg" + llvm::to_string(Arg.getArgNo()); - AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval"); + AllocaInst *AI = IRB.CreateAlloca( + Ty, nullptr, + (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) + + ".byval"); AI->setAlignment(Align); Arg.replaceAllUsesWith(AI); diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp index 814a62cd7d65..bf92e43c4715 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -641,7 +641,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking( DenseMap<uint32_t, unsigned> VNums; for (auto *I : Insts) { uint32_t N = VN.lookupOrAdd(I); - DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n"); + DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n"); if (N == ~0U) return None; VNums[N]++; diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 6af3fef963dc..9c870b42a747 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -476,33 +476,22 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, Alignment = DL.getABITypeAlignment(EltType); } - // Remember the debug location. - DebugLoc Loc; - if (!Range.TheStores.empty()) - Loc = Range.TheStores[0]->getDebugLoc(); + AMemSet = + Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI : Range.TheStores) - dbgs() << *SI << '\n'); + dbgs() << *SI << '\n'; + dbgs() << "With: " << *AMemSet << '\n'); + + if (!Range.TheStores.empty()) + AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); // Zap all the stores. for (Instruction *SI : Range.TheStores) { MD->removeInstruction(SI); SI->eraseFromParent(); } - - // Create the memset after removing the stores, so that if there any cached - // non-local dependencies on the removed instructions in - // MemoryDependenceAnalysis, the cache entries are updated to "dirty" - // entries pointing below the memset, so subsequent queries include the - // memset. - AMemSet = - Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); - if (!Range.TheStores.empty()) - AMemSet->setDebugLoc(Loc); - - DEBUG(dbgs() << "With: " << *AMemSet << '\n'); - ++NumMemSetInfer; } @@ -1042,22 +1031,9 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // // NOTE: This is conservative, it will stop on any read from the source loc, // not just the defining memcpy. - MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep); - MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false, - M->getIterator(), M->getParent()); - - if (SourceDep.isNonLocal()) { - SmallVector<NonLocalDepResult, 2> NonLocalDepResults; - MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false, - NonLocalDepResults); - if (NonLocalDepResults.size() == 1) { - SourceDep = NonLocalDepResults[0].getResult(); - assert((!SourceDep.getInst() || - LookupDomTree().dominates(SourceDep.getInst(), M)) && - "when memdep returns exactly one result, it should dominate"); - } - } - + MemDepResult SourceDep = + MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, + M->getIterator(), M->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; @@ -1259,18 +1235,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) { MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( SrcLoc, true, M->getIterator(), M->getParent()); - if (SrcDepInfo.isNonLocal()) { - SmallVector<NonLocalDepResult, 2> NonLocalDepResults; - MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true, - NonLocalDepResults); - if (NonLocalDepResults.size() == 1) { - SrcDepInfo = NonLocalDepResults[0].getResult(); - assert((!SrcDepInfo.getInst() || - LookupDomTree().dominates(SrcDepInfo.getInst(), M)) && - "when memdep returns exactly one result, it should dominate"); - } - } - if (SrcDepInfo.isClobber()) { if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) return processMemCpyMemCpyDependence(M, MDep); diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 3b45cfa482e6..c44edbed8ed9 100644 --- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -2796,17 +2796,12 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, StatepointLiveSetTy Updated; findLiveSetAtInst(Inst, RevisedLivenessData, Updated); -#ifndef NDEBUG - DenseSet<Value *> Bases; - for (auto KVPair : Info.PointerToBase) - Bases.insert(KVPair.second); -#endif - // We may have base pointers which are now live that weren't before. We need // to update the PointerToBase structure to reflect this. for (auto V : Updated) if (Info.PointerToBase.insert({V, V}).second) { - assert(Bases.count(V) && "Can't find base for unexpected live value!"); + assert(isKnownBaseResult(V) && + "Can't find base for unexpected live value!"); continue; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index efff06f79cb7..e00541d3c812 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -648,8 +648,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); + // NewExit gets its DebugLoc from LatchExit, which is not part of the + // original Loop. + // Fix this by setting Loop's DebugLoc to NewExit. + auto *NewExitTerminator = NewExit->getTerminator(); + NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); // Split NewExit to insert epilog remainder loop. - EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); + EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp index c3fa05a11a24..fe106e33bca1 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -880,9 +880,10 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, /// If we are able to find such sequence, we return the instructions /// we found, namely %casted_phi and the instructions on its use-def chain up /// to the phi (not including the phi). -bool getCastsForInductionPHI( - PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, - const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) { +static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, + const SCEVUnknown *PhiScev, + const SCEVAddRecExpr *AR, + SmallVectorImpl<Instruction *> &CastInsts) { assert(CastInsts.empty() && "CastInsts is expected to be empty."); auto *PN = cast<PHINode>(PhiScev->getValue()); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b3c80424c8b9..e7358dbcb624 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -127,16 +127,6 @@ static cl::opt<unsigned> MaxSpeculationDepth( cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions")); -static cl::opt<unsigned> DependenceChainLatency( - "dependence-chain-latency", cl::Hidden, cl::init(8), - cl::desc("Limit the maximum latency of dependence chain containing cmp " - "for if conversion")); - -static cl::opt<unsigned> SmallBBSize( - "small-bb-size", cl::Hidden, cl::init(40), - cl::desc("Check dependence chain latency only in basic block smaller than " - "this number")); - STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -405,166 +395,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, return true; } -/// Estimate the code size of the specified BB. -static unsigned CountBBCodeSize(BasicBlock *BB, - const TargetTransformInfo &TTI) { - unsigned Size = 0; - for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II) - Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize); - return Size; -} - -/// Find out the latency of the longest dependence chain in the BB if -/// LongestChain is true, or the dependence chain containing the compare -/// instruction feeding the block's conditional branch. -static unsigned FindDependenceChainLatency(BasicBlock *BB, - DenseMap<Instruction *, unsigned> &Instructions, - const TargetTransformInfo &TTI, - bool LongestChain) { - unsigned MaxLatency = 0; - - BasicBlock::iterator II; - for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) { - unsigned Latency = 0; - for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) { - Instruction *Op = dyn_cast<Instruction>(II->getOperand(O)); - if (Op && Instructions.count(Op)) { - auto OpLatency = Instructions[Op]; - if (OpLatency > Latency) - Latency = OpLatency; - } - } - Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency); - Instructions[&(*II)] = Latency; - - if (Latency > MaxLatency) - MaxLatency = Latency; - } - - if (LongestChain) - return MaxLatency; - - // The length of the dependence chain containing the compare instruction is - // wanted, so the terminator must be a BranchInst. - assert(isa<BranchInst>(II)); - BranchInst* Br = cast<BranchInst>(II); - Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition()); - if (Cmp && Instructions.count(Cmp)) - return Instructions[Cmp]; - else - return 0; -} - -/// Instructions in BB2 may depend on instructions in BB1, and instructions -/// in BB1 may have users in BB2. If the last (in terms of latency) such kind -/// of instruction in BB1 is I, then the instructions after I can be executed -/// in parallel with instructions in BB2. -/// This function returns the latency of I. -static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2, - BasicBlock *IfBlock1, BasicBlock *IfBlock2, - DenseMap<Instruction *, unsigned> &BB1Instructions) { - unsigned LastLatency = 0; - SmallVector<Instruction *, 16> Worklist; - BasicBlock::iterator II; - for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) { - if (PHINode *PN = dyn_cast<PHINode>(II)) { - // Look for users in BB2. - bool InBBUser = false; - for (User *U : PN->users()) { - if (cast<Instruction>(U)->getParent() == BB2) { - InBBUser = true; - break; - } - } - // No such user, we don't care about this instruction and its operands. - if (!InBBUser) - break; - } - Worklist.push_back(&(*II)); - } - - while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) { - if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) { - if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2) - Worklist.push_back(Op); - else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) { - if (BB1Instructions[Op] > LastLatency) - LastLatency = BB1Instructions[Op]; - } - } - } - } - - return LastLatency; -} - -/// If after if conversion, most of the instructions in this new BB construct a -/// long and slow dependence chain, it may be slower than cmp/branch, even -/// if the branch has a high miss rate, because the control dependence is -/// transformed into data dependence, and control dependence can be speculated, -/// and thus, the second part can execute in parallel with the first part on -/// modern OOO processor. -/// -/// To check this condition, this function finds the length of the dependence -/// chain in BB1 (only the part that can be executed in parallel with code after -/// branch in BB2) containing cmp, and if the length is longer than a threshold, -/// don't perform if conversion. -/// -/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion. -/// SpeculationSize contains the code size of IfBlock1 and IfBlock2. -static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2, - BasicBlock *IfBlock1, BasicBlock *IfBlock2, - unsigned SpeculationSize, - const TargetTransformInfo &TTI) { - // Accumulated latency of each instruction in their BBs. - DenseMap<Instruction *, unsigned> BB1Instructions; - DenseMap<Instruction *, unsigned> BB2Instructions; - - if (!TTI.isOutOfOrder()) - return false; - - unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI) - + SpeculationSize; - - // We check small BB only since it is more difficult to find unrelated - // instructions to fill functional units in a small BB. - if (NewBBSize > SmallBBSize) - return false; - - auto BB1Chain = - FindDependenceChainLatency(BB1, BB1Instructions, TTI, false); - auto BB2Chain = - FindDependenceChainLatency(BB2, BB2Instructions, TTI, true); - - // If there are many unrelated instructions in the new BB, there will be - // other instructions for the processor to issue regardless of the length - // of this new dependence chain. - // Modern processors can issue 3 or more instructions in each cycle. But in - // real world applications, an IPC of 2 is already very good for non-loop - // code with small basic blocks. Higher IPC is usually found in programs with - // small kernel. So IPC of 2 is more reasonable for most applications. - if ((BB1Chain + BB2Chain) * 2 <= NewBBSize) - return false; - - // We only care about part of the dependence chain in BB1 that can be - // executed in parallel with BB2, so adjust the latency. - BB1Chain -= - LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions); - - // Correctly predicted branch instruction can skip the dependence chain in - // BB1, but misprediction has a penalty, so only when the dependence chain is - // longer than DependenceChainLatency, then branch is better than select. - // Besides misprediction penalty, the threshold value DependenceChainLatency - // also depends on branch misprediction rate, taken branch latency and cmov - // latency. - if (BB1Chain >= DependenceChainLatency) - return true; - - return false; -} - /// Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { @@ -2214,11 +2044,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) return false; - // Don't do if conversion for long dependence chain. - if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr, - CountBBCodeSize(ThenBB, TTI), TTI)) - return false; - // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); @@ -2526,10 +2351,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } } - if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2, - AggressiveInsts.size(), TTI)) - return false; - DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); |