| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-24 01:00:08 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-24 01:00:08 +0000 |
| commit | c7dac04c3480f3c20487f912f77343139fce2d99 (patch) | |
| tree | 21a09bce0171e27bd1e92649db9df797fa097cea /lib/Transforms/Scalar | |
| parent | 044eb2f6afba375a914ac9d8024f8f5142bb912e (diff) | |
Diffstat (limited to 'lib/Transforms/Scalar')
| -rw-r--r-- | lib/Transforms/Scalar/CallSiteSplitting.cpp | 46 |
| -rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 19 |
| -rw-r--r-- | lib/Transforms/Scalar/LoopSink.cpp | 2 |
| -rw-r--r-- | lib/Transforms/Scalar/LoopUnrollPass.cpp | 2 |
| -rw-r--r-- | lib/Transforms/Scalar/MemCpyOptimizer.cpp | 56 |
| -rw-r--r-- | lib/Transforms/Scalar/SCCP.cpp | 29 |
| -rw-r--r-- | lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 2 |
7 files changed, 110 insertions, 46 deletions
diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp
index d8c408035038..207243231aad 100644
--- a/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -13,10 +13,11 @@
 // threading, or IPA-CP based function cloning, etc.).
 // As of now we support two cases :
 //
-// 1) If a call site is dominated by an OR condition and if any of its arguments
-// are predicated on this OR condition, try to split the condition with more
-// constrained arguments. For example, in the code below, we try to split the
-// call site since we can predicate the argument(ptr) based on the OR condition.
+// 1) Try to a split call-site with constrained arguments, if any constraints
+// on any argument can be found by following the single predecessors of the
+// all site's predecessors. Currently this pass only handles call-sites with 2
+// predecessors. For example, in the code below, we try to split the call-site
+// since we can predicate the argument(ptr) based on the OR condition.
 //
 // Split from :
 //   if (!ptr || c)
@@ -200,16 +201,15 @@ static bool canSplitCallSite(CallSite CS) {
 }
 
 /// Return true if the CS is split into its new predecessors which are directly
-/// hooked to each of its orignial predecessors pointed by PredBB1 and PredBB2.
-/// In OR predicated case, PredBB1 will point the header, and PredBB2 will point
-/// to the second compare block. CallInst1 and CallInst2 will be the new
-/// call-sites placed in the new predecessors split for PredBB1 and PredBB2,
-/// repectively. Therefore, CallInst1 will be the call-site placed
-/// between Header and Tail, and CallInst2 will be the call-site between TBB and
-/// Tail. For example, in the IR below with an OR condition, the call-site can
-/// be split
+/// hooked to each of its original predecessors pointed by PredBB1 and PredBB2.
+/// CallInst1 and CallInst2 will be the new call-sites placed in the new
+/// predecessors split for PredBB1 and PredBB2, respectively.
+/// For example, in the IR below with an OR condition, the call-site can
+/// be split. Assuming PredBB1=Header and PredBB2=TBB, CallInst1 will be the
+/// call-site placed between Header and Tail, and CallInst2 will be the
+/// call-site between TBB and Tail.
 ///
-/// from :
+/// From :
 ///
 ///   Header:
 ///     %c = icmp eq i32* %a, null
@@ -237,9 +237,9 @@ static bool canSplitCallSite(CallSite CS) {
 ///   Tail:
 ///    %p = phi i1 [%ca1, %Tail-split1],[%ca2, %Tail-split2]
 ///
-/// Note that for an OR predicated case, CallInst1 and CallInst2 should be
-/// created with more constrained arguments in
-/// createCallSitesOnOrPredicatedArgument().
+/// Note that in case any arguments at the call-site are constrained by its
+/// predecessors, new call-sites with more constrained arguments will be
+/// created in createCallSitesOnPredicatedArgument().
 static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
                           Instruction *CallInst1, Instruction *CallInst2) {
   Instruction *Instr = CS.getInstruction();
@@ -332,18 +332,10 @@ static bool tryToSplitOnPHIPredicatedArgument(CallSite CS) {
   splitCallSite(CS, Preds[0], Preds[1], nullptr, nullptr);
   return true;
 }
 
-// Check if one of the predecessors is a single predecessors of the other.
-// This is a requirement for control flow modeling an OR. HeaderBB points to
-// the single predecessor and OrBB points to other node. HeaderBB potentially
-// contains the first compare of the OR and OrBB the second.
-static bool isOrHeader(BasicBlock *HeaderBB, BasicBlock *OrBB) {
-  return OrBB->getSinglePredecessor() == HeaderBB &&
-         HeaderBB->getTerminator()->getNumSuccessors() == 2;
-}
-static bool tryToSplitOnOrPredicatedArgument(CallSite CS) {
+static bool tryToSplitOnPredicatedArgument(CallSite CS) {
   auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
-  if (!isOrHeader(Preds[0], Preds[1]) && !isOrHeader(Preds[1], Preds[0]))
+  if (Preds[0] == Preds[1])
    return false;
 
   SmallVector<std::pair<ICmpInst *, unsigned>, 2> C1, C2;
@@ -362,7 +354,7 @@ static bool tryToSplitOnOrPredicatedArgument(CallSite CS) {
 static bool tryToSplitCallSite(CallSite CS) {
   if (!CS.arg_size() || !canSplitCallSite(CS))
     return false;
-  return tryToSplitOnOrPredicatedArgument(CS) ||
+  return tryToSplitOnPredicatedArgument(CS) ||
          tryToSplitOnPHIPredicatedArgument(CS);
 }
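The CallSiteSplitting hunks above relax the old requirement that the two predecessors form an OR pattern (the deleted `isOrHeader` check) to merely requiring two distinct predecessors. As a reader aid, here is a minimal C++ sketch of the source shape the pass header describes; `caller` and `callee` are invented names, and the pattern mirrors the `if (!ptr || c)` example in the pass comments:

```cpp
// Hypothetical caller/callee, mirroring the example in the pass header.
static int callee(int *ptr, int v) { return ptr ? *ptr + v : v; }

int caller(int *ptr, bool c) {
  // The call below is reached over two predecessors: the edge where `!ptr`
  // held (ptr known null) and the edge where `c` held after `!ptr` failed
  // (ptr known non-null). Call-site splitting duplicates the call so each
  // copy can use the constrained argument.
  if (!ptr || c)
    return callee(ptr, 42);
  return 0;
}
```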
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 6b0377e0ecb3..1476f7850cf0 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -282,7 +282,7 @@ bool JumpThreading::runOnFunction(Function &F) {
   auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   std::unique_ptr<BlockFrequencyInfo> BFI;
   std::unique_ptr<BranchProbabilityInfo> BPI;
-  bool HasProfileData = F.getEntryCount().hasValue();
+  bool HasProfileData = F.hasProfileData();
   if (HasProfileData) {
     LoopInfo LI{DominatorTree(F)};
     BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
@@ -307,8 +307,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
 
   std::unique_ptr<BlockFrequencyInfo> BFI;
   std::unique_ptr<BranchProbabilityInfo> BPI;
-  bool HasProfileData = F.getEntryCount().hasValue();
-  if (HasProfileData) {
+  if (F.hasProfileData()) {
     LoopInfo LI{DominatorTree(F)};
     BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
     BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
@@ -1333,6 +1332,20 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   // code size.
   BasicBlock *UnavailablePred = nullptr;
 
+  // If the value is unavailable in one of predecessors, we will end up
+  // inserting a new instruction into them. It is only valid if all the
+  // instructions before LI are guaranteed to pass execution to its successor,
+  // or if LI is safe to speculate.
+  // TODO: If this logic becomes more complex, and we will perform PRE insertion
+  // farther than to a predecessor, we need to reuse the code from GVN's PRE.
+  // It requires domination tree analysis, so for this simple case it is an
+  // overkill.
+  if (PredsScanned.size() != AvailablePreds.size() &&
+      !isSafeToSpeculativelyExecute(LI))
+    for (auto I = LoadBB->begin(); &*I != LI; ++I)
+      if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+        return false;
+
   // If there is exactly one predecessor where the value is unavailable, the
   // already computed 'OneUnavailablePred' block is it. If it ends in an
   // unconditional branch, we know that it isn't a critical edge.
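The interesting part of the JumpThreading diff is the new legality gate before load PRE. A standalone restatement of that gate, as a sketch rather than the patch itself (`safeToPerformLoadPRE` is an invented name; the two analysis helpers are exactly the ones the patch calls):

```cpp
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Inserting the load into a predecessor is only sound if every instruction
// above LI in its block is guaranteed to reach LI (no throwing calls, no
// other early exits), or if LI may be speculated anyway.
static bool safeToPerformLoadPRE(LoadInst *LI) {
  if (isSafeToSpeculativelyExecute(LI))
    return true;
  for (Instruction &I : *LI->getParent()) {
    if (&I == LI)
      break; // only the instructions before LI matter
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      return false;
  }
  return true;
}
```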
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index c9d55b4594fe..430a7085d93f 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -247,7 +247,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
 
   // Enable LoopSink only when runtime profile is available.
   // With static profile, the sinking decision may be sub-optimal.
-  if (!Preheader->getParent()->getEntryCount())
+  if (!Preheader->getParent()->hasProfileData())
     return false;
 
   const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 7b1d6446a24a..15e7da5e1a7a 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -882,7 +882,7 @@ static bool computeUnrollCount(
   }
 
   // Check if the runtime trip count is too small when profile is available.
-  if (L->getHeader()->getParent()->getEntryCount()) {
+  if (L->getHeader()->getParent()->hasProfileData()) {
     if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
       if (*ProfileTripCount < FlatLoopTripCountThreshold)
         return false;
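LoopSink, LoopUnrollPass, and the JumpThreading hunks above all swap a raw entry-count test for `Function::hasProfileData()`. Judging only from how the call sites are rewritten in this patch, the helper appears to be a readability wrapper over the old idiom; the following is a sketch of that assumed equivalence, not the actual definition:

```cpp
#include "llvm/IR/Function.h"

// Assumption based on the converted call sites: true iff PGO attached an
// entry count to the function.
static bool hasProfileDataSketch(const llvm::Function &F) {
  return F.getEntryCount().hasValue();
}
```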
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9c870b42a747..6af3fef963dc 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -476,22 +476,33 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
       Alignment = DL.getABITypeAlignment(EltType);
     }
 
-    AMemSet =
-      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+    // Remember the debug location.
+    DebugLoc Loc;
+    if (!Range.TheStores.empty())
+      Loc = Range.TheStores[0]->getDebugLoc();
 
     DEBUG(dbgs() << "Replace stores:\n";
           for (Instruction *SI : Range.TheStores)
-            dbgs() << *SI << '\n';
-          dbgs() << "With: " << *AMemSet << '\n');
-
-    if (!Range.TheStores.empty())
-      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
+            dbgs() << *SI << '\n');
 
     // Zap all the stores.
     for (Instruction *SI : Range.TheStores) {
       MD->removeInstruction(SI);
       SI->eraseFromParent();
     }
+
+    // Create the memset after removing the stores, so that if there any cached
+    // non-local dependencies on the removed instructions in
+    // MemoryDependenceAnalysis, the cache entries are updated to "dirty"
+    // entries pointing below the memset, so subsequent queries include the
+    // memset.
+    AMemSet =
+      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+    if (!Range.TheStores.empty())
+      AMemSet->setDebugLoc(Loc);
+
+    DEBUG(dbgs() << "With: " << *AMemSet << '\n');
+
     ++NumMemSetInfer;
   }
@@ -1031,9 +1042,22 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
   //
   // NOTE: This is conservative, it will stop on any read from the source loc,
   // not just the defining memcpy.
-  MemDepResult SourceDep =
-      MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
-                                   M->getIterator(), M->getParent());
+  MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep);
+  MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false,
+                                  M->getIterator(), M->getParent());
+
+  if (SourceDep.isNonLocal()) {
+    SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
+    MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false,
+                                         NonLocalDepResults);
+    if (NonLocalDepResults.size() == 1) {
+      SourceDep = NonLocalDepResults[0].getResult();
+      assert((!SourceDep.getInst() ||
+              LookupDomTree().dominates(SourceDep.getInst(), M)) &&
+             "when memdep returns exactly one result, it should dominate");
+    }
+  }
+
   if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
     return false;
@@ -1235,6 +1259,18 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
   MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
       SrcLoc, true, M->getIterator(), M->getParent());
 
+  if (SrcDepInfo.isNonLocal()) {
+    SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
+    MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true,
+                                         NonLocalDepResults);
+    if (NonLocalDepResults.size() == 1) {
+      SrcDepInfo = NonLocalDepResults[0].getResult();
+      assert((!SrcDepInfo.getInst() ||
+              LookupDomTree().dominates(SrcDepInfo.getInst(), M)) &&
+             "when memdep returns exactly one result, it should dominate");
+    }
+  }
+
   if (SrcDepInfo.isClobber()) {
     if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
       return processMemCpyMemCpyDependence(M, MDep);
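Both MemCpyOptimizer hunks teach the pass to follow a source dependence across a block boundary, accepting the non-local answer only when memdep returns exactly one (dominating) result. A hypothetical user-level shape that benefits, with all names invented:

```cpp
#include <cstring>

// The copy into `out` sits in a different basic block than the copy that
// defines `tmp`, so its dependence on the defining memcpy is non-local.
// With this change MemCpyOpt can still find that single dominating copy and
// forward `src` into the second memcpy; whether the rewrite actually fires
// still depends on the usual alias checks.
void forwardCopy(char *out, const char *src, bool take) {
  char tmp[64];
  std::memcpy(tmp, src, sizeof(tmp));   // defining copy, entry block
  if (take)
    std::memcpy(out, tmp, sizeof(tmp)); // non-local dep on the copy above
}
```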
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index e5866b4718da..66608ec631f6 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1929,9 +1929,32 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
       if (!I) continue;
 
       bool Folded = ConstantFoldTerminator(I->getParent());
-      assert(Folded &&
-             "Expect TermInst on constantint or blockaddress to be folded");
-      (void) Folded;
+      if (!Folded) {
+        // The constant folder may not have been able to fold the terminator
+        // if this is a branch or switch on undef. Fold it manually as a
+        // branch to the first successor.
+#ifndef NDEBUG
+        if (auto *BI = dyn_cast<BranchInst>(I)) {
+          assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
+                 "Branch should be foldable!");
+        } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
+          assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
+        } else {
+          llvm_unreachable("Didn't fold away reference to block!");
+        }
+#endif
+
+        // Make this an uncond branch to the first successor.
+        TerminatorInst *TI = I->getParent()->getTerminator();
+        BranchInst::Create(TI->getSuccessor(0), TI);
+
+        // Remove entries in successor phi nodes to remove edges.
+        for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
+          TI->getSuccessor(i)->removePredecessor(TI->getParent());
+
+        // Remove the old terminator.
+        TI->eraseFromParent();
+      }
     }
 
     // Finally, delete the basic block.
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 209821ff21d7..8fa9ffb6d014 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -97,7 +97,7 @@
 //   load %p2
 //   ...
 //
-// We can not do CSE for to the common part related to index "i64 %i". Lowering
+// We can not do CSE to the common part related to index "i64 %i". Lowering
 // GEPs can achieve such goals.
 // If the target does not use alias analysis in codegen, this pass will
 // lower a GEP with multiple indices into arithmetic operations:
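The SeparateConstOffsetFromGEP context cuts off at the colon above. For reference, "lowering a GEP with multiple indices into arithmetic operations" means rewriting structured indexing as explicit address arithmetic, so the index-dependent part can be CSE'd across GEPs that differ only in their constant offsets. An illustrative C++ analogue, with a made-up struct layout and names:

```cpp
#include <cstddef>

struct S { float a[32]; float b[32]; };

// &base[i].b[5] computed by hand: the `i * sizeof(S)` term is the common
// part shared by every access indexed by `i`; the rest is a constant offset.
float *elemOfB(S *base, long i) {
  char *p = reinterpret_cast<char *>(base);
  long common = i * static_cast<long>(sizeof(S)); // CSE-able across GEPs
  return reinterpret_cast<float *>(p + common + offsetof(S, b) +
                                   5 * sizeof(float));
}
```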

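One more note, on the SCCP hunk above: the old code asserted that ConstantFoldTerminator always succeeds, but the folder declines branches and switches on undef, which IPSCCP can produce. Condensed from the added code into a helper with an invented name, the manual fold is:

```cpp
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Rewrite `br i1 undef, %t, %f` (or a switch on undef) into an unconditional
// branch to successor 0, unhooking phi entries on the dropped edges.
static void foldUndefTerminator(TerminatorInst *TI) {
  BranchInst::Create(TI->getSuccessor(0), TI); // new uncond br before TI
  for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
    TI->getSuccessor(i)->removePredecessor(TI->getParent());
  TI->eraseFromParent(); // drop the old conditional terminator
}
```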