diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar/StructurizeCFG.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 150 |
1 files changed, 124 insertions, 26 deletions
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 0b797abefe20..81d151c2904e 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" @@ -87,6 +88,8 @@ using BBPredicates = DenseMap<BasicBlock *, Value *>; using PredMap = DenseMap<BasicBlock *, BBPredicates>; using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; +using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>; + // A traits type that is intended to be used in graph algorithms. The graph // traits starts at an entry node, and traverses the RegionNodes that are in // the Nodes set. @@ -246,6 +249,7 @@ class StructurizeCFG { SmallVector<RegionNode *, 8> Order; BBSet Visited; + BBSet FlowSet; SmallVector<WeakVH, 8> AffectedPhis; BBPhiMap DeletedPhis; @@ -258,6 +262,8 @@ class StructurizeCFG { PredMap LoopPreds; BranchVector LoopConds; + BranchDebugLocMap TermDL; + RegionNode *PrevNode; void orderNodes(); @@ -278,6 +284,9 @@ class StructurizeCFG { void addPhiValues(BasicBlock *From, BasicBlock *To); + void findUndefBlocks(BasicBlock *PHIBlock, + const SmallSet<BasicBlock *, 8> &Incomings, + SmallVector<BasicBlock *> &UndefBlks) const; void setPhiValues(); void simplifyAffectedPhis(); @@ -395,7 +404,7 @@ void StructurizeCFG::orderNodes() { WorkList.emplace_back(I, I + Size); // Add the SCC nodes to the Order array. - for (auto &N : SCC) { + for (const auto &N : SCC) { assert(I < E && "SCC size mismatch!"); Order[I++] = N.first; } @@ -536,6 +545,14 @@ void StructurizeCFG::collectInfos() { // Find the last back edges analyzeLoops(RN); } + + // Reset the collected term debug locations + TermDL.clear(); + + for (BasicBlock &BB : *Func) { + if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc()) + TermDL[&BB] = DL; + } } /// Insert the missing branch conditions @@ -632,6 +649,67 @@ void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { AddedPhis[To].push_back(From); } +/// When we are reconstructing a PHI inside \p PHIBlock with incoming values +/// from predecessors \p Incomings, we have a chance to mark the available value +/// from some blocks as undefined. The function will find out all such blocks +/// and return in \p UndefBlks. +void StructurizeCFG::findUndefBlocks( + BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings, + SmallVector<BasicBlock *> &UndefBlks) const { + // We may get a post-structured CFG like below: + // + // | P1 + // |/ + // F1 + // |\ + // | N + // |/ + // F2 + // |\ + // | P2 + // |/ + // F3 + // |\ + // B + // + // B is the block that has a PHI being reconstructed. P1/P2 are predecessors + // of B before structurization. F1/F2/F3 are flow blocks inserted during + // structurization process. Block N is not a predecessor of B before + // structurization, but are placed between the predecessors(P1/P2) of B after + // structurization. This usually means that threads went to N never take the + // path N->F2->F3->B. For example, the threads take the branch F1->N may + // always take the branch F2->P2. So, when we are reconstructing a PHI + // originally in B, we can safely say the incoming value from N is undefined. + SmallSet<BasicBlock *, 8> VisitedBlock; + SmallVector<BasicBlock *, 8> Stack; + if (PHIBlock == ParentRegion->getExit()) { + for (auto P : predecessors(PHIBlock)) { + if (ParentRegion->contains(P)) + Stack.push_back(P); + } + } else { + append_range(Stack, predecessors(PHIBlock)); + } + + // Do a backward traversal over the CFG, and stop further searching if + // the block is not a Flow. If a block is neither flow block nor the + // incoming predecessor, then the incoming value from the block is + // undefined value for the PHI being reconstructed. + while (!Stack.empty()) { + BasicBlock *Current = Stack.pop_back_val(); + if (VisitedBlock.contains(Current)) + continue; + + VisitedBlock.insert(Current); + if (FlowSet.contains(Current)) { + for (auto P : predecessors(Current)) + Stack.push_back(P); + } else if (!Incomings.contains(Current)) { + UndefBlks.push_back(Current); + } + } +} + /// Add the real PHI value as soon as everything is set up void StructurizeCFG::setPhiValues() { SmallVector<PHINode *, 8> InsertedPhis; @@ -643,6 +721,8 @@ void StructurizeCFG::setPhiValues() { if (!DeletedPhis.count(To)) continue; + SmallVector<BasicBlock *> UndefBlks; + bool CachedUndefs = false; PhiMap &Map = DeletedPhis[To]; for (const auto &PI : Map) { PHINode *Phi = PI.first; @@ -651,15 +731,30 @@ void StructurizeCFG::setPhiValues() { Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); Updater.AddAvailableValue(To, Undef); - NearestCommonDominator Dominator(DT); - Dominator.addBlock(To); + SmallSet<BasicBlock *, 8> Incomings; + SmallVector<BasicBlock *> ConstantPreds; for (const auto &VI : PI.second) { + Incomings.insert(VI.first); Updater.AddAvailableValue(VI.first, VI.second); - Dominator.addAndRememberBlock(VI.first); + if (isa<Constant>(VI.second)) + ConstantPreds.push_back(VI.first); } - if (!Dominator.resultIsRememberedBlock()) - Updater.AddAvailableValue(Dominator.result(), Undef); + if (!CachedUndefs) { + findUndefBlocks(To, Incomings, UndefBlks); + CachedUndefs = true; + } + + for (auto UB : UndefBlks) { + // If this undef block is dominated by any predecessor(before + // structurization) of reconstructed PHI with constant incoming value, + // don't mark the available value as undefined. Setting undef to such + // block will stop us from getting optimal phi insertion. + if (any_of(ConstantPreds, + [&](BasicBlock *CP) { return DT->dominates(CP, UB); })) + continue; + Updater.AddAvailableValue(UB, Undef); + } for (BasicBlock *FI : From) Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); @@ -679,6 +774,9 @@ void StructurizeCFG::simplifyAffectedPhis() { Changed = false; SimplifyQuery Q(Func->getParent()->getDataLayout()); Q.DT = DT; + // Setting CanUseUndef to true might extend value liveness, set it to false + // to achieve better register pressure. + Q.CanUseUndef = false; for (WeakVH VH : AffectedPhis) { if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { if (auto NewValue = simplifyInstruction(Phi, Q)) { @@ -742,7 +840,8 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, } else { BasicBlock *BB = Node->getNodeAs<BasicBlock>(); killTerminator(BB); - BranchInst::Create(NewExit, BB); + BranchInst *Br = BranchInst::Create(NewExit, BB); + Br->setDebugLoc(TermDL[BB]); addPhiValues(BB, NewExit); if (IncludeDominator) DT->changeImmediateDominator(NewExit, BB); @@ -756,6 +855,13 @@ BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { Order.back()->getEntry(); BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, Func, Insert); + FlowSet.insert(Flow); + + // use a temporary variable to avoid a use-after-free if the map's storage is + // reallocated + DebugLoc DL = TermDL[Dominator]; + TermDL[Flow] = std::move(DL); + DT->addNewBlock(Flow, Dominator); ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); return Flow; @@ -851,7 +957,9 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed, BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); // let it point to entry and next block - Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); + BranchInst *Br = BranchInst::Create(Entry, Next, BoolUndef, Flow); + Br->setDebugLoc(TermDL[Flow]); + Conditions.push_back(Br); addPhiValues(Flow, Entry); DT->changeImmediateDominator(Entry, Flow); @@ -885,26 +993,14 @@ void StructurizeCFG::handleLoops(bool ExitUseAllowed, handleLoops(false, LoopEnd); } - // If the start of the loop is the entry block, we can't branch to it so - // insert a new dummy entry block. - Function *LoopFunc = LoopStart->getParent(); - if (LoopStart == &LoopFunc->getEntryBlock()) { - LoopStart->setName("entry.orig"); - - BasicBlock *NewEntry = - BasicBlock::Create(LoopStart->getContext(), - "entry", - LoopFunc, - LoopStart); - BranchInst::Create(LoopStart, NewEntry); - DT->setNewRoot(NewEntry); - } + assert(LoopStart != &LoopStart->getParent()->getEntryBlock()); // Create an extra loop end node LoopEnd = needPrefix(false); BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); - LoopConds.push_back(BranchInst::Create(Next, LoopStart, - BoolUndef, LoopEnd)); + BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolUndef, LoopEnd); + Br->setDebugLoc(TermDL[LoopEnd]); + LoopConds.push_back(Br); addPhiValues(LoopEnd, LoopStart); setPrevNode(Next); } @@ -974,7 +1070,7 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, // Count of how many direct children are conditional. unsigned ConditionalDirectChildren = 0; - for (auto E : R->elements()) { + for (auto *E : R->elements()) { if (!E->isSubRegion()) { auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); if (!Br || !Br->isConditional()) @@ -998,7 +1094,7 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, // their direct child basic blocks' terminators, regardless of whether // subregions are uniform or not. However, this requires a very careful // look at SIAnnotateControlFlow to make sure nothing breaks there. - for (auto BB : E->getNodeAs<Region>()->blocks()) { + for (auto *BB : E->getNodeAs<Region>()->blocks()) { auto Br = dyn_cast<BranchInst>(BB->getTerminator()); if (!Br || !Br->isConditional()) continue; @@ -1100,6 +1196,8 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { Loops.clear(); LoopPreds.clear(); LoopConds.clear(); + FlowSet.clear(); + TermDL.clear(); return true; } |