aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/StructurizeCFG.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/StructurizeCFG.cpp150
1 files changed, 124 insertions, 26 deletions
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 0b797abefe20..81d151c2904e 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
@@ -87,6 +88,8 @@ using BBPredicates = DenseMap<BasicBlock *, Value *>;
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
+using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>;
+
// A traits type that is intended to be used in graph algorithms. The graph
// traits starts at an entry node, and traverses the RegionNodes that are in
// the Nodes set.
@@ -246,6 +249,7 @@ class StructurizeCFG {
SmallVector<RegionNode *, 8> Order;
BBSet Visited;
+ BBSet FlowSet;
SmallVector<WeakVH, 8> AffectedPhis;
BBPhiMap DeletedPhis;
@@ -258,6 +262,8 @@ class StructurizeCFG {
PredMap LoopPreds;
BranchVector LoopConds;
+ BranchDebugLocMap TermDL;
+
RegionNode *PrevNode;
void orderNodes();
@@ -278,6 +284,9 @@ class StructurizeCFG {
void addPhiValues(BasicBlock *From, BasicBlock *To);
+ void findUndefBlocks(BasicBlock *PHIBlock,
+ const SmallSet<BasicBlock *, 8> &Incomings,
+ SmallVector<BasicBlock *> &UndefBlks) const;
void setPhiValues();
void simplifyAffectedPhis();
@@ -395,7 +404,7 @@ void StructurizeCFG::orderNodes() {
WorkList.emplace_back(I, I + Size);
// Add the SCC nodes to the Order array.
- for (auto &N : SCC) {
+ for (const auto &N : SCC) {
assert(I < E && "SCC size mismatch!");
Order[I++] = N.first;
}
@@ -536,6 +545,14 @@ void StructurizeCFG::collectInfos() {
// Find the last back edges
analyzeLoops(RN);
}
+
+ // Reset the collected term debug locations
+ TermDL.clear();
+
+ for (BasicBlock &BB : *Func) {
+ if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc())
+ TermDL[&BB] = DL;
+ }
}
/// Insert the missing branch conditions
@@ -632,6 +649,67 @@ void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
AddedPhis[To].push_back(From);
}
+/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
+/// from predecessors \p Incomings, we have a chance to mark the available value
+/// from some blocks as undefined. The function will find out all such blocks
+/// and return in \p UndefBlks.
+void StructurizeCFG::findUndefBlocks(
+ BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings,
+ SmallVector<BasicBlock *> &UndefBlks) const {
+ // We may get a post-structured CFG like below:
+ //
+ // | P1
+ // |/
+ // F1
+ // |\
+ // | N
+ // |/
+ // F2
+ // |\
+ // | P2
+ // |/
+ // F3
+ // |\
+ // B
+ //
+ // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
+ // of B before structurization. F1/F2/F3 are flow blocks inserted during
+ // structurization process. Block N is not a predecessor of B before
+ // structurization, but are placed between the predecessors(P1/P2) of B after
+ // structurization. This usually means that threads went to N never take the
+ // path N->F2->F3->B. For example, the threads take the branch F1->N may
+ // always take the branch F2->P2. So, when we are reconstructing a PHI
+ // originally in B, we can safely say the incoming value from N is undefined.
+ SmallSet<BasicBlock *, 8> VisitedBlock;
+ SmallVector<BasicBlock *, 8> Stack;
+ if (PHIBlock == ParentRegion->getExit()) {
+ for (auto P : predecessors(PHIBlock)) {
+ if (ParentRegion->contains(P))
+ Stack.push_back(P);
+ }
+ } else {
+ append_range(Stack, predecessors(PHIBlock));
+ }
+
+ // Do a backward traversal over the CFG, and stop further searching if
+ // the block is not a Flow. If a block is neither flow block nor the
+ // incoming predecessor, then the incoming value from the block is
+ // undefined value for the PHI being reconstructed.
+ while (!Stack.empty()) {
+ BasicBlock *Current = Stack.pop_back_val();
+ if (VisitedBlock.contains(Current))
+ continue;
+
+ VisitedBlock.insert(Current);
+ if (FlowSet.contains(Current)) {
+ for (auto P : predecessors(Current))
+ Stack.push_back(P);
+ } else if (!Incomings.contains(Current)) {
+ UndefBlks.push_back(Current);
+ }
+ }
+}
+
/// Add the real PHI value as soon as everything is set up
void StructurizeCFG::setPhiValues() {
SmallVector<PHINode *, 8> InsertedPhis;
@@ -643,6 +721,8 @@ void StructurizeCFG::setPhiValues() {
if (!DeletedPhis.count(To))
continue;
+ SmallVector<BasicBlock *> UndefBlks;
+ bool CachedUndefs = false;
PhiMap &Map = DeletedPhis[To];
for (const auto &PI : Map) {
PHINode *Phi = PI.first;
@@ -651,15 +731,30 @@ void StructurizeCFG::setPhiValues() {
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
Updater.AddAvailableValue(To, Undef);
- NearestCommonDominator Dominator(DT);
- Dominator.addBlock(To);
+ SmallSet<BasicBlock *, 8> Incomings;
+ SmallVector<BasicBlock *> ConstantPreds;
for (const auto &VI : PI.second) {
+ Incomings.insert(VI.first);
Updater.AddAvailableValue(VI.first, VI.second);
- Dominator.addAndRememberBlock(VI.first);
+ if (isa<Constant>(VI.second))
+ ConstantPreds.push_back(VI.first);
}
- if (!Dominator.resultIsRememberedBlock())
- Updater.AddAvailableValue(Dominator.result(), Undef);
+ if (!CachedUndefs) {
+ findUndefBlocks(To, Incomings, UndefBlks);
+ CachedUndefs = true;
+ }
+
+ for (auto UB : UndefBlks) {
+ // If this undef block is dominated by any predecessor(before
+ // structurization) of reconstructed PHI with constant incoming value,
+ // don't mark the available value as undefined. Setting undef to such
+ // block will stop us from getting optimal phi insertion.
+ if (any_of(ConstantPreds,
+ [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
+ continue;
+ Updater.AddAvailableValue(UB, Undef);
+ }
for (BasicBlock *FI : From)
Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
@@ -679,6 +774,9 @@ void StructurizeCFG::simplifyAffectedPhis() {
Changed = false;
SimplifyQuery Q(Func->getParent()->getDataLayout());
Q.DT = DT;
+ // Setting CanUseUndef to true might extend value liveness, set it to false
+ // to achieve better register pressure.
+ Q.CanUseUndef = false;
for (WeakVH VH : AffectedPhis) {
if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
if (auto NewValue = simplifyInstruction(Phi, Q)) {
@@ -742,7 +840,8 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
} else {
BasicBlock *BB = Node->getNodeAs<BasicBlock>();
killTerminator(BB);
- BranchInst::Create(NewExit, BB);
+ BranchInst *Br = BranchInst::Create(NewExit, BB);
+ Br->setDebugLoc(TermDL[BB]);
addPhiValues(BB, NewExit);
if (IncludeDominator)
DT->changeImmediateDominator(NewExit, BB);
@@ -756,6 +855,13 @@ BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
Order.back()->getEntry();
BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
Func, Insert);
+ FlowSet.insert(Flow);
+
+ // use a temporary variable to avoid a use-after-free if the map's storage is
+ // reallocated
+ DebugLoc DL = TermDL[Dominator];
+ TermDL[Flow] = std::move(DL);
+
DT->addNewBlock(Flow, Dominator);
ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
return Flow;
@@ -851,7 +957,9 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
// let it point to entry and next block
- Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
+ BranchInst *Br = BranchInst::Create(Entry, Next, BoolUndef, Flow);
+ Br->setDebugLoc(TermDL[Flow]);
+ Conditions.push_back(Br);
addPhiValues(Flow, Entry);
DT->changeImmediateDominator(Entry, Flow);
@@ -885,26 +993,14 @@ void StructurizeCFG::handleLoops(bool ExitUseAllowed,
handleLoops(false, LoopEnd);
}
- // If the start of the loop is the entry block, we can't branch to it so
- // insert a new dummy entry block.
- Function *LoopFunc = LoopStart->getParent();
- if (LoopStart == &LoopFunc->getEntryBlock()) {
- LoopStart->setName("entry.orig");
-
- BasicBlock *NewEntry =
- BasicBlock::Create(LoopStart->getContext(),
- "entry",
- LoopFunc,
- LoopStart);
- BranchInst::Create(LoopStart, NewEntry);
- DT->setNewRoot(NewEntry);
- }
+ assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
// Create an extra loop end node
LoopEnd = needPrefix(false);
BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
- LoopConds.push_back(BranchInst::Create(Next, LoopStart,
- BoolUndef, LoopEnd));
+ BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolUndef, LoopEnd);
+ Br->setDebugLoc(TermDL[LoopEnd]);
+ LoopConds.push_back(Br);
addPhiValues(LoopEnd, LoopStart);
setPrevNode(Next);
}
@@ -974,7 +1070,7 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
// Count of how many direct children are conditional.
unsigned ConditionalDirectChildren = 0;
- for (auto E : R->elements()) {
+ for (auto *E : R->elements()) {
if (!E->isSubRegion()) {
auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
if (!Br || !Br->isConditional())
@@ -998,7 +1094,7 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
// their direct child basic blocks' terminators, regardless of whether
// subregions are uniform or not. However, this requires a very careful
// look at SIAnnotateControlFlow to make sure nothing breaks there.
- for (auto BB : E->getNodeAs<Region>()->blocks()) {
+ for (auto *BB : E->getNodeAs<Region>()->blocks()) {
auto Br = dyn_cast<BranchInst>(BB->getTerminator());
if (!Br || !Br->isConditional())
continue;
@@ -1100,6 +1196,8 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
Loops.clear();
LoopPreds.clear();
LoopConds.clear();
+ FlowSet.clear();
+ TermDL.clear();
return true;
}