Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar')
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ADCE.cpp | 43
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 12
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/BDCE.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 21
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 25
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DCE.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 62
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 92
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp | 11
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVN.cpp | 498
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp | 89
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp | 11
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 19
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 44
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 903
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp | 293
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp | 170
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp | 19
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 127
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp | 46
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 8
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 105
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp | 7
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp | 282
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp | 49
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 1
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp | 40
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 432
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 172
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 301
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 147
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 179
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp | 12
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp | 2750
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 12
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 13
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 59
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp | 171
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SROA.cpp | 24
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalar.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp | 19
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 76
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Sink.cpp | 12
46 files changed, 5149 insertions(+), 2239 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index adc903cab31b..5b467dc9fe12 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -41,8 +41,8 @@ using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed");
STATISTIC(NumBranchesRemoved, "Number of branch instructions removed");
-// This is a tempoary option until we change the interface
-// to this pass based on optimization level.
+// This is a temporary option until we change the interface to this pass based
+// on optimization level.
static cl::opt<bool> RemoveControlFlowFlag("adce-remove-control-flow",
cl::init(true), cl::Hidden);
@@ -110,7 +110,7 @@ class AggressiveDeadCodeElimination {
/// The set of blocks which we have determined whose control
/// dependence sources must be live and which have not had
- /// those dependences analyized.
+ /// those dependences analyzed.
SmallPtrSet<BasicBlock *, 16> NewLiveBlocks;
/// Set up auxiliary data structures for Instructions and BasicBlocks and
@@ -145,7 +145,7 @@ class AggressiveDeadCodeElimination {
/// was removed.
bool removeDeadInstructions();
- /// Identify connected sections of the control flow grap which have
+ /// Identify connected sections of the control flow graph which have
/// dead terminators and rewrite the control flow graph to remove them.
void updateDeadRegions();
@@ -234,7 +234,7 @@ void AggressiveDeadCodeElimination::initialize() {
return Iter != end() && Iter->second;
}
} State;
-
+
State.reserve(F.size());
// Iterate over blocks in depth-first pre-order and
// treat all edges to a block already seen as loop back edges
@@ -262,25 +262,6 @@ void AggressiveDeadCodeElimination::initialize() {
continue;
auto *BB = BBInfo.BB;
if (!PDT.getNode(BB)) {
- markLive(BBInfo.Terminator);
- continue;
- }
- for (auto *Succ : successors(BB))
- if (!PDT.getNode(Succ)) {
- markLive(BBInfo.Terminator);
- break;
- }
- }
-
- // Mark blocks live if there is no path from the block to the
- // return of the function or a successor for which this is true.
- // This protects IDFCalculator which cannot handle such blocks.
- for (auto &BBInfoPair : BlockInfo) {
- auto &BBInfo = BBInfoPair.second;
- if (BBInfo.terminatorIsLive())
- continue;
- auto *BB = BBInfo.BB;
- if (!PDT.getNode(BB)) {
DEBUG(dbgs() << "Not post-dominated by return: " << BB->getName()
<< '\n';);
markLive(BBInfo.Terminator);
@@ -579,7 +560,7 @@ void AggressiveDeadCodeElimination::updateDeadRegions() {
PreferredSucc = Info;
}
assert((PreferredSucc && PreferredSucc->PostOrder > 0) &&
- "Failed to find safe successor for dead branc");
+ "Failed to find safe successor for dead branch");
bool First = true;
for (auto *Succ : successors(BB)) {
if (!First || Succ != PreferredSucc->BB)
@@ -594,13 +575,13 @@ void AggressiveDeadCodeElimination::updateDeadRegions() {
// reverse top-sort order
void AggressiveDeadCodeElimination::computeReversePostOrder() {
-
- // This provides a post-order numbering of the reverse conrtol flow graph
+
+ // This provides a post-order numbering of the reverse control flow graph
// Note that it is incomplete in the presence of infinite loops but we don't
// need numbers blocks which don't reach the end of the functions since
// all branches in those blocks are forced live.
-
- // For each block without successors, extend the DFS from the bloack
+
+ // For each block without successors, extend the DFS from the block
// backward through the graph
SmallPtrSet<BasicBlock*, 16> Visited;
unsigned PostOrder = 0;
@@ -644,8 +625,8 @@ PreservedAnalyses ADCEPass::run(Function &F, FunctionAnalysisManager &FAM) {
if (!AggressiveDeadCodeElimination(F, PDT).performDeadCodeElimination())
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- auto PA = PreservedAnalyses();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
return PA;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index c1df3173c0fc..fd931c521c8f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -438,19 +438,13 @@ AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
- bool Changed = runImpl(F, AC, &SE, &DT);
-
- // FIXME: We need to invalidate this to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<ScalarEvolutionAnalysis>(F);
-
- if (!Changed)
+ if (!runImpl(F, AC, &SE, &DT))
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<AAManager>();
PA.preserve<ScalarEvolutionAnalysis>();
PA.preserve<GlobalsAA>();
- PA.preserve<LoopAnalysis>();
- PA.preserve<DominatorTreeAnalysis>();
return PA;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
index 251b38707769..61e8700f1cd6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -80,8 +80,8 @@ PreservedAnalyses BDCEPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!bitTrackingDCE(F, DB))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- auto PA = PreservedAnalyses();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
return PA;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 38262514c9ec..ee6333e88716 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -136,8 +136,16 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
if (Idx != ~0U && isa<PHINode>(Inst))
return cast<PHINode>(Inst)->getIncomingBlock(Idx)->getTerminator();
- BasicBlock *IDom = DT->getNode(Inst->getParent())->getIDom()->getBlock();
- return IDom->getTerminator();
+ // This must be an EH pad. Iterate over immediate dominators until we find a
+ // non-EH pad. We need to skip over catchswitch blocks, which are both EH pads
+ // and terminators.
+ auto IDom = DT->getNode(Inst->getParent())->getIDom();
+ while (IDom->getBlock()->isEHPad()) {
+ assert(Entry != IDom->getBlock() && "eh pad in entry block");
+ IDom = IDom->getIDom();
+ }
+
+ return IDom->getBlock()->getTerminator();
}
/// \brief Find an insertion point that dominates all uses.
@@ -289,8 +297,8 @@ void ConstantHoistingPass::collectConstantCandidates(Function &Fn) {
// bit widths (APInt Operator- does not like that). If the value cannot be
// represented in uint64 we return an "empty" APInt. This is then interpreted
// as the value is not in range.
-static llvm::Optional<APInt> calculateOffsetDiff(APInt V1, APInt V2)
-{
+static llvm::Optional<APInt> calculateOffsetDiff(const APInt &V1,
+ const APInt &V2) {
llvm::Optional<APInt> Res = None;
unsigned BW = V1.getBitWidth() > V2.getBitWidth() ?
V1.getBitWidth() : V2.getBitWidth();
@@ -623,6 +631,7 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F,
if (!runImpl(F, TTI, DT, F.getEntryBlock()))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- return PreservedAnalyses::none();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 84f9373ae914..c843c61ea94e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -235,9 +235,8 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
// Analyse each switch case in turn. This is done in reverse order so that
// removing a case doesn't cause trouble for the iteration.
bool Changed = false;
- for (SwitchInst::CaseIt CI = SI->case_end(), CE = SI->case_begin(); CI-- != CE;
- ) {
- ConstantInt *Case = CI.getCaseValue();
+ for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
+ ConstantInt *Case = CI->getCaseValue();
// Check to see if the switch condition is equal to/not equal to the case
// value on every incoming edge, equal/not equal being the same each time.
@@ -270,8 +269,9 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
if (State == LazyValueInfo::False) {
// This case never fires - remove it.
- CI.getCaseSuccessor()->removePredecessor(BB);
- SI->removeCase(CI); // Does not invalidate the iterator.
+ CI->getCaseSuccessor()->removePredecessor(BB);
+ CI = SI->removeCase(CI);
+ CE = SI->case_end();
// The condition can be modified by removePredecessor's PHI simplification
// logic.
@@ -279,7 +279,9 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
++NumDeadCases;
Changed = true;
- } else if (State == LazyValueInfo::True) {
+ continue;
+ }
+ if (State == LazyValueInfo::True) {
// This case always fires. Arrange for the switch to be turned into an
// unconditional branch by replacing the switch condition with the case
// value.
@@ -288,6 +290,9 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
Changed = true;
break;
}
+
+ // Increment the case iterator sense we didn't delete it.
+ ++CI;
}
if (Changed)
@@ -308,7 +313,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
// Try to mark pointer typed parameters as non-null. We skip the
// relatively expensive analysis for constants which are obviously either
// null or non-null to start with.
- if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
+ if (Type && !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
!isa<Constant>(V) &&
LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
ConstantPointerNull::get(Type),
@@ -322,7 +327,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
if (Indices.empty())
return false;
- AttributeSet AS = CS.getAttributes();
+ AttributeList AS = CS.getAttributes();
LLVMContext &Ctx = CS.getInstruction()->getContext();
AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull));
CS.setAttributes(AS);
@@ -570,10 +575,6 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
bool Changed = runImpl(F, LVI);
- // FIXME: We need to invalidate LVI to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<LazyValueAnalysis>(F);
-
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index cc2a3cfaf9d1..07a0ba9b1222 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -124,9 +124,12 @@ static bool eliminateDeadCode(Function &F, TargetLibraryInfo *TLI) {
}
PreservedAnalyses DCEPass::run(Function &F, FunctionAnalysisManager &AM) {
- if (eliminateDeadCode(F, AM.getCachedResult<TargetLibraryAnalysis>(F)))
- return PreservedAnalyses::none();
- return PreservedAnalyses::all();
+ if (!eliminateDeadCode(F, AM.getCachedResult<TargetLibraryAnalysis>(F)))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
namespace {
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 4d4c3baef3f5..1ec38e56aa4c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -135,13 +135,13 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {
if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
StringRef FnName = F->getName();
- if (TLI.has(LibFunc::strcpy) && FnName == TLI.getName(LibFunc::strcpy))
+ if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy))
return true;
- if (TLI.has(LibFunc::strncpy) && FnName == TLI.getName(LibFunc::strncpy))
+ if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy))
return true;
- if (TLI.has(LibFunc::strcat) && FnName == TLI.getName(LibFunc::strcat))
+ if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat))
return true;
- if (TLI.has(LibFunc::strncat) && FnName == TLI.getName(LibFunc::strncat))
+ if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat))
return true;
}
}
@@ -287,19 +287,14 @@ static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
}
namespace {
-enum OverwriteResult {
- OverwriteBegin,
- OverwriteComplete,
- OverwriteEnd,
- OverwriteUnknown
-};
+enum OverwriteResult { OW_Begin, OW_Complete, OW_End, OW_Unknown };
}
-/// Return 'OverwriteComplete' if a store to the 'Later' location completely
-/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
-/// the 'Earlier' location is completely overwritten by 'Later',
-/// 'OverwriteBegin' if the beginning of the 'Earlier' location is overwritten
-/// by 'Later', or 'OverwriteUnknown' if nothing can be determined.
+/// Return 'OW_Complete' if a store to the 'Later' location completely
+/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
+/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
+/// beginning of the 'Earlier' location is overwritten by 'Later', or
+/// 'OW_Unknown' if nothing can be determined.
static OverwriteResult isOverwrite(const MemoryLocation &Later,
const MemoryLocation &Earlier,
const DataLayout &DL,
@@ -310,7 +305,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// If we don't know the sizes of either access, then we can't do a comparison.
if (Later.Size == MemoryLocation::UnknownSize ||
Earlier.Size == MemoryLocation::UnknownSize)
- return OverwriteUnknown;
+ return OW_Unknown;
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -320,7 +315,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
if (P1 == P2) {
// Make sure that the Later size is >= the Earlier size.
if (Later.Size >= Earlier.Size)
- return OverwriteComplete;
+ return OW_Complete;
}
// Check to see if the later store is to the entire object (either a global,
@@ -332,13 +327,13 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
if (UO1 != UO2)
- return OverwriteUnknown;
+ return OW_Unknown;
// If the "Later" store is to a recognizable object, get its size.
uint64_t ObjectSize = getPointerSize(UO2, DL, TLI);
if (ObjectSize != MemoryLocation::UnknownSize)
if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
- return OverwriteComplete;
+ return OW_Complete;
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
@@ -350,7 +345,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
- return OverwriteUnknown;
+ return OW_Unknown;
// The later store completely overlaps the earlier store if:
//
@@ -370,7 +365,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
if (EarlierOff >= LaterOff &&
Later.Size >= Earlier.Size &&
uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
- return OverwriteComplete;
+ return OW_Complete;
// We may now overlap, although the overlap is not complete. There might also
// be other incomplete overlaps, and together, they might cover the complete
@@ -428,7 +423,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
") Composite Later [" <<
ILI->second << ", " << ILI->first << ")\n");
++NumCompletePartials;
- return OverwriteComplete;
+ return OW_Complete;
}
}
@@ -443,7 +438,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
if (!EnablePartialOverwriteTracking &&
(LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
- return OverwriteEnd;
+ return OW_End;
// Finally, we also need to check if the later store overwrites the beginning
// of the earlier store.
@@ -458,11 +453,11 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
(LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
assert(int64_t(LaterOff + Later.Size) <
int64_t(EarlierOff + Earlier.Size) &&
- "Expect to be handled as OverwriteComplete");
- return OverwriteBegin;
+ "Expect to be handled as OW_Complete");
+ return OW_Begin;
}
// Otherwise, they don't completely overlap.
- return OverwriteUnknown;
+ return OW_Unknown;
}
/// If 'Inst' might be a self read (i.e. a noop copy of a
@@ -551,7 +546,7 @@ static bool memoryIsNotModifiedBetween(Instruction *FirstI,
Instruction *I = &*BI;
if (I->mayWriteToMemory() && I != SecondI) {
auto Res = AA->getModRefInfo(I, MemLoc);
- if (Res != MRI_NoModRef)
+ if (Res & MRI_Mod)
return false;
}
}
@@ -909,7 +904,7 @@ static bool tryToShortenBegin(Instruction *EarlierWrite,
if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
- "Should have been handled as OverwriteComplete");
+ "Should have been handled as OW_Complete");
if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
LaterSize, false)) {
IntervalMap.erase(OII);
@@ -1105,7 +1100,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
OverwriteResult OR =
isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset,
DepWrite, IOL);
- if (OR == OverwriteComplete) {
+ if (OR == OW_Complete) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
@@ -1117,15 +1112,15 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// We erased DepWrite; start over.
InstDep = MD->getDependency(Inst);
continue;
- } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
- ((OR == OverwriteBegin &&
+ } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) ||
+ ((OR == OW_Begin &&
isShortenableAtTheBeginning(DepWrite)))) {
assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
"when partial-overwrite "
"tracking is enabled");
int64_t EarlierSize = DepLoc.Size;
int64_t LaterSize = Loc.Size;
- bool IsOverwriteEnd = (OR == OverwriteEnd);
+ bool IsOverwriteEnd = (OR == OW_End);
MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
InstWriteOffset, LaterSize, IsOverwriteEnd);
}
@@ -1186,8 +1181,9 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!eliminateDeadStores(F, AA, MD, DT, TLI))
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
PA.preserve<MemoryDependenceAnalysis>();
return PA;
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 16e08ee58fbe..04479b6e49ac 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -19,6 +19,8 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -32,7 +34,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/MemorySSA.h"
#include <deque>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -253,6 +254,7 @@ public:
DominatorTree &DT;
AssumptionCache &AC;
MemorySSA *MSSA;
+ std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
typedef RecyclingAllocator<
BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
@@ -315,7 +317,9 @@ public:
/// \brief Set up the EarlyCSE runner for a particular function.
EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA)
- : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), CurrentGeneration(0) {}
+ : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA),
+ MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), CurrentGeneration(0) {
+ }
bool run();
@@ -388,7 +392,7 @@ private:
ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
: IsTargetMemInst(false), Inst(Inst) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
- if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
+ if (TTI.getTgtMemIntrinsic(II, Info))
IsTargetMemInst = true;
}
bool isLoad() const {
@@ -400,17 +404,14 @@ private:
return isa<StoreInst>(Inst);
}
bool isAtomic() const {
- if (IsTargetMemInst) {
- assert(Info.IsSimple && "need to refine IsSimple in TTI");
- return false;
- }
+ if (IsTargetMemInst)
+ return Info.Ordering != AtomicOrdering::NotAtomic;
return Inst->isAtomic();
}
bool isUnordered() const {
- if (IsTargetMemInst) {
- assert(Info.IsSimple && "need to refine IsSimple in TTI");
- return true;
- }
+ if (IsTargetMemInst)
+ return Info.isUnordered();
+
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
return LI->isUnordered();
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
@@ -421,10 +422,9 @@ private:
}
bool isVolatile() const {
- if (IsTargetMemInst) {
- assert(Info.IsSimple && "need to refine IsSimple in TTI");
- return false;
- }
+ if (IsTargetMemInst)
+ return Info.IsVolatile;
+
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
return LI->isVolatile();
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
@@ -517,7 +517,7 @@ private:
if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
PhisToCheck.push_back(MP);
- MSSA->removeMemoryAccess(WI);
+ MSSAUpdater->removeMemoryAccess(WI);
for (MemoryPhi *MP : PhisToCheck) {
MemoryAccess *FirstIn = MP->getIncomingValue(0);
@@ -587,27 +587,28 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// which reaches this block where the condition might hold a different
// value. Since we're adding this to the scoped hash table (like any other
// def), it will have been popped if we encounter a future merge block.
- if (BasicBlock *Pred = BB->getSinglePredecessor())
- if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
- if (BI->isConditional())
- if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
- if (SimpleValue::canHandle(CondInst)) {
- assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
- auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ?
- ConstantInt::getTrue(BB->getContext()) :
- ConstantInt::getFalse(BB->getContext());
- AvailableValues.insert(CondInst, ConditionalConstant);
- DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
- << CondInst->getName() << "' as " << *ConditionalConstant
- << " in " << BB->getName() << "\n");
- // Replace all dominated uses with the known value.
- if (unsigned Count =
- replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
- BasicBlockEdge(Pred, BB))) {
- Changed = true;
- NumCSECVP = NumCSECVP + Count;
- }
- }
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (BI && BI->isConditional()) {
+ auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
+ if (CondInst && SimpleValue::canHandle(CondInst)) {
+ assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
+ auto *TorF = (BI->getSuccessor(0) == BB)
+ ? ConstantInt::getTrue(BB->getContext())
+ : ConstantInt::getFalse(BB->getContext());
+ AvailableValues.insert(CondInst, TorF);
+ DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
+ << CondInst->getName() << "' as " << *TorF << " in "
+ << BB->getName() << "\n");
+ // Replace all dominated uses with the known value.
+ if (unsigned Count = replaceDominatedUsesWith(
+ CondInst, TorF, DT, BasicBlockEdge(Pred, BB))) {
+ Changed = true;
+ NumCSECVP = NumCSECVP + Count;
+ }
+ }
+ }
+ }
/// LastStore - Keep track of the last non-volatile store that we saw... for
/// as long as there in no instruction that reads memory. If we see a store
@@ -761,12 +762,13 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
- // If this instruction may read from memory, forget LastStore.
- // Load/store intrinsics will indicate both a read and a write to
- // memory. The target may override this (e.g. so that a store intrinsic
- // does not read from memory, and thus will be treated the same as a
- // regular store for commoning purposes).
- if (Inst->mayReadFromMemory() &&
+ // If this instruction may read from memory or throw (and potentially read
+ // from memory in the exception handler), forget LastStore. Load/store
+ // intrinsics will indicate both a read and a write to memory. The target
+ // may override this (e.g. so that a store intrinsic does not read from
+ // memory, and thus will be treated the same as a regular store for
+ // commoning purposes).
+ if ((Inst->mayReadFromMemory() || Inst->mayThrow()) &&
!(MemInst.isValid() && !MemInst.mayReadFromMemory()))
LastStore = nullptr;
@@ -967,10 +969,8 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
if (!CSE.run())
return PreservedAnalyses::all();
- // CSE preserves the dominator tree because it doesn't mutate the CFG.
- // FIXME: Bundle this with other CFG-preservation.
PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
if (UseMemorySSA)
PA.preserve<MemorySSAAnalysis>();
diff --git a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
index 545036d724ef..8a5af6195f1b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -516,11 +516,10 @@ FunctionPass *createFloat2IntPass() { return new Float2IntLegacyPass(); }
PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &) {
if (!runImpl(F))
return PreservedAnalyses::all();
- else {
- // FIXME: This should also 'preserve the CFG'.
- PreservedAnalyses PA;
- PA.preserve<GlobalsAA>();
- return PA;
- }
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<GlobalsAA>();
+ return PA;
}
} // End namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 0137378b828b..be696df548d5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -36,7 +36,6 @@
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
@@ -51,9 +50,12 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/VNCoercion.h"
+
#include <vector>
using namespace llvm;
using namespace llvm::gvn;
+using namespace llvm::VNCoercion;
using namespace PatternMatch;
#define DEBUG_TYPE "gvn"
@@ -595,11 +597,12 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<GlobalsAA>();
+ PA.preserve<TargetLibraryAnalysis>();
return PA;
}
-LLVM_DUMP_METHOD
-void GVN::dump(DenseMap<uint32_t, Value*>& d) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
E = d.end(); I != E; ++I) {
@@ -608,6 +611,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
}
errs() << "}\n";
}
+#endif
/// Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
@@ -690,442 +694,6 @@ SpeculationFailure:
}
-/// Return true if CoerceAvailableValueToLoadType will succeed.
-static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
- Type *LoadTy,
- const DataLayout &DL) {
- // If the loaded or stored value is an first class array or struct, don't try
- // to transform them. We need to be able to bitcast to integer.
- if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
- StoredVal->getType()->isStructTy() ||
- StoredVal->getType()->isArrayTy())
- return false;
-
- // The store has to be at least as big as the load.
- if (DL.getTypeSizeInBits(StoredVal->getType()) <
- DL.getTypeSizeInBits(LoadTy))
- return false;
-
- return true;
-}
-
-/// If we saw a store of a value to memory, and
-/// then a load from a must-aliased pointer of a different type, try to coerce
-/// the stored value. LoadedTy is the type of the load we want to replace.
-/// IRB is IRBuilder used to insert new instructions.
-///
-/// If we can't do it, return null.
-static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
- IRBuilder<> &IRB,
- const DataLayout &DL) {
- assert(CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
- "precondition violation - materialization can't fail");
-
- if (auto *C = dyn_cast<Constant>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
-
- // If this is already the right type, just return it.
- Type *StoredValTy = StoredVal->getType();
-
- uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
- uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
-
- // If the store and reload are the same size, we can always reuse it.
- if (StoredValSize == LoadedValSize) {
- // Pointer to Pointer -> use bitcast.
- if (StoredValTy->getScalarType()->isPointerTy() &&
- LoadedTy->getScalarType()->isPointerTy()) {
- StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy);
- } else {
- // Convert source pointers to integers, which can be bitcast.
- if (StoredValTy->getScalarType()->isPointerTy()) {
- StoredValTy = DL.getIntPtrType(StoredValTy);
- StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
- }
-
- Type *TypeToCastTo = LoadedTy;
- if (TypeToCastTo->getScalarType()->isPointerTy())
- TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
-
- if (StoredValTy != TypeToCastTo)
- StoredVal = IRB.CreateBitCast(StoredVal, TypeToCastTo);
-
- // Cast to pointer if the load needs a pointer type.
- if (LoadedTy->getScalarType()->isPointerTy())
- StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy);
- }
-
- if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
-
- return StoredVal;
- }
-
- // If the loaded value is smaller than the available value, then we can
- // extract out a piece from it. If the available value is too small, then we
- // can't do anything.
- assert(StoredValSize >= LoadedValSize &&
- "CanCoerceMustAliasedValueToLoad fail");
-
- // Convert source pointers to integers, which can be manipulated.
- if (StoredValTy->getScalarType()->isPointerTy()) {
- StoredValTy = DL.getIntPtrType(StoredValTy);
- StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
- }
-
- // Convert vectors and fp to integer, which can be manipulated.
- if (!StoredValTy->isIntegerTy()) {
- StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
- StoredVal = IRB.CreateBitCast(StoredVal, StoredValTy);
- }
-
- // If this is a big-endian system, we need to shift the value down to the low
- // bits so that a truncate will work.
- if (DL.isBigEndian()) {
- uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
- DL.getTypeStoreSizeInBits(LoadedTy);
- StoredVal = IRB.CreateLShr(StoredVal, ShiftAmt, "tmp");
- }
-
- // Truncate the integer to the right size now.
- Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
- StoredVal = IRB.CreateTrunc(StoredVal, NewIntTy, "trunc");
-
- if (LoadedTy != NewIntTy) {
- // If the result is a pointer, inttoptr.
- if (LoadedTy->getScalarType()->isPointerTy())
- StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy, "inttoptr");
- else
- // Otherwise, bitcast.
- StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy, "bitcast");
- }
-
- if (auto *C = dyn_cast<Constant>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
-
- return StoredVal;
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering memory write (store,
-/// memset, memcpy, memmove). This means that the write *may* provide bits used
-/// by the load but we can't be sure because the pointers don't mustalias.
-///
-/// Check this case to see if there is anything more we can do before we give
-/// up. This returns -1 if we have to give up, or a byte number in the stored
-/// value of the piece that feeds the load.
-static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
- Value *WritePtr,
- uint64_t WriteSizeInBits,
- const DataLayout &DL) {
- // If the loaded or stored value is a first class array or struct, don't try
- // to transform them. We need to be able to bitcast to integer.
- if (LoadTy->isStructTy() || LoadTy->isArrayTy())
- return -1;
-
- int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase =
- GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
- if (StoreBase != LoadBase)
- return -1;
-
- // If the load and store are to the exact same address, they should have been
- // a must alias. AA must have gotten confused.
- // FIXME: Study to see if/when this happens. One case is forwarding a memset
- // to a load from the base of the memset.
-
- // If the load and store don't overlap at all, the store doesn't provide
- // anything to the load. In this case, they really don't alias at all, AA
- // must have gotten confused.
- uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
-
- if ((WriteSizeInBits & 7) | (LoadSize & 7))
- return -1;
- uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
- LoadSize /= 8;
-
-
- bool isAAFailure = false;
- if (StoreOffset < LoadOffset)
- isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
- else
- isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
-
- if (isAAFailure)
- return -1;
-
- // If the Load isn't completely contained within the stored bits, we don't
- // have all the bits to feed it. We could do something crazy in the future
- // (issue a smaller load then merge the bits in) but this seems unlikely to be
- // valuable.
- if (StoreOffset > LoadOffset ||
- StoreOffset+StoreSize < LoadOffset+LoadSize)
- return -1;
-
- // Okay, we can do this transformation. Return the number of bytes into the
- // store that the load is.
- return LoadOffset-StoreOffset;
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering store.
-static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
- StoreInst *DepSI) {
- // Cannot handle reading from store of first-class aggregate yet.
- if (DepSI->getValueOperand()->getType()->isStructTy() ||
- DepSI->getValueOperand()->getType()->isArrayTy())
- return -1;
-
- const DataLayout &DL = DepSI->getModule()->getDataLayout();
- Value *StorePtr = DepSI->getPointerOperand();
- uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
- return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
- StorePtr, StoreSize, DL);
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being clobbered by another load. See if
-/// the other load can feed into the second load.
-static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
- LoadInst *DepLI, const DataLayout &DL){
- // Cannot handle reading from store of first-class aggregate yet.
- if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
- return -1;
-
- Value *DepPtr = DepLI->getPointerOperand();
- uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
- int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
- if (R != -1) return R;
-
- // If we have a load/load clobber an DepLI can be widened to cover this load,
- // then we should widen it!
- int64_t LoadOffs = 0;
- const Value *LoadBase =
- GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
-
- unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
- LoadBase, LoadOffs, LoadSize, DepLI);
- if (Size == 0) return -1;
-
- // Check non-obvious conditions enforced by MDA which we rely on for being
- // able to materialize this potentially available value
- assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
- assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
-
- return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
-}
-
-
-
-static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
- MemIntrinsic *MI,
- const DataLayout &DL) {
- // If the mem operation is a non-constant size, we can't handle it.
- ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
- if (!SizeCst) return -1;
- uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
-
- // If this is memset, we just need to see if the offset is valid in the size
- // of the memset..
- if (MI->getIntrinsicID() == Intrinsic::memset)
- return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
- MemSizeInBits, DL);
-
- // If we have a memcpy/memmove, the only case we can handle is if this is a
- // copy from constant memory. In that case, we can read directly from the
- // constant memory.
- MemTransferInst *MTI = cast<MemTransferInst>(MI);
-
- Constant *Src = dyn_cast<Constant>(MTI->getSource());
- if (!Src) return -1;
-
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
- if (!GV || !GV->isConstant()) return -1;
-
- // See if the access is within the bounds of the transfer.
- int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
- MI->getDest(), MemSizeInBits, DL);
- if (Offset == -1)
- return Offset;
-
- unsigned AS = Src->getType()->getPointerAddressSpace();
- // Otherwise, see if we can constant fold a load from the constant with the
- // offset applied as appropriate.
- Src = ConstantExpr::getBitCast(Src,
- Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
- OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
- return Offset;
- return -1;
-}
-
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering store. This means
-/// that the store provides bits used by the load but we the pointers don't
-/// mustalias. Check this case to see if there is anything more we can do
-/// before we give up.
-static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
- Type *LoadTy,
- Instruction *InsertPt, const DataLayout &DL){
- LLVMContext &Ctx = SrcVal->getType()->getContext();
-
- uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
- uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
-
- IRBuilder<> Builder(InsertPt);
-
- // Compute which bits of the stored value are being used by the load. Convert
- // to an integer type to start with.
- if (SrcVal->getType()->getScalarType()->isPointerTy())
- SrcVal = Builder.CreatePtrToInt(SrcVal,
- DL.getIntPtrType(SrcVal->getType()));
- if (!SrcVal->getType()->isIntegerTy())
- SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
-
- // Shift the bits to the least significant depending on endianness.
- unsigned ShiftAmt;
- if (DL.isLittleEndian())
- ShiftAmt = Offset*8;
- else
- ShiftAmt = (StoreSize-LoadSize-Offset)*8;
-
- if (ShiftAmt)
- SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
-
- if (LoadSize != StoreSize)
- SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
-
- return CoerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering load. This means
-/// that the load *may* provide bits used by the load but we can't be sure
-/// because the pointers don't mustalias. Check this case to see if there is
-/// anything more we can do before we give up.
-static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
- Type *LoadTy, Instruction *InsertPt,
- GVN &gvn) {
- const DataLayout &DL = SrcVal->getModule()->getDataLayout();
- // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
- // widen SrcVal out to a larger load.
- unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
- if (Offset+LoadSize > SrcValStoreSize) {
- assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
- assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
- // If we have a load/load clobber an DepLI can be widened to cover this
- // load, then we should widen it to the next power of 2 size big enough!
- unsigned NewLoadSize = Offset+LoadSize;
- if (!isPowerOf2_32(NewLoadSize))
- NewLoadSize = NextPowerOf2(NewLoadSize);
-
- Value *PtrVal = SrcVal->getPointerOperand();
-
- // Insert the new load after the old load. This ensures that subsequent
- // memdep queries will find the new load. We can't easily remove the old
- // load completely because it is already in the value numbering table.
- IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
- Type *DestPTy =
- IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
- DestPTy = PointerType::get(DestPTy,
- PtrVal->getType()->getPointerAddressSpace());
- Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
- PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
- LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
- NewLoad->takeName(SrcVal);
- NewLoad->setAlignment(SrcVal->getAlignment());
-
- DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
- DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
-
- // Replace uses of the original load with the wider load. On a big endian
- // system, we need to shift down to get the relevant bits.
- Value *RV = NewLoad;
- if (DL.isBigEndian())
- RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
- RV = Builder.CreateTrunc(RV, SrcVal->getType());
- SrcVal->replaceAllUsesWith(RV);
-
- // We would like to use gvn.markInstructionForDeletion here, but we can't
- // because the load is already memoized into the leader map table that GVN
- // tracks. It is potentially possible to remove the load from the table,
- // but then there all of the operations based on it would need to be
- // rehashed. Just leave the dead load around.
- gvn.getMemDep().removeInstruction(SrcVal);
- SrcVal = NewLoad;
- }
-
- return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
-}
-
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering mem intrinsic.
-static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
- Type *LoadTy, Instruction *InsertPt,
- const DataLayout &DL){
- LLVMContext &Ctx = LoadTy->getContext();
- uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8;
-
- IRBuilder<> Builder(InsertPt);
-
- // We know that this method is only called when the mem transfer fully
- // provides the bits for the load.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
- // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
- // independently of what the offset is.
- Value *Val = MSI->getValue();
- if (LoadSize != 1)
- Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
-
- Value *OneElt = Val;
-
- // Splat the value out to the right number of bits.
- for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
- // If we can double the number of bytes set, do it.
- if (NumBytesSet*2 <= LoadSize) {
- Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
- Val = Builder.CreateOr(Val, ShVal);
- NumBytesSet <<= 1;
- continue;
- }
-
- // Otherwise insert one byte at a time.
- Value *ShVal = Builder.CreateShl(Val, 1*8);
- Val = Builder.CreateOr(OneElt, ShVal);
- ++NumBytesSet;
- }
-
- return CoerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
- }
-
- // Otherwise, this is a memcpy/memmove from a constant global.
- MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
- Constant *Src = cast<Constant>(MTI->getSource());
- unsigned AS = Src->getType()->getPointerAddressSpace();
-
- // Otherwise, see if we can constant fold a load from the constant with the
- // offset applied as appropriate.
- Src = ConstantExpr::getBitCast(Src,
- Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
- OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
-}
/// Given a set of loads specified by ValuesPerBlock,
@@ -1171,7 +739,7 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI,
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
+ Res = getStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
@@ -1182,14 +750,20 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI,
if (Load->getType() == LoadTy && Offset == 0) {
Res = Load;
} else {
- Res = GetLoadValueForLoad(Load, Offset, LoadTy, InsertPt, gvn);
-
+ Res = getLoadValueForLoad(Load, Offset, LoadTy, InsertPt, DL);
+ // We would like to use gvn.markInstructionForDeletion here, but we can't
+ // because the load is already memoized into the leader map table that GVN
+ // tracks. It is potentially possible to remove the load from the table,
+ // but then there all of the operations based on it would need to be
+ // rehashed. Just leave the dead load around.
+ gvn.getMemDep().removeInstruction(Load);
DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
<< *getCoercedLoadValue() << '\n'
- << *Res << '\n' << "\n\n\n");
+ << *Res << '\n'
+ << "\n\n\n");
}
} else if (isMemIntrinValue()) {
- Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
+ Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
InsertPt, DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
@@ -1258,7 +832,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// Can't forward from non-atomic to atomic without violating memory model.
if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
int Offset =
- AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
+ analyzeLoadFromClobberingStore(LI->getType(), Address, DepSI, DL);
if (Offset != -1) {
Res = AvailableValue::get(DepSI->getValueOperand(), Offset);
return true;
@@ -1276,7 +850,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// Can't forward from non-atomic to atomic without violating memory model.
if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) {
int Offset =
- AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
+ analyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
if (Offset != -1) {
Res = AvailableValue::getLoad(DepLI, Offset);
@@ -1289,7 +863,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
if (Address && !LI->isAtomic()) {
- int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
+ int Offset = analyzeLoadFromClobberingMemInst(LI->getType(), Address,
DepMI, DL);
if (Offset != -1) {
Res = AvailableValue::getMI(DepMI, Offset);
@@ -1334,7 +908,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// different types if we have to. If the stored value is larger or equal to
// the loaded value, we can reuse it.
if (S->getValueOperand()->getType() != LI->getType() &&
- !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ !canCoerceMustAliasedValueToLoad(S->getValueOperand(),
LI->getType(), DL))
return false;
@@ -1351,7 +925,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// If the stored value is larger or equal to the loaded value, we can reuse
// it.
if (LD->getType() != LI->getType() &&
- !CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
+ !canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
return false;
// Can't forward from non-atomic to atomic without violating memory model.
@@ -1713,7 +1287,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If instruction I has debug info, then we should not update it.
// Also, if I has a null DebugLoc, then it is still potentially incorrect
// to propagate LI's DebugLoc because LI may not post-dominate I.
- if (LI->getDebugLoc() && ValuesPerBlock.size() != 1)
+ if (LI->getDebugLoc() && LI->getParent() == I->getParent())
I->setDebugLoc(LI->getDebugLoc());
if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
@@ -1795,7 +1369,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
- // Note that if 'I' is a load being replaced by some operation,
+ // Note that if 'I' is a load being replaced by some operation,
// for example, by an arithmetic operation, then andIRFlags()
// would just erase all math flags from the original arithmetic
// operation, which is clearly not wanted and not needed.
@@ -2187,11 +1761,11 @@ bool GVN::processInstruction(Instruction *I) {
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
- BasicBlock *Dst = i.getCaseSuccessor();
+ BasicBlock *Dst = i->getCaseSuccessor();
// If there is only a single edge, propagate the case value into it.
if (SwitchEdges.lookup(Dst) == 1) {
BasicBlockEdge E(Parent, Dst);
- Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E, true);
+ Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true);
}
}
return Changed;
@@ -2581,21 +2155,12 @@ bool GVN::iterateOnFunction(Function &F) {
// Top-down walk of the dominator tree
bool Changed = false;
- // Save the blocks this function have before transformation begins. GVN may
- // split critical edge, and hence may invalidate the RPO/DT iterator.
- //
- std::vector<BasicBlock *> BBVect;
- BBVect.reserve(256);
// Needed for value numbering with phi construction to work.
+ // RPOT walks the graph in its constructor and will not be invalidated during
+ // processBlock.
ReversePostOrderTraversal<Function *> RPOT(&F);
- for (ReversePostOrderTraversal<Function *>::rpo_iterator RI = RPOT.begin(),
- RE = RPOT.end();
- RI != RE; ++RI)
- BBVect.push_back(*RI);
-
- for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
- I != E; I++)
- Changed |= processBlock(*I);
+ for (BasicBlock *BB : RPOT)
+ Changed |= processBlock(BB);
return Changed;
}
@@ -2783,6 +2348,7 @@ public:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index f8e1d2e1a08a..6adfe130d148 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -17,16 +17,39 @@
// is disabled in the following cases.
// 1. Scalars across calls.
// 2. geps when corresponding load/store cannot be hoisted.
+//
+// TODO: Hoist from >2 successors. Currently GVNHoist will not hoist stores
+// in this case because it works on two instructions at a time.
+// entry:
+// switch i32 %c1, label %exit1 [
+// i32 0, label %sw0
+// i32 1, label %sw1
+// ]
+//
+// sw0:
+// store i32 1, i32* @G
+// br label %exit
+//
+// sw1:
+// store i32 1, i32* @G
+// br label %exit
+//
+// exit1:
+// store i32 1, i32* @G
+// ret void
+// exit:
+// ret void
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/MemorySSA.h"
using namespace llvm;
@@ -60,7 +83,7 @@ static cl::opt<int>
cl::desc("Maximum length of dependent chains to hoist "
"(default = 10, unlimited = -1)"));
-namespace {
+namespace llvm {
// Provides a sorting function based on the execution order of two instructions.
struct SortByDFSIn {
@@ -72,13 +95,6 @@ public:
// Returns true when A executes before B.
bool operator()(const Instruction *A, const Instruction *B) const {
- // FIXME: libc++ has a std::sort() algorithm that will call the compare
- // function on the same element. Once PR20837 is fixed and some more years
- // pass by and all the buildbots have moved to a corrected std::sort(),
- // enable the following assert:
- //
- // assert(A != B);
-
const BasicBlock *BA = A->getParent();
const BasicBlock *BB = B->getParent();
unsigned ADFS, BDFS;
@@ -202,6 +218,7 @@ public:
GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD,
MemorySSA *MSSA)
: DT(DT), AA(AA), MD(MD), MSSA(MSSA),
+ MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)),
HoistingGeps(false),
HoistedCtr(0)
{ }
@@ -249,9 +266,11 @@ private:
AliasAnalysis *AA;
MemoryDependenceResults *MD;
MemorySSA *MSSA;
+ std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
const bool HoistingGeps;
DenseMap<const Value *, unsigned> DFSNumber;
BBSideEffectsSet BBSideEffects;
+ DenseSet<const BasicBlock*> HoistBarrier;
int HoistedCtr;
enum InsKind { Unknown, Scalar, Load, Store };
@@ -307,8 +326,8 @@ private:
continue;
}
- // Check for end of function, calls that do not return, etc.
- if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
+ // We reached a leaf basic block, so not all paths contain this instruction.
+ if (!BB->getTerminator()->getNumSuccessors())
return false;
// When reaching the back-edge of a loop, there may be a path through the
@@ -360,7 +379,7 @@ private:
ReachedNewPt = true;
}
}
- if (defClobbersUseOrDef(Def, MU, *AA))
+ if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA))
return true;
}
@@ -387,7 +406,8 @@ private:
// executed between the execution of NewBB and OldBB. Hoisting an expression
// from OldBB into NewBB has to be safe on all execution paths.
for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
- if (*I == NewBB) {
+ const BasicBlock *BB = *I;
+ if (BB == NewBB) {
// Stop traversal when reaching HoistPt.
I.skipChildren();
continue;
@@ -398,11 +418,17 @@ private:
return true;
// Impossible to hoist with exceptions on the path.
- if (hasEH(*I))
+ if (hasEH(BB))
+ return true;
+
+ // No instruction that follows a hoist barrier in a basic block is ever
+ // selected for hoisting, so instructions chosen from a block that itself
+ // contains a barrier may still be hoisted; only a barrier in an
+ // intermediate block makes the path unsafe.
+ if ((BB != OldBB) && HoistBarrier.count(BB))
return true;
// Check that we do not move a store past loads.
- if (hasMemoryUse(NewPt, Def, *I))
+ if (hasMemoryUse(NewPt, Def, BB))
return true;
// -1 is unlimited number of blocks on all paths.
@@ -419,17 +445,18 @@ private:
// Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
// return true when the counter NBBsOnAllPaths reaches 0, except when it is
// initialized to -1 which is unlimited.
- bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *BB,
+ bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
int &NBBsOnAllPaths) {
- assert(DT->dominates(HoistPt, BB) && "Invalid path");
+ assert(DT->dominates(HoistPt, SrcBB) && "Invalid path");
// Walk all basic blocks reachable in depth-first iteration on
// the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
// blocks that may be executed between the execution of NewHoistPt and
// BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
// on all execution paths.
- for (auto I = idf_begin(BB), E = idf_end(BB); I != E;) {
- if (*I == HoistPt) {
+ for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) {
+ const BasicBlock *BB = *I;
+ if (BB == HoistPt) {
// Stop traversal when reaching NewHoistPt.
I.skipChildren();
continue;
@@ -440,7 +467,13 @@ private:
return true;
// Impossible to hoist with exceptions on the path.
- if (hasEH(*I))
+ if (hasEH(BB))
+ return true;
+
+ // No instruction that follows a hoist barrier in a basic block is ever
+ // selected for hoisting, so instructions chosen from a block that itself
+ // contains a barrier may still be hoisted; only a barrier in an
+ // intermediate block makes the path unsafe.
+ if ((BB != SrcBB) && HoistBarrier.count(BB))
return true;
// -1 is unlimited number of blocks on all paths.
@@ -626,6 +659,8 @@ private:
// Compute the insertion point and the list of expressions to be hoisted.
SmallVecInsn InstructionsToHoist;
for (auto I : V)
+ // We don't need to check for hoist barriers here: if I->getParent()
+ // contains a barrier then I precedes that barrier.
if (!hasEH(I->getParent()))
InstructionsToHoist.push_back(I);
@@ -809,9 +844,9 @@ private:
// legal when the ld/st is not moved past its current definition.
MemoryAccess *Def = OldMemAcc->getDefiningAccess();
NewMemAcc =
- MSSA->createMemoryAccessInBB(Repl, Def, HoistPt, MemorySSA::End);
+ MSSAUpdater->createMemoryAccessInBB(Repl, Def, HoistPt, MemorySSA::End);
OldMemAcc->replaceAllUsesWith(NewMemAcc);
- MSSA->removeMemoryAccess(OldMemAcc);
+ MSSAUpdater->removeMemoryAccess(OldMemAcc);
}
}
@@ -850,7 +885,7 @@ private:
// Update the uses of the old MSSA access with NewMemAcc.
MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
OldMA->replaceAllUsesWith(NewMemAcc);
- MSSA->removeMemoryAccess(OldMA);
+ MSSAUpdater->removeMemoryAccess(OldMA);
}
Repl->andIRFlags(I);
@@ -872,7 +907,7 @@ private:
auto In = Phi->incoming_values();
if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
Phi->replaceAllUsesWith(NewMemAcc);
- MSSA->removeMemoryAccess(Phi);
+ MSSAUpdater->removeMemoryAccess(Phi);
}
}
}
@@ -896,6 +931,12 @@ private:
for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
int InstructionNb = 0;
for (Instruction &I1 : *BB) {
+ // If I1 cannot guarantee progress, subsequent instructions
+ // in BB cannot be hoisted anyway.
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) {
+ HoistBarrier.insert(BB);
+ break;
+ }
// Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting
// deeper may increase the register pressure and compilation time.
if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB)
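
The HoistBarrier set introduced above marks blocks containing an instruction that may not transfer execution to its successor; candidates are only collected before that point. A rough standalone sketch of the scan, with a hypothetical toy instruction model in place of isGuaranteedToTransferExecutionToSuccessor:

    #include <cstdio>
    #include <set>
    #include <string>
    #include <vector>

    struct Inst { std::string Name; bool MayNotTransferExecution; };
    struct Block { int Id; std::vector<Inst> Insts; };

    int main() {
      std::vector<Block> Blocks = {
          {0, {{"load", false}, {"add", false}}},
          {1, {{"call @may_throw", true}, {"store", false}}}};

      std::set<int> HoistBarrier;
      for (const Block &BB : Blocks) {
        for (const Inst &I : BB.Insts) {
          if (I.MayNotTransferExecution) {
            // Later instructions in BB are never considered for hoisting.
            HoistBarrier.insert(BB.Id);
            break;
          }
          std::printf("block %d: '%s' is a hoisting candidate\n", BB.Id,
                      I.Name.c_str());
        }
      }
      for (int Id : HoistBarrier)
        std::printf("block %d contains a hoist barrier\n", Id);
      return 0;
    }

Candidates from a barrier block always precede the barrier, which is why the path checks above exempt the starting block from the HoistBarrier test.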
diff --git a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index b05ef002a456..7019287954a1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -568,8 +568,7 @@ bool GuardWideningImpl::combineRangeChecks(
return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
};
- std::copy_if(Checks.begin(), Checks.end(),
- std::back_inserter(CurrentChecks), IsCurrentCheck);
+ copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
Checks.erase(remove_if(Checks, IsCurrentCheck), Checks.end());
assert(CurrentChecks.size() != 0 && "We know we have at least one!");
@@ -658,8 +657,12 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- bool Changed = GuardWideningImpl(DT, PDT, LI).run();
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+ if (!GuardWideningImpl(DT, PDT, LI).run())
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 1752fb75eb1b..dcb2a4a0c6e6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -231,8 +231,9 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
bool isExact = false;
// See if we can convert this to an int64_t
uint64_t UIntVal;
- if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
- &isExact) != APFloat::opOK || !isExact)
+ if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
+ APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
+ !isExact)
return false;
IntVal = UIntVal;
return true;
@@ -906,7 +907,7 @@ class WidenIV {
SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
enum ExtendKind { ZeroExtended, SignExtended, Unknown };
- // A map tracking the kind of extension used to widen each narrow IV
+ // A map tracking the kind of extension used to widen each narrow IV
// and narrow IV user.
// Key: pointer to a narrow IV or IV user.
// Value: the kind of extension used to widen this Instruction.
@@ -1608,7 +1609,7 @@ void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
return;
CmpInst::Predicate P =
- TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
+ TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
auto CmpConstrainedLHSRange =
@@ -1634,7 +1635,7 @@ void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
UpdateRangeFromGuards(NarrowUser);
BasicBlock *NarrowUserBB = NarrowUser->getParent();
- // If NarrowUserBB is statically unreachable asking dominator queries may
+ // If NarrowUserBB is statically unreachable asking dominator queries may
// yield surprising results. (e.g. the block may not have a dom tree node)
if (!DT->isReachableFromEntry(NarrowUserBB))
return;
@@ -2152,6 +2153,8 @@ linearFunctionTestReplace(Loop *L,
Value *CmpIndVar = IndVar;
const SCEV *IVCount = BackedgeTakenCount;
+ assert(L->getLoopLatch() && "Loop no longer in simplified form?");
+
// If the exiting block is the same as the backedge block, we prefer to
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
@@ -2376,6 +2379,7 @@ bool IndVarSimplify::run(Loop *L) {
// Loop::getCanonicalInductionVariable only supports loops with preheaders,
// and we're in trouble if we can't find the induction variable even when
// we've manually inserted one.
+ // - LFTR relies on having a single backedge.
if (!L->isLoopSimplifyForm())
return false;
@@ -2492,8 +2496,9 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
if (!IVS.run(&L))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
namespace {
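
ConvertToSInt, touched by the first IndVarSimplify hunk, only accepts a floating-point constant that converts to an integer exactly. A standalone illustration of that exactness requirement using plain doubles; LLVM itself performs the check with APFloat::convertToInteger and the isExact flag:

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Returns true only when V is a finite value that is exactly an int64_t.
    static bool convertToSInt(double V, int64_t &IntVal) {
      const double kMin = -9223372036854775808.0;  // -2^63
      const double kMax = 9223372036854775808.0;   // 2^63
      if (!std::isfinite(V) || std::trunc(V) != V)
        return false;                              // not an exact integer value
      if (V < kMin || V >= kMax)
        return false;                              // out of int64_t range
      IntVal = static_cast<int64_t>(V);
      return true;
    }

    int main() {
      int64_t I;
      std::printf("42.0  -> %s\n", convertToSInt(42.0, I) ? "exact" : "rejected");
      std::printf("0.5   -> %s\n", convertToSInt(0.5, I) ? "exact" : "rejected");
      std::printf("1e300 -> %s\n", convertToSInt(1e300, I) ? "exact" : "rejected");
      return 0;
    }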
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 8e81541c2337..85db6e5e1105 100644
--- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -446,6 +446,15 @@ struct LoopStructure {
BasicBlock *LatchExit;
unsigned LatchBrExitIdx;
+ // The loop represented by this instance of LoopStructure is semantically
+ // equivalent to:
+ //
+ // intN_ty inc = IndVarIncreasing ? 1 : -1;
+ // pred_ty predicate = IndVarIncreasing ? ICMP_SLT : ICMP_SGT;
+ //
+ // for (intN_ty iv = IndVarStart; predicate(iv, LoopExitAt); iv = IndVarNext)
+ // ... body ...
+
Value *IndVarNext;
Value *IndVarStart;
Value *LoopExitAt;
@@ -789,6 +798,10 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
return None;
}
+ const SCEV *StartNext = IndVarNext->getStart();
+ const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));
+ const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
+
ConstantInt *One = ConstantInt::get(IndVarTy, 1);
// TODO: generalize the predicates here to also match their unsigned variants.
if (IsIncreasing) {
@@ -809,10 +822,22 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
return None;
}
+ if (!SE.isLoopEntryGuardedByCond(
+ &L, CmpInst::ICMP_SLT, IndVarStart,
+ SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())))) {
+ FailureReason = "Induction variable start not bounded by upper limit";
+ return None;
+ }
+
IRBuilder<> B(Preheader->getTerminator());
RightValue = B.CreateAdd(RightValue, One);
+ } else {
+ if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SLT, IndVarStart,
+ RightSCEV)) {
+ FailureReason = "Induction variable start not bounded by upper limit";
+ return None;
+ }
}
-
} else {
bool FoundExpectedPred =
(Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) ||
@@ -831,15 +856,24 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
return None;
}
+ if (!SE.isLoopEntryGuardedByCond(
+ &L, CmpInst::ICMP_SGT, IndVarStart,
+ SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())))) {
+ FailureReason = "Induction variable start not bounded by lower limit";
+ return None;
+ }
+
IRBuilder<> B(Preheader->getTerminator());
RightValue = B.CreateSub(RightValue, One);
+ } else {
+ if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SGT, IndVarStart,
+ RightSCEV)) {
+ FailureReason = "Induction variable start not bounded by lower limit";
+ return None;
+ }
}
}
- const SCEV *StartNext = IndVarNext->getStart();
- const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));
- const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
-
BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
assert(SE.getLoopDisposition(LatchCount, &L) ==
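
The new isLoopEntryGuardedByCond checks require the induction variable's start to already be bounded by the limit when the loop is entered; for the increasing case this amounts to Start < Limit + 1. A toy integer version of that predicate, ignoring overflow, whereas the pass itself reasons symbolically through ScalarEvolution:

    #include <cstdint>
    #include <cstdio>

    // For an increasing loop `for (iv = Start; iv < Limit; ++iv)`, the start
    // value must not already be past the upper limit (overflow ignored here).
    static bool startBoundedByUpperLimit(int64_t Start, int64_t Limit) {
      return Start < Limit + 1;
    }

    int main() {
      std::printf("Start=0,  Limit=10 -> %s\n",
                  startBoundedByUpperLimit(0, 10) ? "ok" : "reject");
      std::printf("Start=11, Limit=10 -> %s\n",
                  startBoundedByUpperLimit(11, 10) ? "ok" : "reject");
      return 0;
    }

The decreasing case in the second hunk is symmetric, checking Start > Limit - 1 against the lower limit.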
diff --git a/contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
new file mode 100644
index 000000000000..5d8701431a2c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -0,0 +1,903 @@
+//===- InferAddressSpaces.cpp - Infer address spaces ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// CUDA C/C++ includes memory space designation as variable type qualifiers (such
+// as __global__ and __shared__). Knowing the space of a memory access allows
+// CUDA compilers to emit faster PTX loads and stores. For example, a load from
+// shared memory can be translated to `ld.shared` which is roughly 10% faster
+// than a generic `ld` on an NVIDIA Tesla K40c.
+//
+// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
+// compilers must infer the memory space of an address expression from
+// type-qualified variables.
+//
+// LLVM IR uses non-zero (so-called) specific address spaces to represent memory
+// spaces (e.g. addrspace(3) means shared memory). The Clang frontend
+// places only type-qualified variables in specific address spaces, and then
+// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
+// (the so-called generic address space) for other instructions to use.
+//
+// For example, Clang translates the following CUDA code
+// __shared__ float a[10];
+// float v = a[i];
+// to
+// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
+// %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
+// %v = load float, float* %1 ; emits ld.f32
+// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
+// redirected to %0 (the generic version of @a).
+//
+// The optimization implemented in this file propagates specific address spaces
+// from type-qualified variable declarations to their users. For example, it
+// optimizes the above IR to
+// %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
+// %v = load float addrspace(3)* %1 ; emits ld.shared.f32
+// propagating the addrspace(3) from @a to %1. As a result, the NVPTX
+// codegen is able to emit ld.shared.f32 for %v.
+//
+// Address space inference works in two steps. First, it uses a data-flow
+// analysis to infer as many generic pointers as possible to point to only one
+// specific address space. In the above example, it can prove that %1 only
+// points to addrspace(3). This algorithm was published in
+// CUDA: Compiling and optimizing for a GPU platform
+// Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
+// ICCS 2012
+//
+// Then, address space inference replaces all refinable generic pointers with
+// equivalent specific pointers.
+//
+// The major challenge of implementing this optimization is handling PHINodes,
+// which may create loops in the data flow graph. This brings two complications.
+//
+// First, the data flow analysis in Step 1 needs to be circular. For example,
+// %generic.input = addrspacecast float addrspace(3)* %input to float*
+// loop:
+// %y = phi [ %generic.input, %y2 ]
+// %y2 = getelementptr %y, 1
+// %v = load %y2
+// br ..., label %loop, ...
+// proving %y specific requires proving both %generic.input and %y2 specific,
+// but proving %y2 specific circles back to %y. To address this complication,
+// the data flow analysis operates on a lattice:
+// uninitialized > specific address spaces > generic.
+// All address expressions (our implementation only considers phi, bitcast,
+// addrspacecast, and getelementptr) start with the uninitialized address space.
+// The monotone transfer function moves the address space of a pointer down a
+// lattice path from uninitialized to specific and then to generic. A join
+// operation of two different specific address spaces pushes the expression down
+// to the generic address space. The analysis completes once it reaches a fixed
+// point.
+//
+// Second, IR rewriting in Step 2 also needs to be circular. For example,
+// converting %y to addrspace(3) requires the compiler to know the converted
+// %y2, but converting %y2 needs the converted %y. To address this complication,
+// we break these cycles using "undef" placeholders. When converting an
+// instruction `I` to a new address space, if its operand `Op` is not converted
+// yet, we let `I` temporarily use `undef` and fix all the uses of undef later.
+// For instance, our algorithm first converts %y to
+// %y' = phi float addrspace(3)* [ %input, undef ]
+// Then, it converts %y2 to
+// %y2' = getelementptr %y', 1
+// Finally, it fixes the undef in %y' so that
+// %y' = phi float addrspace(3)* [ %input, %y2' ]
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+#define DEBUG_TYPE "infer-address-spaces"
+
+using namespace llvm;
+
+namespace {
+static const unsigned UninitializedAddressSpace = ~0u;
+
+using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
+
+/// \brief InferAddressSpaces
+class InferAddressSpaces : public FunctionPass {
+ /// Target-specific address space whose uses should be replaced if
+ /// possible.
+ unsigned FlatAddrSpace;
+
+public:
+ static char ID;
+
+ InferAddressSpaces() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+private:
+ // Returns the new address space of V if updated; otherwise, returns None.
+ Optional<unsigned>
+ updateAddressSpace(const Value &V,
+ const ValueToAddrSpaceMapTy &InferredAddrSpace) const;
+
+ // Tries to infer the specific address space of each address expression in
+ // Postorder.
+ void inferAddressSpaces(const std::vector<Value *> &Postorder,
+ ValueToAddrSpaceMapTy *InferredAddrSpace) const;
+
+ bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
+
+ // Changes the flat address expressions in function F to point to specific
+ // address spaces if InferredAddrSpace says so. Postorder is the postorder of
+ // all flat expressions in the use-def graph of function F.
+ bool
+ rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder,
+ const ValueToAddrSpaceMapTy &InferredAddrSpace,
+ Function *F) const;
+
+ void appendsFlatAddressExpressionToPostorderStack(
+ Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
+ DenseSet<Value *> *Visited) const;
+
+ bool rewriteIntrinsicOperands(IntrinsicInst *II,
+ Value *OldV, Value *NewV) const;
+ void collectRewritableIntrinsicOperands(
+ IntrinsicInst *II,
+ std::vector<std::pair<Value *, bool>> *PostorderStack,
+ DenseSet<Value *> *Visited) const;
+
+ std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
+
+ Value *cloneValueWithNewAddressSpace(
+ Value *V, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ SmallVectorImpl<const Use *> *UndefUsesToFix) const;
+ unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
+};
+} // end anonymous namespace
+
+char InferAddressSpaces::ID = 0;
+
+namespace llvm {
+void initializeInferAddressSpacesPass(PassRegistry &);
+}
+
+INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
+ false, false)
+
+// Returns true if V is an address expression.
+// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
+// getelementptr operators.
+static bool isAddressExpression(const Value &V) {
+ if (!isa<Operator>(V))
+ return false;
+
+ switch (cast<Operator>(V).getOpcode()) {
+ case Instruction::PHI:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Returns the pointer operands of V.
+//
+// Precondition: V is an address expression.
+static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
+ assert(isAddressExpression(V));
+ const Operator &Op = cast<Operator>(V);
+ switch (Op.getOpcode()) {
+ case Instruction::PHI: {
+ auto IncomingValues = cast<PHINode>(Op).incoming_values();
+ return SmallVector<Value *, 2>(IncomingValues.begin(),
+ IncomingValues.end());
+ }
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ case Instruction::GetElementPtr:
+ return {Op.getOperand(0)};
+ case Instruction::Select:
+ return {Op.getOperand(1), Op.getOperand(2)};
+ default:
+ llvm_unreachable("Unexpected instruction type.");
+ }
+}
+
+// TODO: Move logic to TTI?
+bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
+ Value *OldV,
+ Value *NewV) const {
+ Module *M = II->getParent()->getParent()->getParent();
+
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec: {
+ const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4));
+ if (!IsVolatile || !IsVolatile->isNullValue())
+ return false;
+
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::objectsize: {
+ Type *DestTy = II->getType();
+ Type *SrcTy = NewV->getType();
+ Function *NewDecl =
+ Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
+ II->setArgOperand(0, NewV);
+ II->setCalledFunction(NewDecl);
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+// TODO: Move logic to TTI?
+void InferAddressSpaces::collectRewritableIntrinsicOperands(
+ IntrinsicInst *II, std::vector<std::pair<Value *, bool>> *PostorderStack,
+ DenseSet<Value *> *Visited) const {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::objectsize:
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec:
+ appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
+ PostorderStack, Visited);
+ break;
+ default:
+ break;
+ }
+}
+
+// If V is an unvisited flat address expression, appends V to PostorderStack
+// and marks it as visited.
+void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
+ Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
+ DenseSet<Value *> *Visited) const {
+ assert(V->getType()->isPointerTy());
+ if (isAddressExpression(*V) &&
+ V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
+ if (Visited->insert(V).second)
+ PostorderStack->push_back(std::make_pair(V, false));
+ }
+}
+
+// Returns all flat address expressions in function F. The elements are
+// ordered in postorder.
+std::vector<Value *>
+InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
+ // This function implements a non-recursive postorder traversal of a partial
+ // use-def graph of function F.
+ std::vector<std::pair<Value *, bool>> PostorderStack;
+ // The set of visited expressions.
+ DenseSet<Value *> Visited;
+
+ auto PushPtrOperand = [&](Value *Ptr) {
+ appendsFlatAddressExpressionToPostorderStack(Ptr, &PostorderStack,
+ &Visited);
+ };
+
+ // We only explore address expressions that are reachable from loads and
+ // stores for now because we aim at generating faster loads and stores.
+ for (Instruction &I : instructions(F)) {
+ if (auto *LI = dyn_cast<LoadInst>(&I))
+ PushPtrOperand(LI->getPointerOperand());
+ else if (auto *SI = dyn_cast<StoreInst>(&I))
+ PushPtrOperand(SI->getPointerOperand());
+ else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
+ PushPtrOperand(RMW->getPointerOperand());
+ else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
+ PushPtrOperand(CmpX->getPointerOperand());
+ else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
+ // For memset/memcpy/memmove, any pointer operand can be replaced.
+ PushPtrOperand(MI->getRawDest());
+
+ // Handle 2nd operand for memcpy/memmove.
+ if (auto *MTI = dyn_cast<MemTransferInst>(MI))
+ PushPtrOperand(MTI->getRawSource());
+ } else if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited);
+ else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
+ // FIXME: Handle vectors of pointers
+ if (Cmp->getOperand(0)->getType()->isPointerTy()) {
+ PushPtrOperand(Cmp->getOperand(0));
+ PushPtrOperand(Cmp->getOperand(1));
+ }
+ }
+ }
+
+ std::vector<Value *> Postorder; // The resultant postorder.
+ while (!PostorderStack.empty()) {
+ // If the operands of the expression on top of the stack are already
+ // explored, adds that expression to the resultant postorder.
+ if (PostorderStack.back().second) {
+ Postorder.push_back(PostorderStack.back().first);
+ PostorderStack.pop_back();
+ continue;
+ }
+ // Otherwise, adds its operands to the stack and explores them.
+ PostorderStack.back().second = true;
+ for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
+ appendsFlatAddressExpressionToPostorderStack(PtrOperand, &PostorderStack,
+ &Visited);
+ }
+ }
+ return Postorder;
+}
+
+// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
+// of OperandUse.get() in the new address space. If the clone is not ready yet,
+// returns an undef in the new address space as a placeholder.
+static Value *operandWithNewAddressSpaceOrCreateUndef(
+ const Use &OperandUse, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ SmallVectorImpl<const Use *> *UndefUsesToFix) {
+ Value *Operand = OperandUse.get();
+
+ Type *NewPtrTy =
+ Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
+
+ if (Constant *C = dyn_cast<Constant>(Operand))
+ return ConstantExpr::getAddrSpaceCast(C, NewPtrTy);
+
+ if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
+ return NewOperand;
+
+ UndefUsesToFix->push_back(&OperandUse);
+ return UndefValue::get(NewPtrTy);
+}
+
+// Returns a clone of `I` with its operands converted to those specified in
+// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
+// operand whose address space needs to be modified might not exist in
+// ValueWithNewAddrSpace. In that case, uses undef as a placeholder operand and
+// adds that operand use to UndefUsesToFix so that caller can fix them later.
+//
+// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
+// from a pointer whose type already matches. Therefore, this function returns a
+// Value* instead of an Instruction*.
+static Value *cloneInstructionWithNewAddressSpace(
+ Instruction *I, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ SmallVectorImpl<const Use *> *UndefUsesToFix) {
+ Type *NewPtrType =
+ I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
+
+ if (I->getOpcode() == Instruction::AddrSpaceCast) {
+ Value *Src = I->getOperand(0);
+ // Because `I` is flat, the source address space must be specific.
+ // Therefore, the inferred address space must be the source space, according
+ // to our algorithm.
+ assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
+ if (Src->getType() != NewPtrType)
+ return new BitCastInst(Src, NewPtrType);
+ return Src;
+ }
+
+ // Computes the converted pointer operands.
+ SmallVector<Value *, 4> NewPointerOperands;
+ for (const Use &OperandUse : I->operands()) {
+ if (!OperandUse.get()->getType()->isPointerTy())
+ NewPointerOperands.push_back(nullptr);
+ else
+ NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
+ OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
+ }
+
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ return new BitCastInst(NewPointerOperands[0], NewPtrType);
+ case Instruction::PHI: {
+ assert(I->getType()->isPointerTy());
+ PHINode *PHI = cast<PHINode>(I);
+ PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
+ for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
+ unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index);
+ NewPHI->addIncoming(NewPointerOperands[OperandNo],
+ PHI->getIncomingBlock(Index));
+ }
+ return NewPHI;
+ }
+ case Instruction::GetElementPtr: {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
+ GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+ GEP->getSourceElementType(), NewPointerOperands[0],
+ SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
+ NewGEP->setIsInBounds(GEP->isInBounds());
+ return NewGEP;
+ }
+ case Instruction::Select: {
+ assert(I->getType()->isPointerTy());
+ return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
+ NewPointerOperands[2], "", nullptr, I);
+ }
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+
+// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
+// constant expression `CE` with its operands replaced as specified in
+// ValueWithNewAddrSpace.
+static Value *cloneConstantExprWithNewAddressSpace(
+ ConstantExpr *CE, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace) {
+ Type *TargetType =
+ CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
+
+ if (CE->getOpcode() == Instruction::AddrSpaceCast) {
+ // Because CE is flat, the source address space must be specific.
+ // Therefore, the inferred address space must be the source space according
+ // to our algorithm.
+ assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
+ NewAddrSpace);
+ return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
+ }
+
+ if (CE->getOpcode() == Instruction::BitCast) {
+ if (Value *NewOperand = ValueWithNewAddrSpace.lookup(CE->getOperand(0)))
+ return ConstantExpr::getBitCast(cast<Constant>(NewOperand), TargetType);
+ return ConstantExpr::getAddrSpaceCast(CE, TargetType);
+ }
+
+ if (CE->getOpcode() == Instruction::Select) {
+ Constant *Src0 = CE->getOperand(1);
+ Constant *Src1 = CE->getOperand(2);
+ if (Src0->getType()->getPointerAddressSpace() ==
+ Src1->getType()->getPointerAddressSpace()) {
+
+ return ConstantExpr::getSelect(
+ CE->getOperand(0), ConstantExpr::getAddrSpaceCast(Src0, TargetType),
+ ConstantExpr::getAddrSpaceCast(Src1, TargetType));
+ }
+ }
+
+ // Computes the operands of the new constant expression.
+ SmallVector<Constant *, 4> NewOperands;
+ for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
+ Constant *Operand = CE->getOperand(Index);
+ // If the address space of `Operand` needs to be modified, the new operand
+ // with the new address space should already be in ValueWithNewAddrSpace
+ // because (1) the constant expressions we consider (i.e. addrspacecast,
+ // bitcast, and getelementptr) do not incur cycles in the data flow graph
+ // and (2) this function is called on constant expressions in postorder.
+ if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
+ NewOperands.push_back(cast<Constant>(NewOperand));
+ } else {
+ // Otherwise, reuses the old operand.
+ NewOperands.push_back(Operand);
+ }
+ }
+
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ // Needs to specify the source type while constructing a getelementptr
+ // constant expression.
+ return CE->getWithOperands(
+ NewOperands, TargetType, /*OnlyIfReduced=*/false,
+ NewOperands[0]->getType()->getPointerElementType());
+ }
+
+ return CE->getWithOperands(NewOperands, TargetType);
+}
+
+// Returns a clone of the value `V`, with its operands replaced as specified in
+// ValueWithNewAddrSpace. This function is called on every flat address
+// expression whose address space needs to be modified, in postorder.
+//
+// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
+Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
+ Value *V, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ SmallVectorImpl<const Use *> *UndefUsesToFix) const {
+ // All values in Postorder are flat address expressions.
+ assert(isAddressExpression(*V) &&
+ V->getType()->getPointerAddressSpace() == FlatAddrSpace);
+
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Value *NewV = cloneInstructionWithNewAddressSpace(
+ I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
+ if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
+ if (NewI->getParent() == nullptr) {
+ NewI->insertBefore(I);
+ NewI->takeName(I);
+ }
+ }
+ return NewV;
+ }
+
+ return cloneConstantExprWithNewAddressSpace(
+ cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
+}
+
+// Defines the join operation on the address space lattice (see the file header
+// comments).
+unsigned InferAddressSpaces::joinAddressSpaces(unsigned AS1,
+ unsigned AS2) const {
+ if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
+ return FlatAddrSpace;
+
+ if (AS1 == UninitializedAddressSpace)
+ return AS2;
+ if (AS2 == UninitializedAddressSpace)
+ return AS1;
+
+ // The join of two different specific address spaces is flat.
+ return (AS1 == AS2) ? AS1 : FlatAddrSpace;
+}
+
+bool InferAddressSpaces::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ FlatAddrSpace = TTI.getFlatAddressSpace();
+ if (FlatAddrSpace == UninitializedAddressSpace)
+ return false;
+
+ // Collects all flat address expressions in postorder.
+ std::vector<Value *> Postorder = collectFlatAddressExpressions(F);
+
+ // Runs a data-flow analysis to refine the address spaces of every expression
+ // in Postorder.
+ ValueToAddrSpaceMapTy InferredAddrSpace;
+ inferAddressSpaces(Postorder, &InferredAddrSpace);
+
+ // Changes the address spaces of the flat address expressions that are inferred
+ // to point to a specific address space.
+ return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
+}
+
+void InferAddressSpaces::inferAddressSpaces(
+ const std::vector<Value *> &Postorder,
+ ValueToAddrSpaceMapTy *InferredAddrSpace) const {
+ SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
+ // Initially, all expressions are in the uninitialized address space.
+ for (Value *V : Postorder)
+ (*InferredAddrSpace)[V] = UninitializedAddressSpace;
+
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+
+ // Tries to update the address space of the stack top according to the
+ // address spaces of its operands.
+ DEBUG(dbgs() << "Updating the address space of\n " << *V << '\n');
+ Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
+ if (!NewAS.hasValue())
+ continue;
+ // If any update is made, adds the users of V to the worklist because
+ // their address spaces may also need to be updated.
+ DEBUG(dbgs() << " to " << NewAS.getValue() << '\n');
+ (*InferredAddrSpace)[V] = NewAS.getValue();
+
+ for (Value *User : V->users()) {
+ // Skip if User is already in the worklist.
+ if (Worklist.count(User))
+ continue;
+
+ auto Pos = InferredAddrSpace->find(User);
+ // Our algorithm only updates the address spaces of flat address
+ // expressions, which are those in InferredAddrSpace.
+ if (Pos == InferredAddrSpace->end())
+ continue;
+
+ // Function updateAddressSpace moves the address space down a lattice
+ // path. Therefore, nothing to do if User is already inferred as flat (the
+ // bottom element in the lattice).
+ if (Pos->second == FlatAddrSpace)
+ continue;
+
+ Worklist.insert(User);
+ }
+ }
+}
+
+Optional<unsigned> InferAddressSpaces::updateAddressSpace(
+ const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
+ assert(InferredAddrSpace.count(&V));
+
+ // The new inferred address space equals the join of the address spaces
+ // of all its pointer operands.
+ unsigned NewAS = UninitializedAddressSpace;
+
+ const Operator &Op = cast<Operator>(V);
+ if (Op.getOpcode() == Instruction::Select) {
+ Value *Src0 = Op.getOperand(1);
+ Value *Src1 = Op.getOperand(2);
+
+ auto I = InferredAddrSpace.find(Src0);
+ unsigned Src0AS = (I != InferredAddrSpace.end()) ?
+ I->second : Src0->getType()->getPointerAddressSpace();
+
+ auto J = InferredAddrSpace.find(Src1);
+ unsigned Src1AS = (J != InferredAddrSpace.end()) ?
+ J->second : Src1->getType()->getPointerAddressSpace();
+
+ auto *C0 = dyn_cast<Constant>(Src0);
+ auto *C1 = dyn_cast<Constant>(Src1);
+
+ // If one of the inputs is a constant, we may be able to do a constant
+ // addrspacecast of it. Defer inferring the address space until the input
+ // address space is known.
+ if ((C1 && Src0AS == UninitializedAddressSpace) ||
+ (C0 && Src1AS == UninitializedAddressSpace))
+ return None;
+
+ if (C0 && isSafeToCastConstAddrSpace(C0, Src1AS))
+ NewAS = Src1AS;
+ else if (C1 && isSafeToCastConstAddrSpace(C1, Src0AS))
+ NewAS = Src0AS;
+ else
+ NewAS = joinAddressSpaces(Src0AS, Src1AS);
+ } else {
+ for (Value *PtrOperand : getPointerOperands(V)) {
+ auto I = InferredAddrSpace.find(PtrOperand);
+ unsigned OperandAS = I != InferredAddrSpace.end() ?
+ I->second : PtrOperand->getType()->getPointerAddressSpace();
+
+ // join(flat, *) = flat. So we can break if NewAS is already flat.
+ NewAS = joinAddressSpaces(NewAS, OperandAS);
+ if (NewAS == FlatAddrSpace)
+ break;
+ }
+ }
+
+ unsigned OldAS = InferredAddrSpace.lookup(&V);
+ assert(OldAS != FlatAddrSpace);
+ if (OldAS == NewAS)
+ return None;
+ return NewAS;
+}
+
+/// Returns true if \p U is the pointer operand of a memory instruction with
+/// a single pointer operand that can have its address space changed by simply
+/// mutating the use to a new value.
+static bool isSimplePointerUseValidToReplace(Use &U) {
+ User *Inst = U.getUser();
+ unsigned OpNo = U.getOperandNo();
+
+ if (auto *LI = dyn_cast<LoadInst>(Inst))
+ return OpNo == LoadInst::getPointerOperandIndex() && !LI->isVolatile();
+
+ if (auto *SI = dyn_cast<StoreInst>(Inst))
+ return OpNo == StoreInst::getPointerOperandIndex() && !SI->isVolatile();
+
+ if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
+ return OpNo == AtomicRMWInst::getPointerOperandIndex() && !RMW->isVolatile();
+
+ if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+ return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
+ !CmpX->isVolatile();
+ }
+
+ return false;
+}
+
+/// Update memory intrinsic uses that require more complex processing than
+/// simple memory instructions. These require re-mangling and may have multiple
+/// pointer operands.
+static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
+ Value *NewV) {
+ IRBuilder<> B(MI);
+ MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
+ MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
+ MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);
+
+ if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
+ B.CreateMemSet(NewV, MSI->getValue(),
+ MSI->getLength(), MSI->getAlignment(),
+ false, // isVolatile
+ TBAA, ScopeMD, NoAliasMD);
+ } else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
+ Value *Src = MTI->getRawSource();
+ Value *Dest = MTI->getRawDest();
+
+ // Be careful in case this is a self-to-self copy.
+ if (Src == OldV)
+ Src = NewV;
+
+ if (Dest == OldV)
+ Dest = NewV;
+
+ if (isa<MemCpyInst>(MTI)) {
+ MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
+ B.CreateMemCpy(Dest, Src, MTI->getLength(),
+ MTI->getAlignment(),
+ false, // isVolatile
+ TBAA, TBAAStruct, ScopeMD, NoAliasMD);
+ } else {
+ assert(isa<MemMoveInst>(MTI));
+ B.CreateMemMove(Dest, Src, MTI->getLength(),
+ MTI->getAlignment(),
+ false, // isVolatile
+ TBAA, ScopeMD, NoAliasMD);
+ }
+ } else
+ llvm_unreachable("unhandled MemIntrinsic");
+
+ MI->eraseFromParent();
+ return true;
+}
+
+// Returns true if it is OK to change the address space of constant \p C with
+// a ConstantExpr addrspacecast.
+bool InferAddressSpaces::isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const {
+ assert(NewAS != UninitializedAddressSpace);
+
+ unsigned SrcAS = C->getType()->getPointerAddressSpace();
+ if (SrcAS == NewAS || isa<UndefValue>(C))
+ return true;
+
+ // Prevent illegal casts between different non-flat address spaces.
+ if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace)
+ return false;
+
+ if (isa<ConstantPointerNull>(C))
+ return true;
+
+ if (auto *Op = dyn_cast<Operator>(C)) {
+ // If we already have a constant addrspacecast, it should be safe to cast it
+ // off.
+ if (Op->getOpcode() == Instruction::AddrSpaceCast)
+ return isSafeToCastConstAddrSpace(cast<Constant>(Op->getOperand(0)), NewAS);
+
+ if (Op->getOpcode() == Instruction::IntToPtr &&
+ Op->getType()->getPointerAddressSpace() == FlatAddrSpace)
+ return true;
+ }
+
+ return false;
+}
+
+static Value::use_iterator skipToNextUser(Value::use_iterator I,
+ Value::use_iterator End) {
+ User *CurUser = I->getUser();
+ ++I;
+
+ while (I != End && I->getUser() == CurUser)
+ ++I;
+
+ return I;
+}
+
+bool InferAddressSpaces::rewriteWithNewAddressSpaces(
+ const std::vector<Value *> &Postorder,
+ const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
+ // For each address expression to be modified, creates a clone of it with its
+ // pointer operands converted to the new address space. Since the pointer
+ // operands are converted, the clone is naturally in the new address space by
+ // construction.
+ ValueToValueMapTy ValueWithNewAddrSpace;
+ SmallVector<const Use *, 32> UndefUsesToFix;
+ for (Value* V : Postorder) {
+ unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
+ if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
+ ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
+ V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
+ }
+ }
+
+ if (ValueWithNewAddrSpace.empty())
+ return false;
+
+ // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
+ for (const Use *UndefUse : UndefUsesToFix) {
+ User *V = UndefUse->getUser();
+ User *NewV = cast<User>(ValueWithNewAddrSpace.lookup(V));
+ unsigned OperandNo = UndefUse->getOperandNo();
+ assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
+ NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
+ }
+
+ // Replaces the uses of the old address expressions with the new ones.
+ for (Value *V : Postorder) {
+ Value *NewV = ValueWithNewAddrSpace.lookup(V);
+ if (NewV == nullptr)
+ continue;
+
+ DEBUG(dbgs() << "Replacing the uses of " << *V
+ << "\n with\n " << *NewV << '\n');
+
+ Value::use_iterator I, E, Next;
+ for (I = V->use_begin(), E = V->use_end(); I != E; ) {
+ Use &U = *I;
+
+ // Some users may refer to the same pointer operand in more than one of
+ // their operands; skip ahead to the next user.
+ I = skipToNextUser(I, E);
+
+ if (isSimplePointerUseValidToReplace(U)) {
+ // If V is used as the pointer operand of a compatible memory operation,
+ // sets the pointer operand to NewV. This replacement does not change
+ // the element type, so the resultant load/store is still valid.
+ U.set(NewV);
+ continue;
+ }
+
+ User *CurUser = U.getUser();
+ // Handle more complex cases like intrinsic that need to be remangled.
+ if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
+ if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
+ continue;
+ }
+
+ if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
+ if (rewriteIntrinsicOperands(II, V, NewV))
+ continue;
+ }
+
+ if (isa<Instruction>(CurUser)) {
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(CurUser)) {
+ // If we can infer that both pointers are in the same addrspace,
+ // transform e.g.
+ // %cmp = icmp eq float* %p, %q
+ // into
+ // %cmp = icmp eq float addrspace(3)* %new_p, %new_q
+
+ unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+ int SrcIdx = U.getOperandNo();
+ int OtherIdx = (SrcIdx == 0) ? 1 : 0;
+ Value *OtherSrc = Cmp->getOperand(OtherIdx);
+
+ if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(OtherSrc)) {
+ if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
+ Cmp->setOperand(OtherIdx, OtherNewV);
+ Cmp->setOperand(SrcIdx, NewV);
+ continue;
+ }
+ }
+
+ // Even if the type mismatches, we can cast the constant.
+ if (auto *KOtherSrc = dyn_cast<Constant>(OtherSrc)) {
+ if (isSafeToCastConstAddrSpace(KOtherSrc, NewAS)) {
+ Cmp->setOperand(SrcIdx, NewV);
+ Cmp->setOperand(OtherIdx,
+ ConstantExpr::getAddrSpaceCast(KOtherSrc, NewV->getType()));
+ continue;
+ }
+ }
+ }
+
+ // Otherwise, replaces the use with flat(NewV).
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ BasicBlock::iterator InsertPos = std::next(I->getIterator());
+ while (isa<PHINode>(InsertPos))
+ ++InsertPos;
+ U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
+ } else {
+ U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
+ V->getType()));
+ }
+ }
+ }
+
+ if (V->use_empty())
+ RecursivelyDeleteTriviallyDeadInstructions(V);
+ }
+
+ return true;
+}
+
+FunctionPass *llvm::createInferAddressSpacesPass() {
+ return new InferAddressSpaces();
+}
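
The data-flow step described in the new file's header comment can be summarized with a standalone sketch. The toy value graph below is hypothetical (plain C++, not LLVM IR); only the join function and the fixed-point loop mirror what joinAddressSpaces and inferAddressSpaces do:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    static const unsigned Uninitialized = ~0u;  // top of the lattice
    static const unsigned Flat = 0;             // bottom of the lattice

    static unsigned join(unsigned AS1, unsigned AS2) {
      if (AS1 == Flat || AS2 == Flat) return Flat;
      if (AS1 == Uninitialized) return AS2;
      if (AS2 == Uninitialized) return AS1;
      return AS1 == AS2 ? AS1 : Flat;  // two different specific spaces -> flat
    }

    // Each expression joins the spaces of its operands; KnownAS models an
    // addrspacecast source whose space is fixed from the start.
    struct Expr { std::vector<std::size_t> Operands; unsigned KnownAS; };

    int main() {
      // %0 = cast from addrspace(3); %1 = gep %0; %2 = phi [%1, %2] (a cycle).
      std::vector<Expr> G = {{{}, 3u}, {{0}, Uninitialized},
                             {{1, 2}, Uninitialized}};
      std::vector<unsigned> AS(G.size(), Uninitialized);

      bool Changed = true;  // naive fixed point: iterate until nothing moves
      while (Changed) {
        Changed = false;
        for (std::size_t I = 0; I < G.size(); ++I) {
          unsigned New = G[I].KnownAS;
          for (std::size_t Op : G[I].Operands)
            New = join(New, AS[Op]);
          if (New != AS[I]) { AS[I] = New; Changed = true; }
        }
      }
      for (std::size_t I = 0; I < G.size(); ++I)
        std::printf("%%%zu -> addrspace(%u)\n", I, AS[I]);
      return 0;
    }

On the cyclic phi example from the header comment this converges with every expression in addrspace(3); any disagreement between specific spaces would push the result down to flat instead.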
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 1870c3deb4f3..08eb95a1a3d3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
@@ -30,11 +31,13 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
@@ -89,6 +92,7 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<LazyValueInfoWrapperPass>();
AU.addPreserved<LazyValueInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
@@ -104,6 +108,7 @@ INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
@@ -121,6 +126,7 @@ bool JumpThreading::runOnFunction(Function &F) {
return false;
auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+ auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData = F.getEntryCount().hasValue();
@@ -129,7 +135,8 @@ bool JumpThreading::runOnFunction(Function &F) {
BPI.reset(new BranchProbabilityInfo(F, LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- return Impl.runImpl(F, TLI, LVI, HasProfileData, std::move(BFI),
+
+ return Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI),
std::move(BPI));
}
@@ -138,6 +145,8 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData = F.getEntryCount().hasValue();
@@ -146,12 +155,9 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
BPI.reset(new BranchProbabilityInfo(F, LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed =
- runImpl(F, &TLI, &LVI, HasProfileData, std::move(BFI), std::move(BPI));
- // FIXME: We need to invalidate LVI to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<LazyValueAnalysis>(F);
+ bool Changed = runImpl(F, &TLI, &LVI, &AA, HasProfileData, std::move(BFI),
+ std::move(BPI));
if (!Changed)
return PreservedAnalyses::all();
@@ -161,18 +167,23 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
}
bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
- LazyValueInfo *LVI_, bool HasProfileData_,
+ LazyValueInfo *LVI_, AliasAnalysis *AA_,
+ bool HasProfileData_,
std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = TLI_;
LVI = LVI_;
+ AA = AA_;
BFI.reset();
BPI.reset();
// When profile data is available, we need to update edge weights after
// successful jump threading, which requires both BPI and BFI being available.
HasProfileData = HasProfileData_;
+ auto *GuardDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ HasGuards = GuardDecl && !GuardDecl->use_empty();
if (HasProfileData) {
BPI = std::move(BPI_);
BFI = std::move(BFI_);
@@ -226,26 +237,13 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
BB != &BB->getParent()->getEntryBlock() &&
// If the terminator is the only non-phi instruction, try to nuke it.
BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) {
- // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
- // block, we have to make sure it isn't in the LoopHeaders set. We
- // reinsert afterward if needed.
- bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
- BasicBlock *Succ = BI->getSuccessor(0);
-
// FIXME: It is always conservatively correct to drop the info
// for a block even if it doesn't get erased. This isn't totally
// awesome, but it allows us to use AssertingVH to prevent nasty
// dangling pointer issues within LazyValueInfo.
LVI->eraseBlock(BB);
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
Changed = true;
- // If we deleted BB and BB was the header of a loop, then the
- // successor is now the header of the loop.
- BB = Succ;
- }
-
- if (ErasedFromLoopHeaders)
- LoopHeaders.insert(BB);
}
}
EverChanged |= Changed;
@@ -255,10 +253,13 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
return EverChanged;
}
-/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
-/// thread across it. Stop scanning the block when passing the threshold.
-static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
+/// Return the cost of duplicating a piece of this block, from the first
+/// non-phi instruction up to (but not including) StopAt, in order to thread
+/// across it. Stop scanning the block when exceeding the threshold. If
+/// duplication is impossible, returns ~0U.
+static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
+ Instruction *StopAt,
unsigned Threshold) {
+ assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I(BB->getFirstNonPHI());
@@ -266,15 +267,17 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
// branch, so they shouldn't count against the duplication cost.
unsigned Bonus = 0;
- const TerminatorInst *BBTerm = BB->getTerminator();
- // Threading through a switch statement is particularly profitable. If this
- // block ends in a switch, decrease its cost to make it more likely to happen.
- if (isa<SwitchInst>(BBTerm))
- Bonus = 6;
-
- // The same holds for indirect branches, but slightly more so.
- if (isa<IndirectBrInst>(BBTerm))
- Bonus = 8;
+ if (BB->getTerminator() == StopAt) {
+ // Threading through a switch statement is particularly profitable. If this
+ // block ends in a switch, decrease its cost to make it more likely to
+ // happen.
+ if (isa<SwitchInst>(StopAt))
+ Bonus = 6;
+
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(StopAt))
+ Bonus = 8;
+ }
// Bump the threshold up so the early exit from the loop doesn't skip the
// terminator-based Size adjustment at the end.
@@ -283,7 +286,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
- for (; !isa<TerminatorInst>(I); ++I) {
+ for (; &*I != StopAt; ++I) {
// Stop scanning the block if we've reached the threshold.
if (Size > Threshold)
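
getJumpThreadDuplicationCost now stops at an arbitrary StopAt instruction and only grants the switch/indirect-branch bonus when StopAt is the terminator itself. A toy version of that scan, using a hypothetical unit-cost model rather than LLVM's per-instruction costs:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    enum Kind { Phi, Simple, Switch, IndirectBr };
    struct Inst { Kind K; };

    static unsigned duplicationCost(const std::vector<Inst> &BB,
                                    std::size_t StopAt) {
      unsigned Bonus = 0;
      if (StopAt == BB.size() - 1) {           // StopAt is the terminator
        if (BB[StopAt].K == Switch)     Bonus = 6;
        if (BB[StopAt].K == IndirectBr) Bonus = 8;
      }
      unsigned Size = 0;
      std::size_t I = 0;
      while (I < BB.size() && BB[I].K == Phi)
        ++I;                                   // PHIs are flattened when duplicating
      for (; I < StopAt; ++I)
        ++Size;                                // every remaining instruction costs 1
      return Size > Bonus ? Size - Bonus : 0;
    }

    int main() {
      std::vector<Inst> BB = {{Phi}, {Simple}, {Simple}, {Switch}};
      std::printf("cost up to terminator: %u\n", duplicationCost(BB, 3));
      std::printf("cost up to inst #2:    %u\n", duplicationCost(BB, 2));
      return 0;
    }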
@@ -729,6 +732,10 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
if (TryToUnfoldSelectInCurrBB(BB))
return true;
+ // Look if we can propagate guards to predecessors.
+ if (HasGuards && ProcessGuards(BB))
+ return true;
+
// What kind of constant we're looking for.
ConstantPreference Preference = WantInteger;
@@ -804,7 +811,6 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
return false;
}
-
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
// If we're branching on a conditional, LVI might be able to determine
// it's value at the branch instruction. We only handle comparisons
@@ -812,7 +818,12 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// TODO: This should be extended to handle switches as well.
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
- if (CondBr && CondConst && CondBr->isConditional()) {
+ if (CondBr && CondConst) {
+ // We should have returned as soon as we turned a conditional branch into
+ // an unconditional one, because it is no longer interesting as far as
+ // jump threading is concerned.
+ assert(CondBr->isConditional() && "Threading on unconditional terminator");
+
LazyValueInfo::Tristate Ret =
LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
CondConst, CondBr);
@@ -835,10 +846,12 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
}
return true;
}
- }
- if (CondBr && CondConst && TryToUnfoldSelect(CondCmp, BB))
- return true;
+ // We did not manage to simplify this branch; see whether CondCmp
+ // depends on a known phi-select pattern.
+ if (TryToUnfoldSelect(CondCmp, BB))
+ return true;
+ }
}
// Check for some cases that are worth simplifying. Right now we want to look
@@ -857,7 +870,6 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
if (SimplifyPartiallyRedundantLoad(LI))
return true;
-
// Handle a variety of cases where we are branching on something derived from
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.
@@ -871,7 +883,6 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnPHI(PN);
-
// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
if (CondInst->getOpcode() == Instruction::Xor &&
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
@@ -920,6 +931,14 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
return false;
}
+/// Return true if Op is an instruction defined in the given block.
+static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {
+ if (Instruction *OpInst = dyn_cast<Instruction>(Op))
+ if (OpInst->getParent() == BB)
+ return true;
+ return false;
+}
+
/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
/// load instruction, eliminate it by replacing it with a PHI node. This is an
/// important optimization that encourages jump threading, and needs to be run
@@ -942,18 +961,17 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
Value *LoadedPtr = LI->getOperand(0);
- // If the loaded operand is defined in the LoadBB, it can't be available.
- // TODO: Could do simple PHI translation, that would be fun :)
- if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
- if (PtrOp->getParent() == LoadBB)
- return false;
+ // If the loaded operand is defined in the LoadBB and it is not a phi,
+ // it can't be available in predecessors.
+ if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
+ return false;
// Scan a few instructions up from the load, to see if it is obviously live at
// the entry to its block.
BasicBlock::iterator BBIt(LI);
bool IsLoadCSE;
- if (Value *AvailableVal =
- FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan, nullptr, &IsLoadCSE)) {
+ if (Value *AvailableVal = FindAvailableLoadedValue(
+ LI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
@@ -997,12 +1015,34 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (!PredsScanned.insert(PredBB).second)
continue;
- // Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- Value *PredAvailable = FindAvailableLoadedValue(LI, PredBB, BBIt,
- DefMaxInstsToScan,
- nullptr,
- &IsLoadCSE);
+ unsigned NumScanedInst = 0;
+ Value *PredAvailable = nullptr;
+ // NOTE: We don't CSE a load that is volatile or anything stronger than
+ // unordered; that should have been checked when we entered the function.
+ assert(LI->isUnordered() && "Attempting to CSE volatile or atomic loads");
+ // If this is a load on a phi pointer, phi-translate it and search
+ // for available load/store to the pointer in predecessors.
+ Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB);
+ PredAvailable = FindAvailablePtrLoadStore(
+ Ptr, LI->getType(), LI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
+ AA, &IsLoadCSE, &NumScanedInst);
+
+ // If PredBB has a single predecessor, continue scanning through the
+ // single predecessor.
+ BasicBlock *SinglePredBB = PredBB;
+ while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
+ NumScanedInst < DefMaxInstsToScan) {
+ SinglePredBB = SinglePredBB->getSinglePredecessor();
+ if (SinglePredBB) {
+ BBIt = SinglePredBB->end();
+ PredAvailable = FindAvailablePtrLoadStore(
+ Ptr, LI->getType(), LI->isAtomic(), SinglePredBB, BBIt,
+ (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
+ &NumScanedInst);
+ }
+ }
+
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
@@ -1062,10 +1102,10 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- LoadInst *NewVal =
- new LoadInst(LoadedPtr, LI->getName() + ".pr", false,
- LI->getAlignment(), LI->getOrdering(), LI->getSynchScope(),
- UnavailablePred->getTerminator());
+ LoadInst *NewVal = new LoadInst(
+ LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
+ LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(),
+ LI->getSynchScope(), UnavailablePred->getTerminator());
NewVal->setDebugLoc(LI->getDebugLoc());
if (AATags)
NewVal->setAAMetadata(AATags);
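Sketched at the source level, the partially redundant load case that the PHI-translation change above enables (a hedged, illustrative example; the function and variable names are invented and the actual transformation happens on LLVM IR):

int pick(int *a, int *b, bool c) {
  int *p;
  int sum = 0;
  if (c) {
    sum += *a;   // the loaded value is already available in this predecessor
    p = a;
  } else {
    sum += *b;   // ... and in this one
    p = b;
  }
  // *p is partially redundant: phi-translating p per predecessor shows the
  // load can be replaced by a phi of the two values loaded above.
  return sum + *p;
}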
@@ -1229,7 +1269,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- DestBB = SI->findCaseValue(cast<ConstantInt>(Val)).getCaseSuccessor();
+ DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
} else {
assert(isa<IndirectBrInst>(BB->getTerminator())
&& "Unexpected terminator");
@@ -1468,7 +1508,8 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
return false;
}
- unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, BBDupThreshold);
+ unsigned JumpThreadCost =
+ getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
if (JumpThreadCost > BBDupThreshold) {
DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
@@ -1756,7 +1797,8 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
return false;
}
- unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, BBDupThreshold);
+ unsigned DuplicationCost =
+ getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
if (DuplicationCost > BBDupThreshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
@@ -1888,10 +1930,10 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
/// TryToUnfoldSelect - Look for blocks of the form
/// bb1:
/// %a = select
-/// br bb
+/// br bb2
///
/// bb2:
-/// %p = phi [%a, %bb] ...
+/// %p = phi [%a, %bb1] ...
/// %c = icmp %p
/// br i1 %c
///
@@ -2021,3 +2063,130 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
return false;
}
+
+/// Try to propagate a guard from the current BB into one of its predecessors
+/// in case another branch of execution implies that the condition of this
+/// guard is always true. Currently we only process the simplest case that
+/// looks like:
+///
+/// Start:
+/// %cond = ...
+/// br i1 %cond, label %T1, label %F1
+/// T1:
+/// br label %Merge
+/// F1:
+/// br label %Merge
+/// Merge:
+/// %condGuard = ...
+/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
+///
+/// And cond either implies condGuard or !condGuard. In this case all the
+/// instructions before the guard can be duplicated in both branches, and the
+/// guard is then threaded to one of them.
+bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
+ using namespace PatternMatch;
+ // We only want to deal with two predecessors.
+ BasicBlock *Pred1, *Pred2;
+ auto PI = pred_begin(BB), PE = pred_end(BB);
+ if (PI == PE)
+ return false;
+ Pred1 = *PI++;
+ if (PI == PE)
+ return false;
+ Pred2 = *PI++;
+ if (PI != PE)
+ return false;
+ if (Pred1 == Pred2)
+ return false;
+
+ // Try to thread one of the guards of the block.
+ // TODO: Look deeper than the immediate predecessor?
+ auto *Parent = Pred1->getSinglePredecessor();
+ if (!Parent || Parent != Pred2->getSinglePredecessor())
+ return false;
+
+ if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
+ for (auto &I : *BB)
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
+ if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
+ return true;
+
+ return false;
+}
+
+/// Try to propagate the guard from BB which is the lower block of a diamond
+/// to one of its branches, in case the diamond's condition implies the
+/// guard's condition.
+bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,
+ BranchInst *BI) {
+ assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
+ assert(BI->isConditional() && "Unconditional branch has 2 successors?");
+ Value *GuardCond = Guard->getArgOperand(0);
+ Value *BranchCond = BI->getCondition();
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = BI->getSuccessor(1);
+
+ auto &DL = BB->getModule()->getDataLayout();
+ bool TrueDestIsSafe = false;
+ bool FalseDestIsSafe = false;
+
+ // True dest is safe if BranchCond => GuardCond.
+ auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
+ if (Impl && *Impl)
+ TrueDestIsSafe = true;
+ else {
+ // False dest is safe if !BranchCond => GuardCond.
+ Impl =
+ isImpliedCondition(BranchCond, GuardCond, DL, /* InvertAPred */ true);
+ if (Impl && *Impl)
+ FalseDestIsSafe = true;
+ }
+
+ if (!TrueDestIsSafe && !FalseDestIsSafe)
+ return false;
+
+ BasicBlock *UnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
+ BasicBlock *GuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
+
+ ValueToValueMapTy UnguardedMapping, GuardedMapping;
+ Instruction *AfterGuard = Guard->getNextNode();
+ unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
+ if (Cost > BBDupThreshold)
+ return false;
+ // Duplicate all instructions before the guard and the guard itself to the
+ // branch where implication is not proved.
+ GuardedBlock = DuplicateInstructionsInSplitBetween(
+ BB, GuardedBlock, AfterGuard, GuardedMapping);
+ assert(GuardedBlock && "Could not create the guarded block?");
+ // Duplicate all instructions before the guard in the unguarded branch.
+ // Since we have successfully duplicated the guarded block and this block
+ // has fewer instructions, we expect it to succeed.
+ UnguardedBlock = DuplicateInstructionsInSplitBetween(BB, UnguardedBlock,
+ Guard, UnguardedMapping);
+ assert(UnguardedBlock && "Could not create the unguarded block?");
+ DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
+ << GuardedBlock->getName() << "\n");
+
+ // Some instructions before the guard may still have uses. For them, we need
+ // to create Phi nodes merging their copies in both guarded and unguarded
+ // branches. Those instructions that have no uses can be just removed.
+ SmallVector<Instruction *, 4> ToRemove;
+ for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
+ if (!isa<PHINode>(&*BI))
+ ToRemove.push_back(&*BI);
+
+ Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
+ assert(InsertionPoint && "Empty block?");
+ // Substitute with Phis & remove.
+ for (auto *Inst : reverse(ToRemove)) {
+ if (!Inst->use_empty()) {
+ PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
+ NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
+ NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
+ NewPN->insertBefore(InsertionPoint);
+ Inst->replaceAllUsesWith(NewPN);
+ }
+ Inst->eraseFromParent();
+ }
+ return true;
+}
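A hedged, source-level illustration of the diamond shape that ProcessGuards/ThreadGuard handles (llvm.experimental.guard is emitted by managed-language frontends; the plain C++ below only mirrors the control flow, and every name is invented):

void example(int x) {
  bool cond = (x > 0);            // branch condition of the diamond
  if (cond) { /* T1 */ } else { /* F1 */ }
  // Merge block:
  bool condGuard = (x >= 0);      // guard condition; here cond => condGuard
  if (!condGuard) {
    /* deoptimize */              // after threading, this check survives only
  }                               // on the F1 path, where it is still needed
}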
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index f51d11c04cb2..340c81fed0fd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -77,10 +77,16 @@ STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
+/// Memory promotion is enabled by default.
static cl::opt<bool>
- DisablePromotion("disable-licm-promotion", cl::Hidden,
+ DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
cl::desc("Disable memory promotion in LICM pass"));
+static cl::opt<uint32_t> MaxNumUsesTraversed(
+ "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
+ cl::desc("Max num uses visited for identifying load "
+ "invariance in loop using invariant start (default = 8)"));
+
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo);
@@ -201,9 +207,9 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true))
return PreservedAnalyses::all();
- // FIXME: There is no setPreservesCFG in the new PM. When that becomes
- // available, it should be used here.
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
char LegacyLICMPass::ID = 0;
@@ -425,6 +431,29 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
continue;
}
+ // Attempt to remove floating point division out of the loop by converting
+ // it to a reciprocal multiplication.
+ if (I.getOpcode() == Instruction::FDiv &&
+ CurLoop->isLoopInvariant(I.getOperand(1)) &&
+ I.hasAllowReciprocal()) {
+ auto Divisor = I.getOperand(1);
+ auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
+ auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
+ ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
+ ReciprocalDivisor->insertBefore(&I);
+
+ auto Product = BinaryOperator::CreateFMul(I.getOperand(0),
+ ReciprocalDivisor);
+ Product->setFastMathFlags(I.getFastMathFlags());
+ Product->insertAfter(&I);
+ I.replaceAllUsesWith(Product);
+ I.eraseFromParent();
+
+ hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
+ Changed = true;
+ continue;
+ }
+
// Try hoisting the instruction out to the preheader. We can only do this
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
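What the new reciprocal rule buys, as a minimal source-level sketch (illustrative only; the IR-level rewrite fires only when the fdiv carries the allow-reciprocal fast-math flag and its divisor is loop invariant):

#include <cstddef>

void scale(float *a, std::size_t n, float d) {
  // Original body: a[i] = a[i] / d;  -- one fdiv per iteration.
  float recip = 1.0f / d;               // hoisted reciprocal, computed once
  for (std::size_t i = 0; i < n; ++i)
    a[i] = a[i] * recip;                // an fmul inside the loop instead
}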
@@ -461,7 +490,10 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
// Iterate over loop instructions and compute safety info.
- for (Loop::block_iterator BB = CurLoop->block_begin(),
+ // Skip header as it has been computed and stored in HeaderMayThrow.
+ // The first block in loopinfo.Blocks is guaranteed to be the header.
+ assert(Header == *CurLoop->getBlocks().begin() && "First block must be header");
+ for (Loop::block_iterator BB = std::next(CurLoop->block_begin()),
BBE = CurLoop->block_end();
(BB != BBE) && !SafetyInfo->MayThrow; ++BB)
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
@@ -477,6 +509,59 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
SafetyInfo->BlockColors = colorEHFunclets(*Fn);
}
+// Return true if LI is invariant within the scope of the loop. LI is invariant if
+// CurLoop is dominated by an invariant.start representing the same memory location
+// and size as the memory location LI loads from, and also the invariant.start
+// has no uses.
+static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
+ Loop *CurLoop) {
+ Value *Addr = LI->getOperand(0);
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
+ cast<PointerType>(Addr->getType())->getElementType());
+
+ // If the type is i8 addrspace(x)*, we know this is the type of
+ // the llvm.invariant.start operand.
+ auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
+ LI->getPointerAddressSpace());
+ unsigned BitcastsVisited = 0;
+ // Look through bitcasts until we reach the i8* type (this is invariant.start
+ // operand type).
+ while (Addr->getType() != PtrInt8Ty) {
+ auto *BC = dyn_cast<BitCastInst>(Addr);
+ // Avoid traversing a high number of bitcast uses.
+ if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
+ return false;
+ Addr = BC->getOperand(0);
+ }
+
+ unsigned UsesVisited = 0;
+ // Traverse all uses of the load operand value, to see if invariant.start is
+ // one of the uses, and whether it dominates the load instruction.
+ for (auto *U : Addr->users()) {
+ // Avoid traversing load operands with a high number of users.
+ if (++UsesVisited > MaxNumUsesTraversed)
+ return false;
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
+ // If there are escaping uses of the invariant.start instruction, the load
+ // may be non-invariant.
+ if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
+ II->hasNUsesOrMore(1))
+ continue;
+ unsigned InvariantSizeInBits =
+ cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
+ // Confirm the invariant.start location size contains the load operand size
+ // in bits. Also, the invariant.start should dominate the load, and we
+ // should not hoist the load out of a loop that contains this dominating
+ // invariant.start.
+ if (LocSizeInBits <= InvariantSizeInBits &&
+ DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
+ return true;
+ }
+
+ return false;
+}
+
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
LoopSafetyInfo *SafetyInfo,
@@ -493,6 +578,10 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (LI->getMetadata(LLVMContext::MD_invariant_load))
return true;
+ // This checks for an invariant.start dominating the load.
+ if (isLoadInvariantInLoop(LI, DT, CurLoop))
+ return true;
+
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
if (LI->getType()->isSized())
@@ -782,7 +871,7 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
<< "\n");
ORE->emit(OptimizationRemark(DEBUG_TYPE, "Hoisted", &I)
- << "hosting " << ore::NV("Inst", &I));
+ << "hoisting " << ore::NV("Inst", &I));
// Metadata can be dependent on conditions we are hoisting above.
// Conservatively strip all metadata on the instruction unless we were
@@ -852,6 +941,7 @@ class LoopPromoter : public LoadAndStorePromoter {
LoopInfo &LI;
DebugLoc DL;
int Alignment;
+ bool UnorderedAtomic;
AAMDNodes AATags;
Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
@@ -875,10 +965,11 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
- const AAMDNodes &AATags)
+ bool UnorderedAtomic, const AAMDNodes &AATags)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
- LI(li), DL(std::move(dl)), Alignment(alignment), AATags(AATags) {}
+ LI(li), DL(std::move(dl)), Alignment(alignment),
+ UnorderedAtomic(UnorderedAtomic),AATags(AATags) {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
@@ -902,6 +993,8 @@ public:
Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
Instruction *InsertPos = LoopInsertPts[i];
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
+ if (UnorderedAtomic)
+ NewSI->setOrdering(AtomicOrdering::Unordered);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
if (AATags)
@@ -992,18 +1085,41 @@ bool llvm::promoteLoopAccessesToScalars(
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
unsigned Alignment = 1;
+ // Keep track of which types of access we see
+ bool SawUnorderedAtomic = false;
+ bool SawNotAtomic = false;
AAMDNodes AATags;
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
+ // Do we know this object does not escape?
+ bool IsKnownNonEscapingObject = false;
if (SafetyInfo->MayThrow) {
// If a loop can throw, we have to insert a store along each unwind edge.
// That said, we can't actually make the unwind edge explicit. Therefore,
// we have to prove that the store is dead along the unwind edge.
//
- // Currently, this code just special-cases alloca instructions.
- if (!isa<AllocaInst>(GetUnderlyingObject(SomePtr, MDL)))
- return false;
+ // If the underlying object is neither an alloca nor a pointer that does not
+ // escape, then we cannot effectively prove that the store is dead along
+ // the unwind edge, i.e. the caller of this function could have ways to
+ // access the pointed-to object.
+ Value *Object = GetUnderlyingObject(SomePtr, MDL);
+ // If this is a base pointer we do not understand, simply bail.
+ // We only handle alloca and return value from alloc-like fn right now.
+ if (!isa<AllocaInst>(Object)) {
+ if (!isAllocLikeFn(Object, TLI))
+ return false;
+ // If this is an alloc-like fn, there are more constraints we need to verify.
+ // More specifically, we must make sure that the pointer cannot escape.
+ //
+ // NOTE: PointerMayBeCaptured is not enough, as the pointer may have escaped
+ // even though it is not captured by the enclosing function. Standard allocation
+ // functions like malloc, calloc, and operator new return values which can
+ // be assumed not to have previously escaped.
+ if (PointerMayBeCaptured(Object, true, true))
+ return false;
+ IsKnownNonEscapingObject = true;
+ }
}
// Check that all of the pointers in the alias set have the same type. We
@@ -1029,8 +1145,11 @@ bool llvm::promoteLoopAccessesToScalars(
// it.
if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
assert(!Load->isVolatile() && "AST broken");
- if (!Load->isSimple())
+ if (!Load->isUnordered())
return false;
+
+ SawUnorderedAtomic |= Load->isAtomic();
+ SawNotAtomic |= !Load->isAtomic();
if (!DereferenceableInPH)
DereferenceableInPH = isSafeToExecuteUnconditionally(
@@ -1041,9 +1160,12 @@ bool llvm::promoteLoopAccessesToScalars(
if (UI->getOperand(1) != ASIV)
continue;
assert(!Store->isVolatile() && "AST broken");
- if (!Store->isSimple())
+ if (!Store->isUnordered())
return false;
+ SawUnorderedAtomic |= Store->isAtomic();
+ SawNotAtomic |= !Store->isAtomic();
+
// If the store is guaranteed to execute, both properties are satisfied.
// We may want to check if a store is guaranteed to execute even if we
// already know that promotion is safe, since it may have higher
@@ -1096,6 +1218,12 @@ bool llvm::promoteLoopAccessesToScalars(
}
}
+ // If we found both an unordered atomic instruction and a non-atomic memory
+ // access, bail. We can't blindly promote non-atomic to atomic since we
+ // might not be able to lower the result. We can't downgrade since that
+ // would violate the memory model. Also, align 0 is an error for atomics.
+ if (SawUnorderedAtomic && SawNotAtomic)
+ return false;
// If we couldn't prove we can hoist the load, bail.
if (!DereferenceableInPH)
@@ -1106,10 +1234,15 @@ bool llvm::promoteLoopAccessesToScalars(
// stores along paths which originally didn't have them without violating the
// memory model.
if (!SafeToInsertStore) {
- Value *Object = GetUnderlyingObject(SomePtr, MDL);
- SafeToInsertStore =
- (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
+ // If this is a known non-escaping object, it is safe to insert the stores.
+ if (IsKnownNonEscapingObject)
+ SafeToInsertStore = true;
+ else {
+ Value *Object = GetUnderlyingObject(SomePtr, MDL);
+ SafeToInsertStore =
+ (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
!PointerMayBeCaptured(Object, true, true);
+ }
}
// If we've still failed to prove we can sink the store, give up.
@@ -1134,12 +1267,15 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
+ InsertPts, PIC, *CurAST, *LI, DL, Alignment,
+ SawUnorderedAtomic, AATags);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
LoadInst *PreheaderLoad = new LoadInst(
SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
+ if (SawUnorderedAtomic)
+ PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
PreheaderLoad->setAlignment(Alignment);
PreheaderLoad->setDebugLoc(DL);
if (AATags)
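For context, a hedged source-level sketch of the scalar promotion this code performs, which the patch above extends to unordered-atomic accesses (names invented):

void accumulate(int *sum, const int *v, int n) {
  // Each iteration loads and stores *sum. Promotion turns this into one load
  // in the preheader, a register accumulation in the loop, and a single store
  // in the exit block; with this change, if the accesses are unordered
  // atomics, the inserted preheader load and exit stores stay atomic too.
  for (int i = 0; i < n; ++i)
    *sum += v[i];
}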
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
index 389f1c595aa4..02215d3450c2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -53,18 +54,20 @@ struct LoadPOPPair {
class LoadCombine : public BasicBlockPass {
LLVMContext *C;
AliasAnalysis *AA;
+ DominatorTree *DT;
public:
LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
initializeLoadCombinePass(*PassRegistry::getPassRegistry());
}
-
+
using llvm::Pass::doInitialization;
bool doInitialization(Function &) override;
bool runOnBasicBlock(BasicBlock &BB) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
@@ -234,6 +237,14 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
return false;
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // Skip analyzing dead blocks (not forward reachable from the function entry).
+ if (!DT->isReachableFromEntry(&BB)) {
+ DEBUG(dbgs() << "LC: skipping unreachable " << BB.getName() <<
+ " in " << BB.getParent()->getName() << "\n");
+ return false;
+ }
IRBuilder<TargetFolder> TheBuilder(
BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
@@ -245,13 +256,17 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
bool Combined = false;
unsigned Index = 0;
for (auto &I : BB) {
- if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) {
+ if (I.mayThrow() || AST.containsUnknown(&I)) {
if (combineLoads(LoadMap))
Combined = true;
LoadMap.clear();
AST.clear();
continue;
}
+ if (I.mayWriteToMemory()) {
+ AST.add(&I);
+ continue;
+ }
LoadInst *LI = dyn_cast<LoadInst>(&I);
if (!LI)
continue;
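For reference, a hedged source-level example of the pattern LoadCombine targets (names invented; the change above only refines how intervening may-write instructions are tracked via the AliasSetTracker while such groups are collected):

#include <cstdint>

std::uint32_t lo_hi(const std::uint16_t *p) {
  std::uint16_t lo = p[0];   // two adjacent narrow loads from the same base...
  std::uint16_t hi = p[1];
  // ...can be merged into one 32-bit load plus shifts, provided no instruction
  // between them may write to aliasing memory.
  return static_cast<std::uint32_t>(lo) |
         (static_cast<std::uint32_t>(hi) << 16);
}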
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index cca75a365024..73e8ce0e1d93 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -29,32 +29,31 @@ using namespace llvm;
STATISTIC(NumDeleted, "Number of loops deleted");
-/// isLoopDead - Determined if a loop is dead. This assumes that we've already
-/// checked for unique exit and exiting blocks, and that the code is in LCSSA
-/// form.
-bool LoopDeletionPass::isLoopDead(Loop *L, ScalarEvolution &SE,
- SmallVectorImpl<BasicBlock *> &exitingBlocks,
- SmallVectorImpl<BasicBlock *> &exitBlocks,
- bool &Changed, BasicBlock *Preheader) {
- BasicBlock *exitBlock = exitBlocks[0];
-
+/// Determines if a loop is dead.
+///
+/// This assumes that we've already checked for unique exit and exiting blocks,
+/// and that the code is in LCSSA form.
+static bool isLoopDead(Loop *L, ScalarEvolution &SE,
+ SmallVectorImpl<BasicBlock *> &ExitingBlocks,
+ BasicBlock *ExitBlock, bool &Changed,
+ BasicBlock *Preheader) {
// Make sure that all PHI entries coming from the loop are loop invariant.
// Because the code is in LCSSA form, any values used outside of the loop
// must pass through a PHI in the exit block, meaning that this check is
// sufficient to guarantee that no loop-variant values are used outside
// of the loop.
- BasicBlock::iterator BI = exitBlock->begin();
+ BasicBlock::iterator BI = ExitBlock->begin();
bool AllEntriesInvariant = true;
bool AllOutgoingValuesSame = true;
while (PHINode *P = dyn_cast<PHINode>(BI)) {
- Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+ Value *incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
// Make sure all exiting blocks produce the same incoming value for the exit
// block. If there are different incoming values for different exiting
// blocks, then it is impossible to statically determine which value should
// be used.
AllOutgoingValuesSame =
- all_of(makeArrayRef(exitingBlocks).slice(1), [&](BasicBlock *BB) {
+ all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
return incoming == P->getIncomingValueForBlock(BB);
});
@@ -78,33 +77,37 @@ bool LoopDeletionPass::isLoopDead(Loop *L, ScalarEvolution &SE,
// Make sure that no instructions in the block have potential side-effects.
// This includes instructions that could write to memory, and loads that are
- // marked volatile. This could be made more aggressive by using aliasing
- // information to identify readonly and readnone calls.
- for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
- LI != LE; ++LI) {
- for (Instruction &I : **LI) {
- if (I.mayHaveSideEffects())
- return false;
- }
- }
-
+ // marked volatile.
+ for (auto &I : L->blocks())
+ if (any_of(*I, [](Instruction &I) { return I.mayHaveSideEffects(); }))
+ return false;
return true;
}
-/// Remove dead loops, by which we mean loops that do not impact the observable
-/// behavior of the program other than finite running time. Note we do ensure
-/// that this never remove a loop that might be infinite, as doing so could
-/// change the halting/non-halting nature of a program. NOTE: This entire
-/// process relies pretty heavily on LoopSimplify and LCSSA in order to make
-/// various safety checks work.
-bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
- LoopInfo &loopInfo) {
+/// Remove a loop if it is dead.
+///
+/// A loop is considered dead if it does not impact the observable behavior of
+/// the program other than finite running time. This never removes a loop that
+/// might be infinite, as doing so could change the halting/non-halting nature
+/// of a program.
+///
+/// This entire process relies pretty heavily on LoopSimplify form and LCSSA in
+/// order to make various safety checks work.
+///
+/// \returns true if any changes were made. This may mutate the loop even if it
+/// is unable to delete it due to hoisting trivially loop invariant
+/// instructions out of the loop.
+///
+/// This also updates the relevant analysis information in \p DT, \p SE, and \p
+/// LI. It also updates the loop PM if an updater struct is provided.
+static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &LI, LPMUpdater *Updater = nullptr) {
assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
// We can only remove the loop if there is a preheader that we can
// branch from after removing it.
- BasicBlock *preheader = L->getLoopPreheader();
- if (!preheader)
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
return false;
// If LoopSimplify form is not available, stay out of trouble.
@@ -116,22 +119,20 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
if (L->begin() != L->end())
return false;
- SmallVector<BasicBlock *, 4> exitingBlocks;
- L->getExitingBlocks(exitingBlocks);
-
- SmallVector<BasicBlock *, 4> exitBlocks;
- L->getUniqueExitBlocks(exitBlocks);
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
// We require that the loop only have a single exit block. Otherwise, we'd
// be in the situation of needing to be able to solve statically which exit
// block will be branched to, or trying to preserve the branching logic in
// a loop invariant manner.
- if (exitBlocks.size() != 1)
+ BasicBlock *ExitBlock = L->getUniqueExitBlock();
+ if (!ExitBlock)
return false;
// Finally, we have to check that the loop really is dead.
bool Changed = false;
- if (!isLoopDead(L, SE, exitingBlocks, exitBlocks, Changed, preheader))
+ if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader))
return Changed;
// Don't remove loops for which we can't solve the trip count.
@@ -142,11 +143,13 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
// Now that we know the removal is safe, remove the loop by changing the
// branch from the preheader to go to the single exit block.
- BasicBlock *exitBlock = exitBlocks[0];
-
+ //
// Because we're deleting a large chunk of code at once, the sequence in which
- // we remove things is very important to avoid invalidation issues. Don't
- // mess with this unless you have good reason and know what you're doing.
+ // we remove things is very important to avoid invalidation issues.
+
+ // If we have an LPM updater, tell it about the loop being removed.
+ if (Updater)
+ Updater->markLoopAsDeleted(*L);
// Tell ScalarEvolution that the loop is deleted. Do this before
// deleting the loop so that ScalarEvolution can look at the loop
@@ -154,19 +157,19 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
SE.forgetLoop(L);
// Connect the preheader directly to the exit block.
- TerminatorInst *TI = preheader->getTerminator();
- TI->replaceUsesOfWith(L->getHeader(), exitBlock);
+ TerminatorInst *TI = Preheader->getTerminator();
+ TI->replaceUsesOfWith(L->getHeader(), ExitBlock);
// Rewrite phis in the exit block to get their inputs from
// the preheader instead of the exiting block.
- BasicBlock *exitingBlock = exitingBlocks[0];
- BasicBlock::iterator BI = exitBlock->begin();
+ BasicBlock *ExitingBlock = ExitingBlocks[0];
+ BasicBlock::iterator BI = ExitBlock->begin();
while (PHINode *P = dyn_cast<PHINode>(BI)) {
- int j = P->getBasicBlockIndex(exitingBlock);
+ int j = P->getBasicBlockIndex(ExitingBlock);
assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
- P->setIncomingBlock(j, preheader);
- for (unsigned i = 1; i < exitingBlocks.size(); ++i)
- P->removeIncomingValue(exitingBlocks[i]);
+ P->setIncomingBlock(j, Preheader);
+ for (unsigned i = 1; i < ExitingBlocks.size(); ++i)
+ P->removeIncomingValue(ExitingBlocks[i]);
++BI;
}
@@ -175,11 +178,11 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
- // Move all of the block's children to be children of the preheader, which
+ // Move all of the block's children to be children of the Preheader, which
// allows us to remove the domtree entry for the block.
ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
for (DomTreeNode *ChildNode : ChildNodes) {
- DT.changeImmediateDominator(ChildNode, DT[preheader]);
+ DT.changeImmediateDominator(ChildNode, DT[Preheader]);
}
ChildNodes.clear();
@@ -204,22 +207,19 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
SmallPtrSet<BasicBlock *, 8> blocks;
blocks.insert(L->block_begin(), L->block_end());
for (BasicBlock *BB : blocks)
- loopInfo.removeBlock(BB);
+ LI.removeBlock(BB);
// The last step is to update LoopInfo now that we've eliminated this loop.
- loopInfo.markAsRemoved(L);
- Changed = true;
-
+ LI.markAsRemoved(L);
++NumDeleted;
- return Changed;
+ return true;
}
PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
- bool Changed = runImpl(&L, AR.DT, AR.SE, AR.LI);
- if (!Changed)
+ LPMUpdater &Updater) {
+ if (!deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, &Updater))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
@@ -257,8 +257,7 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- LoopInfo &loopInfo = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LoopDeletionPass Impl;
- return Impl.runImpl(L, DT, SE, loopInfo);
+ return deleteLoopIfDead(L, DT, SE, LI);
}
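A minimal C++ illustration of the kind of loop deleteLoopIfDead removes (hedged example, names invented): the trip count is computable, the body has no side effects, and every value live after the loop is loop invariant.

int f(int n) {
  int r = 42;                  // loop-invariant value used after the loop
  for (int i = 0; i < n; ++i) {
    int tmp = i * i;           // dead computation with no side effects
    (void)tmp;
  }
  return r;                    // observable behavior does not depend on the loop
}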
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 19716b28ad66..3624bba10345 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -812,29 +812,29 @@ private:
const RuntimePointerChecking *RtPtrChecking) {
SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
- std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
- [&](const RuntimePointerChecking::PointerCheck &Check) {
- for (unsigned PtrIdx1 : Check.first->Members)
- for (unsigned PtrIdx2 : Check.second->Members)
- // Only include this check if there is a pair of pointers
- // that require checking and the pointers fall into
- // separate partitions.
- //
- // (Note that we already know at this point that the two
- // pointer groups need checking but it doesn't follow
- // that each pair of pointers within the two groups need
- // checking as well.
- //
- // In other words we don't want to include a check just
- // because there is a pair of pointers between the two
- // pointer groups that require checks and a different
- // pair whose pointers fall into different partitions.)
- if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
- !RuntimePointerChecking::arePointersInSamePartition(
- PtrToPartition, PtrIdx1, PtrIdx2))
- return true;
- return false;
- });
+ copy_if(AllChecks, std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (unsigned PtrIdx1 : Check.first->Members)
+ for (unsigned PtrIdx2 : Check.second->Members)
+ // Only include this check if there is a pair of pointers
+ // that require checking and the pointers fall into
+ // separate partitions.
+ //
+ // (Note that we already know at this point that the two
+ // pointer groups need checking but it doesn't follow
+ // that each pair of pointers within the two groups need
+ // checking as well.
+ //
+ // In other words we don't want to include a check just
+ // because there is a pair of pointers between the two
+ // pointer groups that require checks and a different
+ // pair whose pointers fall into different partitions.)
+ if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
+ !RuntimePointerChecking::arePointersInSamePartition(
+ PtrToPartition, PtrIdx1, PtrIdx2))
+ return true;
+ return false;
+ });
return Checks;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 5fec51c095d0..946d85d7360f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -236,9 +236,9 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
ApplyCodeSizeHeuristics =
L->getHeader()->getParent()->optForSize() && UseLIRCodeSizeHeurs;
- HasMemset = TLI->has(LibFunc::memset);
- HasMemsetPattern = TLI->has(LibFunc::memset_pattern16);
- HasMemcpy = TLI->has(LibFunc::memcpy);
+ HasMemset = TLI->has(LibFunc_memset);
+ HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
+ HasMemcpy = TLI->has(LibFunc_memcpy);
if (HasMemset || HasMemsetPattern || HasMemcpy)
if (SE->hasLoopInvariantBackedgeTakenCount(L))
@@ -823,7 +823,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Module *M = TheStore->getModule();
Value *MSP =
M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(),
- Int8PtrTy, Int8PtrTy, IntPtr, (void *)nullptr);
+ Int8PtrTy, Int8PtrTy, IntPtr);
inferLibFuncAttributes(*M->getFunction("memset_pattern16"), *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 69102d10ff60..28e71ca05436 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -189,7 +189,9 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
if (!SimplifyLoopInst(&L, &AR.DT, &AR.LI, &AR.AC, &AR.TLI))
return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
char LoopInstSimplifyLegacyPass::ID = 0;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index e9f84edd1cbf..9f3875a3027f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -39,7 +39,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+
using namespace llvm;
#define DEBUG_TYPE "loop-interchange"
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 8fb580183e30..cf63cb660db8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -20,13 +20,14 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -45,9 +46,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
-#include <forward_list>
-#include <cassert>
#include <algorithm>
+#include <cassert>
+#include <forward_list>
#include <set>
#include <tuple>
#include <utility>
@@ -373,15 +374,15 @@ public:
const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
- std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
- [&](const RuntimePointerChecking::PointerCheck &Check) {
- for (auto PtrIdx1 : Check.first->Members)
- for (auto PtrIdx2 : Check.second->Members)
- if (needsChecking(PtrIdx1, PtrIdx2,
- PtrsWrittenOnFwdingPath, CandLoadPtrs))
- return true;
- return false;
- });
+ copy_if(AllChecks, std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (auto PtrIdx1 : Check.first->Members)
+ for (auto PtrIdx2 : Check.second->Members)
+ if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
+ CandLoadPtrs))
+ return true;
+ return false;
+ });
DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size() << "):\n");
DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
@@ -558,6 +559,32 @@ private:
PredicatedScalarEvolution PSE;
};
+static bool
+eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
+ function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
+ // Build up a worklist of inner-loops to transform to avoid iterator
+ // invalidation.
+ // FIXME: This logic comes from other passes that actually change the loop
+ // nest structure. It isn't clear this is necessary (or useful) for a pass
+ // which merely optimizes the use of loads in a loop.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ // The actual work is performed by LoadEliminationForLoop.
+ LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
+ Changed |= LEL.processLoop();
+ }
+ return Changed;
+}
+
/// \brief The pass. Most of the work is delegated to the per-loop
/// LoadEliminationForLoop class.
class LoopLoadElimination : public FunctionPass {
@@ -570,32 +597,14 @@ public:
if (skipFunction(F))
return false;
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- // Build up a worklist of inner-loops to vectorize. This is necessary as the
- // act of distributing a loop creates new loops and can invalidate iterators
- // across the loops.
- SmallVector<Loop *, 8> Worklist;
-
- for (Loop *TopLevelLoop : *LI)
- for (Loop *L : depth_first(TopLevelLoop))
- // We only handle inner-most loops.
- if (L->empty())
- Worklist.push_back(L);
-
- // Now walk the identified inner loops.
- bool Changed = false;
- for (Loop *L : Worklist) {
- const LoopAccessInfo &LAI = LAA->getInfo(L);
- // The actual work is performed by LoadEliminationForLoop.
- LoadEliminationForLoop LEL(L, LI, LAI, DT);
- Changed |= LEL.processLoop();
- }
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
// Process each loop nest in the function.
- return Changed;
+ return eliminateLoadsAcrossLoops(
+ F, LI, DT,
+ [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -631,4 +640,28 @@ FunctionPass *createLoopLoadEliminationPass() {
return new LoopLoadElimination();
}
+PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+
+ auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
+ bool Changed = eliminateLoadsAcrossLoops(
+ F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI};
+ return LAM.getResult<LoopAccessAnalysis>(L, AR);
+ });
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ return PA;
+}
+
} // end namespace llvm
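A hedged source-level example of the loop-carried store-to-load forwarding this pass performs (the kernel and names are invented): the load of A[i] reads the value stored as A[i] on the previous iteration, so it can be kept in a register instead of reloaded each time.

void kernel(int *A, const int *B, int n) {
  for (int i = 0; i < n - 1; ++i)
    A[i + 1] = A[i] + B[i];    // A[i] was stored as A[(i-1)+1] last iteration;
                               // only the first value is loaded before the loop
}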
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 028f4bba8b1d..10f6fcdcfdb7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -42,6 +42,13 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
break;
}
+#ifndef NDEBUG
+ // Verify the loop structure and LCSSA form before visiting the loop.
+ L.verifyLoop();
+ assert(L.isRecursivelyLCSSAForm(AR.DT, AR.LI) &&
+ "Loops must remain in LCSSA form!");
+#endif
+
// Update the analysis manager as each pass runs and potentially
// invalidates analyses.
AM.invalidate(L, PassPA);
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp
new file mode 100644
index 000000000000..0ce604429326
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -0,0 +1,282 @@
+//===-- LoopPredication.cpp - Guard based loop predication pass -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LoopPredication pass tries to convert loop variant range checks to loop
+// invariant by widening checks across loop iterations. For example, it will
+// convert
+//
+// for (i = 0; i < n; i++) {
+// guard(i < len);
+// ...
+// }
+//
+// to
+//
+// for (i = 0; i < n; i++) {
+// guard(n - 1 < len);
+// ...
+// }
+//
+// After this transformation the condition of the guard is loop invariant, so
+// loop-unswitch can later unswitch the loop by this condition which basically
+// predicates the loop by the widened condition:
+//
+// if (n - 1 < len)
+// for (i = 0; i < n; i++) {
+// ...
+// }
+// else
+// deoptimize
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopPredication.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#define DEBUG_TYPE "loop-predication"
+
+using namespace llvm;
+
+namespace {
+class LoopPredication {
+ ScalarEvolution *SE;
+
+ Loop *L;
+ const DataLayout *DL;
+ BasicBlock *Preheader;
+
+ Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
+ IRBuilder<> &Builder);
+ bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
+
+public:
+ LoopPredication(ScalarEvolution *SE) : SE(SE){};
+ bool runOnLoop(Loop *L);
+};
+
+class LoopPredicationLegacyPass : public LoopPass {
+public:
+ static char ID;
+ LoopPredicationLegacyPass() : LoopPass(ID) {
+ initializeLoopPredicationLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ getLoopAnalysisUsage(AU);
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L))
+ return false;
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LoopPredication LP(SE);
+ return LP.runOnLoop(L);
+ }
+};
+
+char LoopPredicationLegacyPass::ID = 0;
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication",
+ "Loop predication", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication",
+ "Loop predication", false, false)
+
+Pass *llvm::createLoopPredicationPass() {
+ return new LoopPredicationLegacyPass();
+}
+
+PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ LoopPredication LP(&AR.SE);
+ if (!LP.runOnLoop(&L))
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}
+
+/// If ICI can be widened to a loop invariant condition emits the loop
+/// invariant condition in the loop preheader and return it, otherwise
+/// returns None.
+Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
+ SCEVExpander &Expander,
+ IRBuilder<> &Builder) {
+ DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
+ DEBUG(ICI->dump());
+
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ const SCEV *LHSS = SE->getSCEV(LHS);
+ if (isa<SCEVCouldNotCompute>(LHSS))
+ return None;
+ const SCEV *RHSS = SE->getSCEV(RHS);
+ if (isa<SCEVCouldNotCompute>(RHSS))
+ return None;
+
+ // Canonicalize: RHS is the loop-invariant bound, LHS the loop-computable index.
+ if (SE->isLoopInvariant(LHSS, L)) {
+ std::swap(LHS, RHS);
+ std::swap(LHSS, RHSS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ if (!SE->isLoopInvariant(RHSS, L) || !isSafeToExpand(RHSS, *SE))
+ return None;
+
+ const SCEVAddRecExpr *IndexAR = dyn_cast<SCEVAddRecExpr>(LHSS);
+ if (!IndexAR || IndexAR->getLoop() != L)
+ return None;
+
+ DEBUG(dbgs() << "IndexAR: ");
+ DEBUG(IndexAR->dump());
+
+ bool IsIncreasing = false;
+ if (!SE->isMonotonicPredicate(IndexAR, Pred, IsIncreasing))
+ return None;
+
+ // If the predicate is increasing the condition can change from false to true
+ // as the loop progresses, in this case take the value on the first iteration
+ // for the widened check. Otherwise the condition can change from true to
+ // false as the loop progresses, so take the value on the last iteration.
+ const SCEV *NewLHSS = IsIncreasing
+ ? IndexAR->getStart()
+ : SE->getSCEVAtScope(IndexAR, L->getParentLoop());
+ if (NewLHSS == IndexAR) {
+ DEBUG(dbgs() << "Can't compute NewLHSS!\n");
+ return None;
+ }
+
+ DEBUG(dbgs() << "NewLHSS: ");
+ DEBUG(NewLHSS->dump());
+
+ if (!SE->isLoopInvariant(NewLHSS, L) || !isSafeToExpand(NewLHSS, *SE))
+ return None;
+
+ DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n");
+
+ Type *Ty = LHS->getType();
+ Instruction *InsertAt = Preheader->getTerminator();
+ assert(Ty == RHS->getType() && "icmp operands have different types?");
+ Value *NewLHS = Expander.expandCodeFor(NewLHSS, Ty, InsertAt);
+ Value *NewRHS = Expander.expandCodeFor(RHSS, Ty, InsertAt);
+ return Builder.CreateICmp(Pred, NewLHS, NewRHS);
+}
+
+bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
+ SCEVExpander &Expander) {
+ DEBUG(dbgs() << "Processing guard:\n");
+ DEBUG(Guard->dump());
+
+ IRBuilder<> Builder(cast<Instruction>(Preheader->getTerminator()));
+
+ // The guard condition is expected to be in form of:
+ // cond1 && cond2 && cond3 ...
+ // Iterate over subconditions looking for icmp conditions which can be
+ // widened across loop iterations. While widening these conditions, remember
+ // the resulting list of subconditions in the Checks vector.
+ SmallVector<Value *, 4> Worklist(1, Guard->getOperand(0));
+ SmallPtrSet<Value *, 4> Visited;
+
+ SmallVector<Value *, 4> Checks;
+
+ unsigned NumWidened = 0;
+ do {
+ Value *Condition = Worklist.pop_back_val();
+ if (!Visited.insert(Condition).second)
+ continue;
+
+ Value *LHS, *RHS;
+ using namespace llvm::PatternMatch;
+ if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) {
+ Worklist.push_back(LHS);
+ Worklist.push_back(RHS);
+ continue;
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
+ if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Builder)) {
+ Checks.push_back(NewRangeCheck.getValue());
+ NumWidened++;
+ continue;
+ }
+ }
+
+ // Save the condition as is if we can't widen it
+ Checks.push_back(Condition);
+ } while (Worklist.size() != 0);
+
+ if (NumWidened == 0)
+ return false;
+
+ // Emit the new guard condition
+ Builder.SetInsertPoint(Guard);
+ Value *LastCheck = nullptr;
+ for (auto *Check : Checks)
+ if (!LastCheck)
+ LastCheck = Check;
+ else
+ LastCheck = Builder.CreateAnd(LastCheck, Check);
+ Guard->setOperand(0, LastCheck);
+
+ DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+ return true;
+}
+
+bool LoopPredication::runOnLoop(Loop *Loop) {
+ L = Loop;
+
+ DEBUG(dbgs() << "Analyzing ");
+ DEBUG(L->dump());
+
+ Module *M = L->getHeader()->getModule();
+
+ // There is nothing to do if the module doesn't use guards
+ auto *GuardDecl =
+ M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
+ if (!GuardDecl || GuardDecl->use_empty())
+ return false;
+
+ DL = &M->getDataLayout();
+
+ Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ return false;
+
+ // Collect all the guards into a vector and process later, so as not
+ // to invalidate the instruction iterator.
+ SmallVector<IntrinsicInst *, 4> Guards;
+ for (const auto BB : L->blocks())
+ for (auto &I : *BB)
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::experimental_guard)
+ Guards.push_back(II);
+
+ SCEVExpander Expander(*SE, *DL, "loop-predication");
+
+ bool Changed = false;
+ for (auto *Guard : Guards)
+ Changed |= widenGuardConditions(Guard, Expander);
+
+ return Changed;
+}
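To make the widening concrete, a hedged source-level sketch of the transformation described in the file header (the guard intrinsic comes from managed-language frontends; the C++ below only mirrors the resulting control flow, and all names are invented):

void copyChecked(int *dst, const int *src, int n, int len) {
  // Before: every iteration executes guard(i < len).
  // After widening, one loop-invariant check covers all iterations and can be
  // unswitched out of the loop; if it fails, execution deoptimizes instead.
  if (n > 0 && !(n - 1 < len)) {
    return;                    // stand-in for the deoptimization path
  }
  for (int i = 0; i < n; ++i)
    dst[i] = src[i];           // the per-iteration range check is gone
}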
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index cc83069d5f52..e5689368de80 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -79,7 +79,8 @@ private:
/// to merge the two values. Do this now.
static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
BasicBlock *OrigPreheader,
- ValueToValueMapTy &ValueMap) {
+ ValueToValueMapTy &ValueMap,
+ SmallVectorImpl<PHINode*> *InsertedPHIs) {
// Remove PHI node entries that are no longer live.
BasicBlock::iterator I, E = OrigHeader->end();
for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
@@ -87,7 +88,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
// Now fix up users of the instructions in OrigHeader, inserting PHI nodes
// as necessary.
- SSAUpdater SSA;
+ SSAUpdater SSA(InsertedPHIs);
for (I = OrigHeader->begin(); I != E; ++I) {
Value *OrigHeaderVal = &*I;
@@ -174,6 +175,38 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
}
}
+/// Propagate dbg.value intrinsics through the newly inserted Phis.
+static void insertDebugValues(BasicBlock *OrigHeader,
+ SmallVectorImpl<PHINode*> &InsertedPHIs) {
+ ValueToValueMapTy DbgValueMap;
+
+ // Map existing PHI nodes to their dbg.values.
+ for (auto &I : *OrigHeader) {
+ if (auto DbgII = dyn_cast<DbgInfoIntrinsic>(&I)) {
+ if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation()))
+ DbgValueMap.insert({Loc, DbgII});
+ }
+ }
+
+ // Then iterate through the new PHIs and look to see if they use one of the
+ // previously mapped PHIs. If so, insert a new dbg.value intrinsic that will
+ // propagate the info through the new PHI.
+ LLVMContext &C = OrigHeader->getContext();
+ for (auto PHI : InsertedPHIs) {
+ for (auto VI : PHI->operand_values()) {
+ auto V = DbgValueMap.find(VI);
+ if (V != DbgValueMap.end()) {
+ auto *DbgII = cast<DbgInfoIntrinsic>(V->second);
+ Instruction *NewDbgII = DbgII->clone();
+ auto PhiMAV = MetadataAsValue::get(C, ValueAsMetadata::get(PHI));
+ NewDbgII->setOperand(0, PhiMAV);
+ BasicBlock *Parent = PHI->getParent();
+ NewDbgII->insertBefore(Parent->getFirstNonPHIOrDbgOrLifetime());
+ }
+ }
+ }
+}
+
/// Rotate loop LP. Return true if the loop is rotated.
///
/// \param SimplifiedLatch is true if the latch was just folded into the final
@@ -347,9 +380,18 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// remove the corresponding incoming values from the PHI nodes in OrigHeader.
LoopEntryBranch->eraseFromParent();
+
+ SmallVector<PHINode*, 2> InsertedPHIs;
// If there were any uses of instructions in the duplicated block outside the
// loop, update them, inserting PHI nodes as required
- RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
+ &InsertedPHIs);
+
+ // Attach dbg.value intrinsics to the new phis if the phi uses a value that
+ // previously had debug metadata attached. This keeps the debug info
+ // up-to-date in the loop body.
+ if (!InsertedPHIs.empty())
+ insertDebugValues(OrigHeader, InsertedPHIs);
// NewHeader is now the header of the loop.
L->moveToHeader(NewHeader);
@@ -634,6 +676,7 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
bool Changed = LR.processLoop(&L);
if (!Changed)
return PreservedAnalyses::all();
+
return getLoopPassPreservedAnalyses();
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 16061212ba38..a5a81c33a8eb 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -69,6 +69,7 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &) {
if (!simplifyLoopCFG(L, AR.DT, AR.LI))
return PreservedAnalyses::all();
+
return getLoopPassPreservedAnalyses();
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
index f3f415275c0e..c9d55b4594fe 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -1,4 +1,4 @@
-//===-- LoopSink.cpp - Loop Sink Pass ------------------------===//
+//===-- LoopSink.cpp - Loop Sink Pass -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -28,8 +28,10 @@
// InsertBBs = UseBBs - DomBBs + BB
// For BB in InsertBBs:
// Insert I at BB's beginning
+//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopSink.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
@@ -297,6 +299,42 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
return Changed;
}
+PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
+ LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
+ // Nothing to do if there are no loops.
+ if (LI.empty())
+ return PreservedAnalyses::all();
+
+ AAResults &AA = FAM.getResult<AAManager>(F);
+ DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+
+ // We want to do a postorder walk over the loops. Since loops are a tree this
+ // is equivalent to a reversed preorder walk and preorder is easy to compute
+ // without recursion. Since we reverse the preorder, we will visit siblings
+ // in reverse program order. This isn't expected to matter at all but is more
+ // consistent with sinking algorithms which generally work bottom-up.
+ SmallVector<Loop *, 4> PreorderLoops = LI.getLoopsInPreorder();
+
+ bool Changed = false;
+ do {
+ Loop &L = *PreorderLoops.pop_back_val();
+
+ // Note that we don't pass SCEV here because it is only used to invalidate
+ // loops in SCEV and we don't preserve (or request) SCEV at all making that
+ // unnecessary.
+ Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI,
+ /*ScalarEvolution*/ nullptr);
+ } while (!PreorderLoops.empty());
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
namespace {
struct LegacyLoopSinkPass : public LoopPass {
static char ID;
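
The traversal comment in LoopSinkPass::run above relies on a property of preorder loop lists; a minimal sketch under that assumption, with processLoop as a hypothetical stand-in for the per-loop work:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"

// Preorder lists each parent before its children, so popping the worklist from
// the back visits every child before its parent -- a post-order, which is what
// the bottom-up sinking wants.
static void walkLoopsPostOrder(llvm::LoopInfo &LI,
                               llvm::function_ref<void(llvm::Loop &)> processLoop) {
  llvm::SmallVector<llvm::Loop *, 4> Worklist = LI.getLoopsInPreorder();
  while (!Worklist.empty())
    processLoop(*Worklist.pop_back_val());
}
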
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 194587a85e7c..af137f6faa63 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -129,6 +129,17 @@ static cl::opt<bool> EnablePhiElim(
"enable-lsr-phielim", cl::Hidden, cl::init(true),
cl::desc("Enable LSR phi elimination"));
+// This flag adds the instruction count to the LSR solution cost comparison.
+static cl::opt<bool> InsnsCost(
+ "lsr-insns-cost", cl::Hidden, cl::init(false),
+ cl::desc("Add instruction count to a LSR cost model"));
+
+// Flag to choose how to narrow a complex LSR solution.
+static cl::opt<bool> LSRExpNarrow(
+ "lsr-exp-narrow", cl::Hidden, cl::init(false),
+ cl::desc("Narrow LSR complex solution using"
+ " expectation of registers number"));
+
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -181,10 +192,11 @@ void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
-LLVM_DUMP_METHOD
-void RegSortData::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
@@ -295,9 +307,13 @@ struct Formula {
/// canonical representation of a formula is
/// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
/// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
+ /// 3. The reg containing the recurrent expr related to the current loop in
+ /// the formula should be put in the ScaledReg.
/// #1 enforces that the scaled register is always used when at least two
/// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
/// #2 enforces that 1 * reg is reg.
+ /// #3 ensures invariant regs with respect to current loop can be combined
+ /// together in LSR codegen.
/// This invariant can be temporarily broken while building a formula.
/// However, every formula inserted into the LSRInstance must be in canonical
/// form.
@@ -318,12 +334,14 @@ struct Formula {
void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
- bool isCanonical() const;
+ bool isCanonical(const Loop &L) const;
- void canonicalize();
+ void canonicalize(const Loop &L);
bool unscale();
+ bool hasZeroEnd() const;
+
size_t getNumRegs() const;
Type *getType() const;
@@ -410,16 +428,35 @@ void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
- canonicalize();
+ canonicalize(*L);
}
/// \brief Check whether or not this formula satisfies the canonical
/// representation.
/// \see Formula::BaseRegs.
-bool Formula::isCanonical() const {
- if (ScaledReg)
- return Scale != 1 || !BaseRegs.empty();
- return BaseRegs.size() <= 1;
+bool Formula::isCanonical(const Loop &L) const {
+ if (!ScaledReg)
+ return BaseRegs.size() <= 1;
+
+ if (Scale != 1)
+ return true;
+
+ if (Scale == 1 && BaseRegs.empty())
+ return false;
+
+ const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
+ if (SAR && SAR->getLoop() == &L)
+ return true;
+
+ // If ScaledReg is not a recurrent expr, or it is one but its loop is not the
+ // current loop, while BaseRegs contains a recurrent expr reg related to the
+ // current loop, we want to swap the reg in BaseRegs with ScaledReg.
+ auto I =
+ find_if(make_range(BaseRegs.begin(), BaseRegs.end()), [&](const SCEV *S) {
+ return isa<const SCEVAddRecExpr>(S) &&
+ (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ });
+ return I == BaseRegs.end();
}
/// \brief Helper method to morph a formula into its canonical representation.
@@ -428,21 +465,33 @@ bool Formula::isCanonical() const {
/// field. Otherwise, we would have to do special cases everywhere in LSR
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
/// On the other hand, 1*reg should be canonicalized into reg.
-void Formula::canonicalize() {
- if (isCanonical())
+void Formula::canonicalize(const Loop &L) {
+ if (isCanonical(L))
return;
// So far we did not need this case. This is easy to implement but it is
// useless to maintain dead code. Besides, it could hurt compile time.
assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
+
// Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
- ScaledReg = BaseRegs.back();
- BaseRegs.pop_back();
- Scale = 1;
- size_t BaseRegsSize = BaseRegs.size();
- size_t Try = 0;
- // If ScaledReg is an invariant, try to find a variant expression.
- while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
- std::swap(ScaledReg, BaseRegs[Try++]);
+ if (!ScaledReg) {
+ ScaledReg = BaseRegs.back();
+ BaseRegs.pop_back();
+ Scale = 1;
+ }
+
+ // If ScaledReg is an invariant with respect to L, find the reg from
+ // BaseRegs containing the recurrent expr related to Loop L. Swap the
+ // reg with ScaledReg.
+ const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
+ if (!SAR || SAR->getLoop() != &L) {
+ auto I = find_if(make_range(BaseRegs.begin(), BaseRegs.end()),
+ [&](const SCEV *S) {
+ return isa<const SCEVAddRecExpr>(S) &&
+ (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ });
+ if (I != BaseRegs.end())
+ std::swap(ScaledReg, *I);
+ }
}
/// \brief Get rid of the scale in the formula.
@@ -458,6 +507,14 @@ bool Formula::unscale() {
return true;
}
+bool Formula::hasZeroEnd() const {
+ if (UnfoldedOffset || BaseOffset)
+ return false;
+ if (BaseRegs.size() != 1 || ScaledReg)
+ return false;
+ return true;
+}
+
/// Return the total number of register operands used by this formula. This does
/// not include register uses implied by non-constant addrec strides.
size_t Formula::getNumRegs() const {
@@ -534,10 +591,11 @@ void Formula::print(raw_ostream &OS) const {
}
}
-LLVM_DUMP_METHOD
-void Formula::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void Formula::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// Return true if the given addrec can be sign-extended without changing its
/// value.
@@ -711,7 +769,7 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->getOperand(1) == OperandVal)
+ if (SI->getPointerOperand() == OperandVal)
isAddress = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
@@ -723,6 +781,12 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
isAddress = true;
break;
}
+ } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
+ if (RMW->getPointerOperand() == OperandVal)
+ isAddress = true;
+ } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+ if (CmpX->getPointerOperand() == OperandVal)
+ isAddress = true;
}
return isAddress;
}
@@ -735,6 +799,10 @@ static MemAccessTy getAccessType(const Instruction *Inst) {
AccessTy.AddrSpace = SI->getPointerAddressSpace();
} else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
AccessTy.AddrSpace = LI->getPointerAddressSpace();
+ } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
+ AccessTy.AddrSpace = RMW->getPointerAddressSpace();
+ } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+ AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
}
// All pointers have the same requirements, so canonicalize them to an
@@ -875,7 +943,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
// Get the cost of the scaling factor used in F for LU.
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
- const LSRUse &LU, const Formula &F);
+ const LSRUse &LU, const Formula &F,
+ const Loop &L);
namespace {
@@ -883,6 +952,7 @@ namespace {
class Cost {
/// TODO: Some of these could be merged. Also, a lexical ordering
/// isn't always optimal.
+ unsigned Insns;
unsigned NumRegs;
unsigned AddRecCost;
unsigned NumIVMuls;
@@ -893,8 +963,8 @@ class Cost {
public:
Cost()
- : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
- SetupCost(0), ScaleCost(0) {}
+ : Insns(0), NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0),
+ ImmCost(0), SetupCost(0), ScaleCost(0) {}
bool operator<(const Cost &Other) const;
@@ -903,9 +973,9 @@ public:
#ifndef NDEBUG
// Once any of the metrics loses, they must all remain losers.
bool isValid() {
- return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
+ return ((Insns | NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
| ImmCost | SetupCost | ScaleCost) != ~0u)
- || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
+ || ((Insns & NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
& ImmCost & SetupCost & ScaleCost) == ~0u);
}
#endif
@@ -1067,7 +1137,8 @@ public:
}
bool HasFormulaWithSameRegs(const Formula &F) const;
- bool InsertFormula(const Formula &F);
+ float getNotSelectedProbability(const SCEV *Reg) const;
+ bool InsertFormula(const Formula &F, const Loop &L);
void DeleteFormula(Formula &F);
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
@@ -1083,17 +1154,23 @@ void Cost::RateRegister(const SCEV *Reg,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
- // If this is an addrec for another loop, don't second-guess its addrec phi
- // nodes. LSR isn't currently smart enough to reason about more than one
- // loop at a time. LSR has already run on inner loops, will not run on outer
- // loops, and cannot be expected to change sibling loops.
+ // If this is an addrec for another loop, it should be an invariant
+ // with respect to L since L is the innermost loop (at least
+ // for now LSR only handles innermost loops).
if (AR->getLoop() != L) {
// If the AddRec exists, consider its register free and leave it alone.
if (isExistingPhi(AR, SE))
return;
- // Otherwise, do not consider this formula at all.
- Lose();
+ // It is bad to allow LSR for current loop to add induction variables
+ // for its sibling loops.
+ if (!AR->getLoop()->contains(L)) {
+ Lose();
+ return;
+ }
+
+ // Otherwise, it will be an invariant with respect to Loop L.
+ ++NumRegs;
return;
}
AddRecCost += 1; /// TODO: This should be a function of the stride.
@@ -1150,8 +1227,11 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
- assert(F.isCanonical() && "Cost is accurate only for canonical formula");
+ assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
// Tally up the registers.
+ unsigned PrevAddRecCost = AddRecCost;
+ unsigned PrevNumRegs = NumRegs;
+ unsigned PrevNumBaseAdds = NumBaseAdds;
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Lose();
@@ -1171,6 +1251,18 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
return;
}
+ // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
+ // an additional instruction (at least a fill).
+ unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
+ if (NumRegs > TTIRegNum) {
+ // If the cost already exceeded TTIRegNum, then only the newly added
+ // registers can add new instructions.
+ if (PrevNumRegs > TTIRegNum)
+ Insns += (NumRegs - PrevNumRegs);
+ else
+ Insns += (NumRegs - TTIRegNum);
+ }
+
// Determine how many (unfolded) adds we'll need inside the loop.
size_t NumBaseParts = F.getNumRegs();
if (NumBaseParts > 1)
@@ -1181,7 +1273,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
NumBaseAdds += (F.UnfoldedOffset != 0);
// Accumulate non-free scaling amounts.
- ScaleCost += getScalingFactorCost(TTI, LU, F);
+ ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
// Tally up the non-zero immediates.
for (const LSRFixup &Fixup : LU.Fixups) {
@@ -1199,11 +1291,30 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
!TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
NumBaseAdds++;
}
+
+ // If an ICmpZero formula does not end with 0, it cannot be replaced by just
+ // an add or sub. We'll need to compare the final result of the AddRec, which
+ // means we'll need an additional instruction.
+ // For -10 + {0, +, 1}:
+ // i = i + 1;
+ // cmp i, 10
+ //
+ // For {-10, +, 1}:
+ // i = i + 1;
+ if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd())
+ Insns++;
+ // Each new AddRec adds 1 instruction to calculation.
+ Insns += (AddRecCost - PrevAddRecCost);
+
+ // BaseAdds adds instructions for unfolded registers.
+ if (LU.Kind != LSRUse::ICmpZero)
+ Insns += NumBaseAdds - PrevNumBaseAdds;
assert(isValid() && "invalid cost");
}
/// Set this cost to a losing value.
void Cost::Lose() {
+ Insns = ~0u;
NumRegs = ~0u;
AddRecCost = ~0u;
NumIVMuls = ~0u;
@@ -1215,6 +1326,8 @@ void Cost::Lose() {
/// Choose the lower cost.
bool Cost::operator<(const Cost &Other) const {
+ if (InsnsCost && Insns != Other.Insns)
+ return Insns < Other.Insns;
return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
ImmCost, SetupCost) <
std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
@@ -1223,6 +1336,7 @@ bool Cost::operator<(const Cost &Other) const {
}
void Cost::print(raw_ostream &OS) const {
+ OS << Insns << " instruction" << (Insns == 1 ? " " : "s ");
OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
if (AddRecCost != 0)
OS << ", with addrec cost " << AddRecCost;
@@ -1239,10 +1353,11 @@ void Cost::print(raw_ostream &OS) const {
OS << ", plus " << SetupCost << " setup cost";
}
-LLVM_DUMP_METHOD
-void Cost::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void Cost::dump() const {
print(errs()); errs() << '\n';
}
+#endif
LSRFixup::LSRFixup()
: UserInst(nullptr), OperandValToReplace(nullptr),
@@ -1285,10 +1400,11 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", Offset=" << Offset;
}
-LLVM_DUMP_METHOD
-void LSRFixup::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// Test whether this use as a formula which has the same registers as the given
/// formula.
@@ -1300,10 +1416,19 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
return Uniquifier.count(Key);
}
+/// Return the probability of selecting a formula that does not reference Reg.
+float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
+ unsigned FNum = 0;
+ for (const Formula &F : Formulae)
+ if (F.referencesReg(Reg))
+ FNum++;
+ return ((float)(Formulae.size() - FNum)) / Formulae.size();
+}
+
/// If the given formula has not yet been inserted, add it to the list, and
/// return true. Return false otherwise. The formula must be in canonical form.
-bool LSRUse::InsertFormula(const Formula &F) {
- assert(F.isCanonical() && "Invalid canonical representation");
+bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
+ assert(F.isCanonical(L) && "Invalid canonical representation");
if (!Formulae.empty() && RigidFormula)
return false;
@@ -1391,10 +1516,11 @@ void LSRUse::print(raw_ostream &OS) const {
OS << ", widest fixup type: " << *WidestFixupType;
}
-LLVM_DUMP_METHOD
-void LSRUse::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
+#endif
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, MemAccessTy AccessTy,
@@ -1472,7 +1598,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, MemAccessTy AccessTy,
- const Formula &F) {
+ const Formula &F, const Loop &L) {
// For the purpose of isAMCompletelyFolded either having a canonical formula
// or a scale not equal to zero is correct.
// Problems may arise from non canonical formulae having a scale == 0.
@@ -1480,7 +1606,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
// However, when we generate the scaled formulae, we first check that the
// scaling factor is profitable before computing the actual ScaledReg for
// compile time sake.
- assert((F.isCanonical() || F.Scale != 0));
+ assert((F.isCanonical(L) || F.Scale != 0));
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
}
@@ -1515,14 +1641,15 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
}
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
- const LSRUse &LU, const Formula &F) {
+ const LSRUse &LU, const Formula &F,
+ const Loop &L) {
if (!F.Scale)
return 0;
// If the use is not completely folded in that instruction, we will have to
// pay an extra cost only for scale != 1.
if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, F))
+ LU.AccessTy, F, L))
return F.Scale != 1;
switch (LU.Kind) {
@@ -1772,6 +1899,7 @@ class LSRInstance {
void NarrowSearchSpaceByDetectingSupersets();
void NarrowSearchSpaceByCollapsingUnrolledCode();
void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByDeletingCostlyFormulas();
void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
@@ -2492,7 +2620,12 @@ static Value *getWideOperand(Value *Oper) {
static bool isCompatibleIVType(Value *LVal, Value *RVal) {
Type *LType = LVal->getType();
Type *RType = RVal->getType();
- return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
+ return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy() &&
+ // Different address spaces mean (possibly)
+ // different pointer implementation types,
+ // e.g. i16 vs i32, so disallow that.
+ (LType->getPointerAddressSpace() ==
+ RType->getPointerAddressSpace()));
}
/// Return an approximation of this SCEV expression's "base", or NULL for any
@@ -2989,8 +3122,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
User::op_iterator UseI =
find(UserInst->operands(), U.getOperandValToReplace());
assert(UseI != UserInst->op_end() && "cannot find IV operand");
- if (IVIncSet.count(UseI))
+ if (IVIncSet.count(UseI)) {
+ DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
continue;
+ }
LSRUse::KindType Kind = LSRUse::Basic;
MemAccessTy AccessTy;
@@ -3025,8 +3160,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
- N = TransformForPostIncUse(Normalize, N, CI, nullptr,
- TmpPostIncLoops, SE, DT);
+ N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
@@ -3108,7 +3242,8 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
// Do not insert formula that we will not be able to expand.
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
"Formula is illegal");
- if (!LU.InsertFormula(F))
+
+ if (!LU.InsertFormula(F, *L))
return false;
CountRegisters(F, LUIdx);
@@ -3347,7 +3482,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
F.BaseRegs.push_back(*J);
// We may have changed the number of register in base regs, adjust the
// formula accordingly.
- F.canonicalize();
+ F.canonicalize(*L);
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
@@ -3359,7 +3494,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
/// Split out subexpressions from adds and the bases of addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
Formula Base, unsigned Depth) {
- assert(Base.isCanonical() && "Input must be in the canonical form");
+ assert(Base.isCanonical(*L) && "Input must be in the canonical form");
// Arbitrarily cap recursion to protect compile time.
if (Depth >= 3)
return;
@@ -3400,7 +3535,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
- F.canonicalize();
+ F.canonicalize(*L);
(void)InsertFormula(LU, LUIdx, F);
}
}
@@ -3457,7 +3592,7 @@ void LSRInstance::GenerateConstantOffsetsImpl(
F.ScaledReg = nullptr;
} else
F.deleteBaseReg(F.BaseRegs[Idx]);
- F.canonicalize();
+ F.canonicalize(*L);
} else if (IsScaledReg)
F.ScaledReg = NewG;
else
@@ -3620,10 +3755,10 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (LU.Kind == LSRUse::ICmpZero &&
!Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
continue;
- // For each addrec base reg, apply the scale, if possible.
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
- if (const SCEVAddRecExpr *AR =
- dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
+ // For each addrec base reg, if its loop is current loop, apply the scale.
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
+ if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
if (FactorS->isZero())
continue;
@@ -3637,11 +3772,17 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// The canonical representation of 1*reg is reg, which is already in
// Base. In that case, do not try to insert the formula, it will be
// rejected anyway.
- if (F.Scale == 1 && F.BaseRegs.empty())
+ if (F.Scale == 1 && (F.BaseRegs.empty() ||
+ (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
continue;
+ // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate a
+ // non-canonical Formula whose ScaledReg's loop is not L.
+ if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
+ F.canonicalize(*L);
(void)InsertFormula(LU, LUIdx, F);
}
}
+ }
}
}
@@ -3697,10 +3838,11 @@ void WorkItem::print(raw_ostream &OS) const {
<< " , add offset " << Imm;
}
-LLVM_DUMP_METHOD
-void WorkItem::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// Look for registers which are a constant distance apart and try to form reuse
/// opportunities between them.
@@ -3821,7 +3963,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
continue;
// OK, looks good.
- NewF.canonicalize();
+ NewF.canonicalize(*this->L);
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
@@ -3853,7 +3995,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
goto skip_formula;
// Ok, looks good.
- NewF.canonicalize();
+ NewF.canonicalize(*this->L);
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
@@ -4165,6 +4307,144 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
}
}
+/// This function deletes formulas with a high expected register count.
+/// Assuming we don't know the value of each formula (all inefficient ones
+/// have already been deleted), compute for each register the probability of
+/// it not being selected.
+/// For example,
+/// Use1:
+/// reg(a) + reg({0,+,1})
+/// reg(a) + reg({-1,+,1}) + 1
+/// reg({a,+,1})
+/// Use2:
+/// reg(b) + reg({0,+,1})
+/// reg(b) + reg({-1,+,1}) + 1
+/// reg({b,+,1})
+/// Use3:
+/// reg(c) + reg(b) + reg({0,+,1})
+/// reg(c) + reg({b,+,1})
+///
+/// Probability of not selecting
+/// Use1 Use2 Use3
+/// reg(a) (1/3) * 1 * 1
+/// reg(b) 1 * (1/3) * (1/2)
+/// reg({0,+,1}) (2/3) * (2/3) * (1/2)
+/// reg({-1,+,1}) (2/3) * (2/3) * 1
+/// reg({a,+,1}) (2/3) * 1 * 1
+/// reg({b,+,1}) 1 * (2/3) * (2/3)
+/// reg(c) 1 * 1 * 0
+///
+/// Now compute the mathematical expectation of the register count for each
+/// formula. Note that for each use we exclude the probability of not being
+/// selected for that use. For example, for Use1 the probability for reg(a)
+/// would be just 1 * 1 (excluding the probability 1/3 of not being selected
+/// for Use1).
+/// Use1:
+/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted
+/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted
+/// reg({a,+,1}) 1
+/// Use2:
+/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted
+/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted
+/// reg({b,+,1}) 2/3
+/// Use3:
+/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
+/// reg(c) + reg({b,+,1}) 1 + 2/3
+
+void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
+ if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+ return;
+ // Ok, we have too many formulae on our hands to conveniently handle.
+ // Use a rough heuristic to thin out the list.
+
+ // Set of Regs which will be 100% used in the final solution, i.e. used in
+ // each formula of a solution (in the example above this is reg(c)).
+ // We can skip them in calculations.
+ SmallPtrSet<const SCEV *, 4> UniqRegs;
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ // Map each register to its probability of not being selected.
+ DenseMap <const SCEV *, float> RegNumMap;
+ for (const SCEV *Reg : RegUses) {
+ if (UniqRegs.count(Reg))
+ continue;
+ float PNotSel = 1;
+ for (const LSRUse &LU : Uses) {
+ if (!LU.Regs.count(Reg))
+ continue;
+ float P = LU.getNotSelectedProbability(Reg);
+ if (P != 0.0)
+ PNotSel *= P;
+ else
+ UniqRegs.insert(Reg);
+ }
+ RegNumMap.insert(std::make_pair(Reg, PNotSel));
+ }
+
+ DEBUG(dbgs() << "Narrowing the search space by deleting costly formulas\n");
+
+ // Delete formulas whose expected register count is high.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ // If nothing to delete - continue.
+ if (LU.Formulae.size() < 2)
+ continue;
+ // This is a temporary solution used to test performance. Float should be
+ // replaced with a rounding-independent type (based on integers) to avoid
+ // different results for different target builds.
+ float FMinRegNum = LU.Formulae[0].getNumRegs();
+ float FMinARegNum = LU.Formulae[0].getNumRegs();
+ size_t MinIdx = 0;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ float FRegNum = 0;
+ float FARegNum = 0;
+ for (const SCEV *BaseReg : F.BaseRegs) {
+ if (UniqRegs.count(BaseReg))
+ continue;
+ FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
+ if (isa<SCEVAddRecExpr>(BaseReg))
+ FARegNum +=
+ RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
+ }
+ if (const SCEV *ScaledReg = F.ScaledReg) {
+ if (!UniqRegs.count(ScaledReg)) {
+ FRegNum +=
+ RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
+ if (isa<SCEVAddRecExpr>(ScaledReg))
+ FARegNum +=
+ RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
+ }
+ }
+ if (FMinRegNum > FRegNum ||
+ (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
+ FMinRegNum = FRegNum;
+ FMinARegNum = FARegNum;
+ MinIdx = i;
+ }
+ }
+ DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
+ dbgs() << " with min reg num " << FMinRegNum << '\n');
+ if (MinIdx != 0)
+ std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
+ while (LU.Formulae.size() != 1) {
+ DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
+ dbgs() << '\n');
+ LU.Formulae.pop_back();
+ }
+ LU.RecomputeRegs(LUIdx, RegUses);
+ assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
+ Formula &F = LU.Formulae[0];
+ DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n');
+ // When we choose the formula, the regs become unique.
+ UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ if (F.ScaledReg)
+ UniqRegs.insert(F.ScaledReg);
+ }
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+}
+
+
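
To make the worked example in the comment above concrete, a small numeric sketch of the per-formula expectation that NarrowSearchSpaceByDeletingCostlyFormulas computes; the values are taken from that example and the variable names are illustrative only:

#include <cassert>
#include <cmath>

int main() {
  // Expected register count of Use1's formula "reg(a) + reg({0,+,1})".
  // RegNumMap holds each register's overall probability of not being selected;
  // dividing by the current use's own factor excludes that use, as described.
  float PNotSelA  = 1.0f / 3.0f;                // (1/3) * 1 * 1
  float PNotSelIV = 2.0f / 9.0f;                // (2/3) * (2/3) * (1/2)
  float Expected  = PNotSelA / (1.0f / 3.0f)    // reg(a) contributes 1
                  + PNotSelIV / (2.0f / 3.0f);  // reg({0,+,1}) contributes 1/3
  assert(std::fabs(Expected - 4.0f / 3.0f) < 1e-6f); // 1 + 1/3, as in the table
  return 0;
}
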
/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.
@@ -4237,7 +4517,10 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
- NarrowSearchSpaceByPickingWinnerRegs();
+ if (LSRExpNarrow)
+ NarrowSearchSpaceByDeletingCostlyFormulas();
+ else
+ NarrowSearchSpaceByPickingWinnerRegs();
}
/// This is the recursive solver.
@@ -4515,11 +4798,7 @@ Value *LSRInstance::Expand(const LSRUse &LU,
assert(!Reg->isZero() && "Zero allocated in a base register!");
// If we're expanding for a post-inc user, make the post-inc adjustment.
- PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
- Reg = TransformForPostIncUse(Denormalize, Reg,
- LF.UserInst, LF.OperandValToReplace,
- Loops, SE, DT);
-
+ Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
}
@@ -4530,9 +4809,7 @@ Value *LSRInstance::Expand(const LSRUse &LU,
// If we're expanding for a post-inc user, make the post-inc adjustment.
PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
- ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
- LF.UserInst, LF.OperandValToReplace,
- Loops, SE, DT);
+ ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);
if (LU.Kind == LSRUse::ICmpZero) {
// Expand ScaleReg as if it was part of the base regs.
@@ -4975,10 +5252,11 @@ void LSRInstance::print(raw_ostream &OS) const {
print_uses(OS);
}
-LLVM_DUMP_METHOD
-void LSRInstance::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
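
As a numeric illustration of the register-pressure term added to Cost::RateFormula above, a standalone sketch that mirrors that logic with made-up register counts (the real code takes the budget from TTI.getNumberOfRegisters(false) - 1):

#include <cassert>

// Once the live register count exceeds the budget, each extra register is
// charged as one additional instruction (an estimate of spill/fill code).
static unsigned extraInsns(unsigned PrevNumRegs, unsigned NumRegs,
                           unsigned TTIRegNum) {
  if (NumRegs <= TTIRegNum)
    return 0;
  return PrevNumRegs > TTIRegNum ? NumRegs - PrevNumRegs : NumRegs - TTIRegNum;
}

int main() {
  assert(extraInsns(30, 33, 31) == 2); // first crossing of the budget: 33 - 31
  assert(extraInsns(33, 35, 31) == 2); // already over budget: only 35 - 33
  return 0;
}
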
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index c7f91226d222..62aa6ee48069 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -44,7 +44,11 @@ using namespace llvm;
static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::Hidden,
- cl::desc("The baseline cost threshold for loop unrolling"));
+ cl::desc("The cost threshold for loop unrolling"));
+
+static cl::opt<unsigned> UnrollPartialThreshold(
+ "unroll-partial-threshold", cl::Hidden,
+ cl::desc("The cost threshold for partial loop unrolling"));
static cl::opt<unsigned> UnrollMaxPercentThresholdBoost(
"unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
@@ -106,10 +110,19 @@ static cl::opt<unsigned> FlatLoopTripCountThreshold(
"aggressively unrolled."));
static cl::opt<bool>
- UnrollAllowPeeling("unroll-allow-peeling", cl::Hidden,
+ UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
cl::desc("Allows loops to be peeled when the dynamic "
"trip count is known to be low."));
+// This option isn't ever intended to be enabled; it serves to allow
+// experiments to check the assumptions about when this kind of revisit is
+// necessary.
+static cl::opt<bool> UnrollRevisitChildLoops(
+ "unroll-revisit-child-loops", cl::Hidden,
+ cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
+ "This shouldn't typically be needed as child loops (or their "
+ "clones) were already visited."));
+
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
@@ -118,16 +131,17 @@ static const unsigned NoThreshold = UINT_MAX;
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
- Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
- Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
- Optional<bool> UserRuntime, Optional<bool> UserUpperBound) {
+ Loop *L, const TargetTransformInfo &TTI, int OptLevel,
+ Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
+ Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
+ Optional<bool> UserUpperBound) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
- UP.Threshold = 150;
+ UP.Threshold = OptLevel > 2 ? 300 : 150;
UP.MaxPercentThresholdBoost = 400;
UP.OptSizeThreshold = 0;
- UP.PartialThreshold = UP.Threshold;
+ UP.PartialThreshold = 150;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
UP.PeelCount = 0;
@@ -141,7 +155,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.AllowExpensiveTripCount = false;
UP.Force = false;
UP.UpperBound = false;
- UP.AllowPeeling = false;
+ UP.AllowPeeling = true;
// Override with any target specific settings
TTI.getUnrollingPreferences(L, UP);
@@ -153,10 +167,10 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
}
// Apply any user values specified by cl::opt
- if (UnrollThreshold.getNumOccurrences() > 0) {
+ if (UnrollThreshold.getNumOccurrences() > 0)
UP.Threshold = UnrollThreshold;
- UP.PartialThreshold = UnrollThreshold;
- }
+ if (UnrollPartialThreshold.getNumOccurrences() > 0)
+ UP.PartialThreshold = UnrollPartialThreshold;
if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost;
if (UnrollMaxCount.getNumOccurrences() > 0)
@@ -495,7 +509,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
KnownSucc = SI->getSuccessor(0);
else if (ConstantInt *SimpleCondVal =
dyn_cast<ConstantInt>(SimpleCond))
- KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor();
+ KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
}
}
if (KnownSucc) {
@@ -770,7 +784,15 @@ static bool computeUnrollCount(
}
}
- // 4rd priority is partial unrolling.
+ // 4th priority is loop peeling
+ computePeelCount(L, LoopSize, UP, TripCount);
+ if (UP.PeelCount) {
+ UP.Runtime = false;
+ UP.Count = 1;
+ return ExplicitUnroll;
+ }
+
+ // 5th priority is partial unrolling.
// Try partial unroll only when TripCount can be statically calculated.
if (TripCount) {
UP.Partial |= ExplicitUnroll;
@@ -833,14 +855,6 @@ static bool computeUnrollCount(
<< "Unable to fully unroll loop as directed by unroll(full) pragma "
"because loop has a runtime trip count.");
- // 5th priority is loop peeling
- computePeelCount(L, LoopSize, UP);
- if (UP.PeelCount) {
- UP.Runtime = false;
- UP.Count = 1;
- return ExplicitUnroll;
- }
-
// 6th priority is runtime unrolling.
// Don't unroll a runtime trip count loop when it is disabled.
if (HasRuntimeUnrollDisablePragma(L)) {
@@ -914,7 +928,7 @@ static bool computeUnrollCount(
static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution *SE, const TargetTransformInfo &TTI,
AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
- bool PreserveLCSSA,
+ bool PreserveLCSSA, int OptLevel,
Optional<unsigned> ProvidedCount,
Optional<unsigned> ProvidedThreshold,
Optional<bool> ProvidedAllowPartial,
@@ -934,7 +948,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+ L, TTI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
ProvidedRuntime, ProvidedUpperBound);
// Exit early if unrolling is disabled.
if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
@@ -1034,16 +1048,17 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll(Optional<unsigned> Threshold = None,
+ LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
Optional<bool> UpperBound = None)
- : LoopPass(ID), ProvidedCount(std::move(Count)),
+ : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
+ int OptLevel;
Optional<unsigned> ProvidedCount;
Optional<unsigned> ProvidedThreshold;
Optional<bool> ProvidedAllowPartial;
@@ -1068,7 +1083,7 @@ public:
OptimizationRemarkEmitter ORE(&F);
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
- return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
+ return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel,
ProvidedCount, ProvidedThreshold,
ProvidedAllowPartial, ProvidedRuntime,
ProvidedUpperBound);
@@ -1094,26 +1109,27 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
- int Runtime, int UpperBound) {
+Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
+ int AllowPartial, int Runtime,
+ int UpperBound) {
// TODO: It would make more sense for this function to take the optionals
// directly, but that's dangerous since it would silently break out of tree
// callers.
- return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
- Count == -1 ? None : Optional<unsigned>(Count),
- AllowPartial == -1 ? None
- : Optional<bool>(AllowPartial),
- Runtime == -1 ? None : Optional<bool>(Runtime),
- UpperBound == -1 ? None : Optional<bool>(UpperBound));
+ return new LoopUnroll(
+ OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
+ Count == -1 ? None : Optional<unsigned>(Count),
+ AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
+ Runtime == -1 ? None : Optional<bool>(Runtime),
+ UpperBound == -1 ? None : Optional<bool>(UpperBound));
}
-Pass *llvm::createSimpleLoopUnrollPass() {
- return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0);
+Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) {
+ return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0);
}
PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
+ LPMUpdater &Updater) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
Function *F = L.getHeader()->getParent();
@@ -1124,12 +1140,84 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not "
"cached at a higher level");
- bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
- /*PreserveLCSSA*/ true, ProvidedCount,
- ProvidedThreshold, ProvidedAllowPartial,
- ProvidedRuntime, ProvidedUpperBound);
-
+ // Keep track of the previous loop structure so we can identify new loops
+ // created by unrolling.
+ Loop *ParentL = L.getParentLoop();
+ SmallPtrSet<Loop *, 4> OldLoops;
+ if (ParentL)
+ OldLoops.insert(ParentL->begin(), ParentL->end());
+ else
+ OldLoops.insert(AR.LI.begin(), AR.LI.end());
+
+ // The API here is quite complex to call, but there are only two interesting
+ // states we support: partial and full (or "simple") unrolling. However, to
+ // enable these things we actually pass "None" in for the optional to avoid
+ // providing an explicit choice.
+ Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam;
+ if (!AllowPartialUnrolling)
+ AllowPartialParam = RuntimeParam = UpperBoundParam = false;
+ bool Changed = tryToUnrollLoop(
+ &L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
+ /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
+ /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam);
if (!Changed)
return PreservedAnalyses::all();
+
+ // The parent must not be damaged by unrolling!
+#ifndef NDEBUG
+ if (ParentL)
+ ParentL->verifyLoop();
+#endif
+
+ // Unrolling can do several things to introduce new loops into a loop nest:
+ // - Partial unrolling clones child loops within the current loop. If it
+ // uses a remainder, then it can also create any number of sibling loops.
+ // - Full unrolling clones child loops within the current loop but then
+ // removes the current loop making all of the children appear to be new
+ // sibling loops.
+ // - Loop peeling can directly introduce new sibling loops by peeling one
+ // iteration.
+ //
+ // When a new loop appears as a sibling loop, either from peeling an
+ // iteration or fully unrolling, its nesting structure has fundamentally
+ // changed and we want to revisit it to reflect that.
+ //
+ // When unrolling has removed the current loop, we need to tell the
+ // infrastructure that it is gone.
+ //
+ // Finally, we support a debugging/testing mode where we revisit child loops
+ // as well. These are not expected to require further optimizations as either
+ // they or the loop they were cloned from have been directly visited already.
+ // But the debugging mode allows us to check this assumption.
+ bool IsCurrentLoopValid = false;
+ SmallVector<Loop *, 4> SibLoops;
+ if (ParentL)
+ SibLoops.append(ParentL->begin(), ParentL->end());
+ else
+ SibLoops.append(AR.LI.begin(), AR.LI.end());
+ erase_if(SibLoops, [&](Loop *SibLoop) {
+ if (SibLoop == &L) {
+ IsCurrentLoopValid = true;
+ return true;
+ }
+
+ // Otherwise erase the loop from the list if it was in the old loops.
+ return OldLoops.count(SibLoop) != 0;
+ });
+ Updater.addSiblingLoops(SibLoops);
+
+ if (!IsCurrentLoopValid) {
+ Updater.markLoopAsDeleted(L);
+ } else {
+ // We can only walk child loops if the current loop remained valid.
+ if (UnrollRevisitChildLoops) {
+ // Walk *all* of the child loops. This is a highly speculative mode
+ // anyway, so look for any simplifications that arose from partial
+ // unrolling or peeling off of iterations.
+ SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
+ Updater.addChildLoops(ChildLoops);
+ }
+ }
+
return getLoopPassPreservedAnalyses();
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 76fe91884c7b..a99c9999c619 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -47,6 +48,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CommandLine.h"
@@ -77,19 +79,6 @@ static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
cl::init(100), cl::Hidden);
-static cl::opt<bool>
-LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency",
- cl::init(false), cl::Hidden,
- cl::desc("Enable the use of the block frequency analysis to access PGO "
- "heuristics to minimize code growth in cold regions."));
-
-static cl::opt<unsigned>
-ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden,
- cl::desc("Coldness threshold in percentage. The loop header frequency "
- "(relative to the entry frequency) is compared with this "
- "threshold to determine if non-trivial unswitching should be "
- "enabled."));
-
namespace {
class LUAnalysisCache {
@@ -174,13 +163,6 @@ namespace {
LUAnalysisCache BranchesInfo;
- bool EnabledPGO;
-
- // BFI and ColdEntryFreq are only used when PGO and
- // LoopUnswitchWithBlockFrequency are enabled.
- BlockFrequencyInfo BFI;
- BlockFrequency ColdEntryFreq;
-
bool OptimizeForSize;
bool redoLoop;
@@ -199,12 +181,14 @@ namespace {
// NewBlocks contained cloned copy of basic blocks from LoopBlocks.
std::vector<BasicBlock*> NewBlocks;
+ bool hasBranchDivergence;
+
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopUnswitch(bool Os = false) :
+ explicit LoopUnswitch(bool Os = false, bool hasBranchDivergence = false) :
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
currentLoop(nullptr), DT(nullptr), loopHeader(nullptr),
- loopPreheader(nullptr) {
+ loopPreheader(nullptr), hasBranchDivergence(hasBranchDivergence) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
}
@@ -217,6 +201,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (hasBranchDivergence)
+ AU.addRequired<DivergenceAnalysis>();
getLoopAnalysisUsage(AU);
}
@@ -255,6 +241,11 @@ namespace {
TerminatorInst *TI);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
+
+ /// Given that the Invariant is not equal to Val, simplify instructions
+ /// in the loop.
+ Value *SimplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant,
+ Constant *Val);
};
}
@@ -381,16 +372,35 @@ INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
-Pass *llvm::createLoopUnswitchPass(bool Os) {
- return new LoopUnswitch(Os);
+Pass *llvm::createLoopUnswitchPass(bool Os, bool hasBranchDivergence) {
+ return new LoopUnswitch(Os, hasBranchDivergence);
}
+/// Operator chain lattice.
+enum OperatorChain {
+ OC_OpChainNone, ///< There is no operator.
+ OC_OpChainOr, ///< There are only ORs.
+ OC_OpChainAnd, ///< There are only ANDs.
+ OC_OpChainMixed ///< There are ANDs and ORs.
+};
+
/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
/// an invariant piece, return the invariant. Otherwise, return null.
+//
+/// NOTE: FindLIVLoopCondition will not return a partial LIV by walking up a
+/// mixed operator chain, as we can not reliably find a value which will simplify
+/// the operator chain. If the chain is AND-only or OR-only, we can use 0 or ~0
+/// to simplify the chain.
+///
+/// NOTE: In the case of a partial LIV and a mixed operator chain, we may be
+/// able to simplify the condition itself to a loop-variant condition, but at the
+/// cost of creating an entirely new loop.
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
+ OperatorChain &ParentChain,
DenseMap<Value *, Value *> &Cache) {
auto CacheIt = Cache.find(Cond);
if (CacheIt != Cache.end())
@@ -414,21 +424,53 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
return Cond;
}
+ // Walk up the operator chain to find partial invariant conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
if (BO->getOpcode() == Instruction::And ||
BO->getOpcode() == Instruction::Or) {
- // If either the left or right side is invariant, we can unswitch on this,
- // which will cause the branch to go away in one loop and the condition to
- // simplify in the other one.
- if (Value *LHS =
- FindLIVLoopCondition(BO->getOperand(0), L, Changed, Cache)) {
- Cache[Cond] = LHS;
- return LHS;
+ // Given the previous operator, compute the current operator chain status.
+ OperatorChain NewChain;
+ switch (ParentChain) {
+ case OC_OpChainNone:
+ NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
+ OC_OpChainOr;
+ break;
+ case OC_OpChainOr:
+ NewChain = BO->getOpcode() == Instruction::Or ? OC_OpChainOr :
+ OC_OpChainMixed;
+ break;
+ case OC_OpChainAnd:
+ NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
+ OC_OpChainMixed;
+ break;
+ case OC_OpChainMixed:
+ NewChain = OC_OpChainMixed;
+ break;
}
- if (Value *RHS =
- FindLIVLoopCondition(BO->getOperand(1), L, Changed, Cache)) {
- Cache[Cond] = RHS;
- return RHS;
+
+ // If we reach a Mixed state, we do not want to keep walking up, as we cannot
+ // reliably find a value that will simplify the chain. With this check, we
+ // will return null at the first sight of a mixed chain and the caller will
+ // either backtrack to find a partial LIV in the other operand or return null.
+ if (NewChain != OC_OpChainMixed) {
+ // Update the current operator chain type before we search up the chain.
+ ParentChain = NewChain;
+ // If either the left or right side is invariant, we can unswitch on this,
+ // which will cause the branch to go away in one loop and the condition to
+ // simplify in the other one.
+ if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed,
+ ParentChain, Cache)) {
+ Cache[Cond] = LHS;
+ return LHS;
+ }
+ // We did not manage to find a partial LIV in operand(0). Backtrack and try
+ // operand(1).
+ ParentChain = NewChain;
+ if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed,
+ ParentChain, Cache)) {
+ Cache[Cond] = RHS;
+ return RHS;
+ }
}
}
@@ -436,9 +478,21 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
return nullptr;
}
-static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
+/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
+/// an invariant piece, return the invariant along with the operator chain type.
+/// Otherwise, return null.
+static std::pair<Value *, OperatorChain> FindLIVLoopCondition(Value *Cond,
+ Loop *L,
+ bool &Changed) {
DenseMap<Value *, Value *> Cache;
- return FindLIVLoopCondition(Cond, L, Changed, Cache);
+ OperatorChain OpChain = OC_OpChainNone;
+ Value *FCond = FindLIVLoopCondition(Cond, L, Changed, OpChain, Cache);
+
+ // In case we do find a LIV, it can not be obtained by walking up a mixed
+ // operator chain.
+ assert((!FCond || OpChain != OC_OpChainMixed) &&
+ "Do not expect a partial LIV with mixed operator chain");
+ return {FCond, OpChain};
}
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
@@ -457,19 +511,6 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
if (SanitizeMemory)
computeLoopSafetyInfo(&SafetyInfo, L);
- EnabledPGO = F->getEntryCount().hasValue();
-
- if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
- BranchProbabilityInfo BPI(*F, *LI);
- BFI.calculate(*L->getHeader()->getParent(), BPI, *LI);
-
- // Use BranchProbability to compute a minimum frequency based on
- // function entry baseline frequency. Loops with headers below this
- // frequency are considered as cold.
- const BranchProbability ColdProb(ColdnessThreshold, 100);
- ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb;
- }
-
bool Changed = false;
do {
assert(currentLoop->isLCSSAForm(*DT));
@@ -581,19 +622,9 @@ bool LoopUnswitch::processCurrentLoop() {
loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
return false;
- if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
- // Compute the weighted frequency of the hottest block in the
- // loop (loopHeader in this case since inner loops should be
- // processed before outer loop). If it is less than ColdFrequency,
- // we should not unswitch.
- BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader);
- if (LoopEntryFreq < ColdEntryFreq)
- return false;
- }
-
for (IntrinsicInst *Guard : Guards) {
Value *LoopCond =
- FindLIVLoopCondition(Guard->getOperand(0), currentLoop, Changed);
+ FindLIVLoopCondition(Guard->getOperand(0), currentLoop, Changed).first;
if (LoopCond &&
UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
// NB! Unswitching (if successful) could have erased some of the
@@ -634,7 +665,7 @@ bool LoopUnswitch::processCurrentLoop() {
// See if this, or some part of it, is loop invariant. If so, we can
// unswitch on it if we desire.
Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
- currentLoop, Changed);
+ currentLoop, Changed).first;
if (LoopCond &&
UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) {
++NumBranches;
@@ -642,24 +673,48 @@ bool LoopUnswitch::processCurrentLoop() {
}
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
- currentLoop, Changed);
+ Value *SC = SI->getCondition();
+ Value *LoopCond;
+ OperatorChain OpChain;
+ std::tie(LoopCond, OpChain) =
+ FindLIVLoopCondition(SC, currentLoop, Changed);
+
unsigned NumCases = SI->getNumCases();
if (LoopCond && NumCases) {
// Find a value to unswitch on:
// FIXME: this should chose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
Constant *UnswitchVal = nullptr;
-
- // Do not process same value again and again.
- // At this point we have some cases already unswitched and
- // some not yet unswitched. Let's find the first not yet unswitched one.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
- Constant *UnswitchValCandidate = i.getCaseValue();
- if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
- UnswitchVal = UnswitchValCandidate;
- break;
+ // Find a case value such that at least one case value is unswitched
+ // out.
+ if (OpChain == OC_OpChainAnd) {
+        // If the chain only has ANDs and the switch has a case value of 0,
+        // dropping a 0 into the chain will unswitch out the 0 case value.
+ auto *AllZero = cast<ConstantInt>(Constant::getNullValue(SC->getType()));
+ if (BranchesInfo.isUnswitched(SI, AllZero))
+ continue;
+ // We are unswitching 0 out.
+ UnswitchVal = AllZero;
+ } else if (OpChain == OC_OpChainOr) {
+        // If the chain only has ORs and the switch has a case value of ~0,
+        // dropping a ~0 into the chain will unswitch out the ~0 case value.
+ auto *AllOne = cast<ConstantInt>(Constant::getAllOnesValue(SC->getType()));
+ if (BranchesInfo.isUnswitched(SI, AllOne))
+ continue;
+ // We are unswitching ~0 out.
+ UnswitchVal = AllOne;
+ } else {
+ assert(OpChain == OC_OpChainNone &&
+ "Expect to unswitch on trivial chain");
+ // Do not process same value again and again.
+ // At this point we have some cases already unswitched and
+ // some not yet unswitched. Let's find the first not yet unswitched one.
+ for (auto Case : SI->cases()) {
+ Constant *UnswitchValCandidate = Case.getCaseValue();
+ if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
+ UnswitchVal = UnswitchValCandidate;
+ break;
+ }
}
}
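
As an aside for readers of this hunk, here is a stand-alone C++ sketch (illustrative names only, not part of the patch) of why the two special cases above are sound: if the loop-invariant input of an all-AND chain is assumed to be 0, the whole chain evaluates to 0 and only the 0-case successor survives; dually, an all-OR chain with the invariant assumed to be all-ones evaluates to ~0.

#include <cassert>
#include <cstdint>

// Stand-ins for the operator chains that feed the switch condition.
static uint32_t andChain(uint32_t A, uint32_t B, uint32_t LIV) {
  return A & B & LIV;
}
static uint32_t orChain(uint32_t A, uint32_t B, uint32_t LIV) {
  return A | B | LIV;
}

int main() {
  // Pinning the invariant to 0 makes the AND chain 0 for any A and B, so the
  // corresponding loop copy can only reach the 0-case of the switch.
  assert(andChain(0x12u, 0x34u, /*LIV=*/0u) == 0u);
  // Pinning the invariant to ~0 makes the OR chain ~0, selecting the ~0 case.
  assert(orChain(0x12u, 0x34u, /*LIV=*/~0u) == ~0u);
  return 0;
}
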
@@ -668,6 +723,11 @@ bool LoopUnswitch::processCurrentLoop() {
if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
++NumSwitches;
+ // In case of a full LIV, UnswitchVal is the value we unswitched out.
+      // In case of a partial LIV, we only unswitch when it's an AND-chain
+      // or OR-chain. In both cases the switch input value simplifies to
+ // UnswitchVal.
+ BranchesInfo.setUnswitched(SI, UnswitchVal);
return true;
}
}
@@ -678,7 +738,7 @@ bool LoopUnswitch::processCurrentLoop() {
BBI != E; ++BBI)
if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
- currentLoop, Changed);
+ currentLoop, Changed).first;
if (LoopCond && UnswitchIfProfitable(LoopCond,
ConstantInt::getTrue(Context))) {
++NumSelects;
@@ -753,6 +813,15 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
<< ". Cost too high.\n");
return false;
}
+ if (hasBranchDivergence &&
+ getAnalysis<DivergenceAnalysis>().isDivergent(LoopCond)) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << currentLoop->getHeader()->getName()
+ << " at non-trivial condition '" << *Val
+ << "' == " << *LoopCond << "\n"
+ << ". Condition is divergent.\n");
+ return false;
+ }
UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
return true;
@@ -899,7 +968,6 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
if (I.mayHaveSideEffects())
return false;
- // FIXME: add check for constant foldable switch instructions.
if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
if (BI->isUnconditional()) {
CurrentBB = BI->getSuccessor(0);
@@ -911,7 +979,16 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
// Found a trivial condition candidate: non-foldable conditional branch.
break;
}
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
+    // At this point, any constant-foldable instructions should probably
+    // have been folded already.
+ ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
+ if (!Cond)
+ break;
+ // Find the target block we are definitely going to.
+ CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor();
} else {
+ // We do not understand these terminator instructions.
break;
}
@@ -929,7 +1006,7 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
return false;
Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
- currentLoop, Changed);
+ currentLoop, Changed).first;
// Unswitch only if the trivial condition itself is an LIV (not
// partial LIV which could occur in and/or)
@@ -960,7 +1037,7 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
// If this isn't switching on an invariant condition, we can't unswitch it.
Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
- currentLoop, Changed);
+ currentLoop, Changed).first;
// Unswitch only if the trivial condition itself is an LIV (not
// partial LIV which could occur in and/or)
@@ -973,13 +1050,12 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
// this.
// Note that we can't trivially unswitch on the default case or
// on already unswitched cases.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
+ for (auto Case : SI->cases()) {
BasicBlock *LoopExitCandidate;
- if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
- i.getCaseSuccessor()))) {
+ if ((LoopExitCandidate =
+ isTrivialLoopExitBlock(currentLoop, Case.getCaseSuccessor()))) {
// Okay, we found a trivial case, remember the value that is trivial.
- ConstantInt *CaseVal = i.getCaseValue();
+ ConstantInt *CaseVal = Case.getCaseValue();
// Check that it was not unswitched before, since already unswitched
       // trivial vals look trivial too.
@@ -998,6 +1074,9 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
nullptr);
+
+ // We are only unswitching full LIV.
+ BranchesInfo.setUnswitched(SI, CondVal);
++NumSwitches;
return true;
}
@@ -1253,18 +1332,38 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
if (!UI || !L->contains(UI))
continue;
- Worklist.push_back(UI);
+ // At this point, we know LIC is definitely not Val. Try to use some simple
+  // logic to simplify the user w.r.t. the context.
+ if (Value *Replacement = SimplifyInstructionWithNotEqual(UI, LIC, Val)) {
+ if (LI->replacementPreservesLCSSAForm(UI, Replacement)) {
+        // This in-loop instruction has been simplified w.r.t. its context,
+        // i.e. LIC != Val; make sure we propagate its replacement value to
+        // all its users.
+        //
+        // We cannot delete UI, the LIC user, yet, because that would
+        // invalidate the LIC->users() iterator. However, we can make this
+        // instruction dead by replacing all its users and pushing it onto the
+        // worklist so that it can be properly deleted and its operands
+        // simplified.
+ UI->replaceAllUsesWith(Replacement);
+ }
+ }
- // TODO: We could do other simplifications, for example, turning
- // 'icmp eq LIC, Val' -> false.
+ // This is a LIC user, push it into the worklist so that SimplifyCode can
+ // attempt to simplify it.
+ Worklist.push_back(UI);
// If we know that LIC is not Val, use this info to simplify code.
SwitchInst *SI = dyn_cast<SwitchInst>(UI);
if (!SI || !isa<ConstantInt>(Val)) continue;
- SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+ // NOTE: if a case value for the switch is unswitched out, we record it
+ // after the unswitch finishes. We can not record it here as the switch
+ // is not a direct user of the partial LIV.
+ SwitchInst::CaseHandle DeadCase =
+ *SI->findCaseValue(cast<ConstantInt>(Val));
// Default case is live for multiple values.
- if (DeadCase == SI->case_default()) continue;
+ if (DeadCase == *SI->case_default())
+ continue;
// Found a dead case value. Don't remove PHI nodes in the
// successor if they become single-entry, those PHI nodes may
@@ -1274,8 +1373,6 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
BasicBlock *SISucc = DeadCase.getCaseSuccessor();
BasicBlock *Latch = L->getLoopLatch();
- BranchesInfo.setUnswitched(SI, Val);
-
if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge
@@ -1397,3 +1494,27 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
}
}
}
+
+/// Simple simplifications we can do given the information that Invariant is
+/// definitely not equal to Val.
+Value *LoopUnswitch::SimplifyInstructionWithNotEqual(Instruction *Inst,
+ Value *Invariant,
+ Constant *Val) {
+ // icmp eq cond, val -> false
+ ICmpInst *CI = dyn_cast<ICmpInst>(Inst);
+ if (CI && CI->isEquality()) {
+ Value *Op0 = CI->getOperand(0);
+ Value *Op1 = CI->getOperand(1);
+ if ((Op0 == Invariant && Op1 == Val) || (Op0 == Val && Op1 == Invariant)) {
+ LLVMContext &Ctx = Inst->getContext();
+ if (CI->getPredicate() == CmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(Ctx);
+ else
+ return ConstantInt::getTrue(Ctx);
+ }
+ }
+
+ // FIXME: there may be other opportunities, e.g. comparison with floating
+ // point, or Invariant - Val != 0, etc.
+ return nullptr;
+}
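
A hypothetical source-level sketch of the simplification SimplifyInstructionWithNotEqual performs (function names and the constant 42 are made up for illustration): in the loop copy that is only entered when the invariant differs from the unswitched value, equality compares against that value fold to false and inequality compares fold to true.

#include <cassert>

// Assume the unswitched loop copy is only entered when Invariant != 42.
static bool eqAfterSimplify(int Invariant) {
  (void)Invariant;
  return false;            // icmp eq Invariant, 42  ->  false
}
static bool neAfterSimplify(int Invariant) {
  (void)Invariant;
  return true;             // icmp ne Invariant, 42  ->  true
}

int main() {
  const int Vals[] = {-1, 0, 7, 41, 43}; // values consistent with != 42
  for (int Inv : Vals) {
    assert(eqAfterSimplify(Inv) == (Inv == 42));
    assert(neAfterSimplify(Inv) == (Inv != 42));
  }
  return 0;
}
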
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 52975ef35153..a143b9a3c645 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -67,11 +67,11 @@ static bool handleSwitchExpect(SwitchInst &SI) {
if (!ExpectedValue)
return false;
- SwitchInst::CaseIt Case = SI.findCaseValue(ExpectedValue);
+ SwitchInst::CaseHandle Case = *SI.findCaseValue(ExpectedValue);
unsigned n = SI.getNumCases(); // +1 for default case.
SmallVector<uint32_t, 16> Weights(n + 1, UnlikelyBranchWeight);
- if (Case == SI.case_default())
+ if (Case == *SI.case_default())
Weights[0] = LikelyBranchWeight;
else
Weights[Case.getCaseIndex() + 1] = LikelyBranchWeight;
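
A minimal sketch of the weight-vector layout the hunk above relies on (the helper name and the weight values are placeholders, not the pass's actual constants): slot 0 of the branch-weight metadata belongs to the default destination and case i maps to slot i + 1, so the expected destination gets the likely weight and every other destination the unlikely one.

#include <cstdint>
#include <vector>

static std::vector<uint32_t> switchWeights(unsigned NumCases,
                                           int ExpectedCaseIdx, // -1 = default
                                           uint32_t Likely, uint32_t Unlikely) {
  std::vector<uint32_t> Weights(NumCases + 1, Unlikely);
  if (ExpectedCaseIdx < 0)
    Weights[0] = Likely;                                // default destination
  else
    Weights[static_cast<unsigned>(ExpectedCaseIdx) + 1] = Likely; // case i
  return Weights;
}

// Example: switchWeights(3, 1, 2000, 1) produces {1, 1, 2000, 1}.
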
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 1b590140f70a..a3f3f25c1e0f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -12,20 +12,49 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
@@ -119,6 +148,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
return true;
}
+namespace {
/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
@@ -130,7 +160,6 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
/// the first store, we make a range [1, 2). The second store extends the range
/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
/// two ranges into [0, 3) which is memset'able.
-namespace {
struct MemsetRange {
// Start/End - A semi range that describes the span that this range covers.
// The range is closed at the start and open at the end: [Start, End).
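
The half-open-interval bookkeeping described in the comment above, as an illustrative stand-alone sketch rather than MemCpyOpt's real MemsetRanges class: each store of the same byte value contributes a [Start, End) range, and overlapping or adjacent ranges are merged until a memset-able span such as [0, 3) emerges.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

struct ByteRange { int64_t Start, End; }; // half-open: [Start, End)

static std::vector<ByteRange> mergeRanges(std::vector<ByteRange> Rs) {
  std::sort(Rs.begin(), Rs.end(), [](const ByteRange &A, const ByteRange &B) {
    return A.Start < B.Start;
  });
  std::vector<ByteRange> Out;
  for (const ByteRange &R : Rs) {
    if (!Out.empty() && R.Start <= Out.back().End)
      Out.back().End = std::max(Out.back().End, R.End); // extend existing span
    else
      Out.push_back(R);                                 // start a new span
  }
  return Out;
}

int main() {
  // Stores covering [1,2), [0,2) and [2,3) collapse to the single span [0,3).
  auto Merged = mergeRanges({{1, 2}, {0, 2}, {2, 3}});
  assert(Merged.size() == 1 && Merged[0].Start == 0 && Merged[0].End == 3);
  return 0;
}
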
@@ -148,7 +177,8 @@ struct MemsetRange {
bool isProfitableToUseMemset(const DataLayout &DL) const;
};
-} // end anon namespace
+
+} // end anonymous namespace
bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
@@ -192,13 +222,14 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
return TheStores.size() > NumPointerStores+NumByteStores;
}
-
namespace {
+
class MemsetRanges {
/// A sorted list of the memset ranges.
SmallVector<MemsetRange, 8> Ranges;
typedef SmallVectorImpl<MemsetRange>::iterator range_iterator;
const DataLayout &DL;
+
public:
MemsetRanges(const DataLayout &DL) : DL(DL) {}
@@ -231,8 +262,7 @@ public:
};
-} // end anon namespace
-
+} // end anonymous namespace
/// Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
@@ -299,48 +329,36 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
//===----------------------------------------------------------------------===//
namespace {
- class MemCpyOptLegacyPass : public FunctionPass {
- MemCpyOptPass Impl;
- public:
- static char ID; // Pass identification, replacement for typeid
- MemCpyOptLegacyPass() : FunctionPass(ID) {
- initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
-
- private:
- // This transformation requires dominator postdominator info
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<MemoryDependenceWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<MemoryDependenceWrapperPass>();
- }
+class MemCpyOptLegacyPass : public FunctionPass {
+ MemCpyOptPass Impl;
- // Helper functions
- bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
- bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
- bool processMemCpy(MemCpyInst *M);
- bool processMemMove(MemMoveInst *M);
- bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
- uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
- bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
- bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
- bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep);
- bool processByValArgument(CallSite CS, unsigned ArgNo);
- Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
- Value *ByteVal);
-
- bool iterateOnFunction(Function &F);
- };
+public:
+ static char ID; // Pass identification, replacement for typeid
- char MemCpyOptLegacyPass::ID = 0;
-}
+ MemCpyOptLegacyPass() : FunctionPass(ID) {
+ initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+private:
+ // This transformation requires dominator postdominator info
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<MemoryDependenceWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<MemoryDependenceWrapperPass>();
+ }
+};
+
+char MemCpyOptLegacyPass::ID = 0;
+
+} // end anonymous namespace
/// The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
@@ -523,14 +541,15 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
if (Args.erase(C))
NeedLift = true;
else if (MayAlias) {
- NeedLift = any_of(MemLocs, [C, &AA](const MemoryLocation &ML) {
+ NeedLift = llvm::any_of(MemLocs, [C, &AA](const MemoryLocation &ML) {
return AA.getModRefInfo(C, ML);
});
if (!NeedLift)
- NeedLift = any_of(CallSites, [C, &AA](const ImmutableCallSite &CS) {
- return AA.getModRefInfo(C, CS);
- });
+ NeedLift =
+ llvm::any_of(CallSites, [C, &AA](const ImmutableCallSite &CS) {
+ return AA.getModRefInfo(C, CS);
+ });
}
if (!NeedLift)
@@ -567,7 +586,7 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
}
// We made it, we need to lift
- for (auto *I : reverse(ToLift)) {
+ for (auto *I : llvm::reverse(ToLift)) {
DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
I->moveBefore(P);
}
@@ -761,7 +780,6 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
return false;
}
-
/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
@@ -914,6 +932,17 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
if (MR != MRI_NoModRef)
return false;
+ // We can't create address space casts here because we don't know if they're
+ // safe for the target.
+ if (cpySrc->getType()->getPointerAddressSpace() !=
+ cpyDest->getType()->getPointerAddressSpace())
+ return false;
+ for (unsigned i = 0; i < CS.arg_size(); ++i)
+ if (CS.getArgument(i)->stripPointerCasts() == cpySrc &&
+ cpySrc->getType()->getPointerAddressSpace() !=
+ CS.getArgument(i)->getType()->getPointerAddressSpace())
+ return false;
+
// All the checks have passed, so do the transformation.
bool changedArgument = false;
for (unsigned i = 0; i < CS.arg_size(); ++i)
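
A simplified sketch of the guard added in this hunk, using plain address-space numbers instead of LLVM pointer types (the helper name is hypothetical): the call-slot rewrite would have to introduce addrspacecasts whose legality is target-specific, so it gives up whenever the destination, or any call argument that strips back to the source, lives in a different address space than the source.

#include <vector>

// SrcAS/DstAS are the address spaces of the memcpy source and destination;
// SrcArgASes are the address spaces of the call arguments that strip back to
// the memcpy source pointer.
static bool callSlotAddrSpacesOK(unsigned SrcAS, unsigned DstAS,
                                 const std::vector<unsigned> &SrcArgASes) {
  if (SrcAS != DstAS)
    return false;                 // would need a source -> dest addrspacecast
  for (unsigned ArgAS : SrcArgASes)
    if (ArgAS != SrcAS)
      return false;               // would need a cast at the call site
  return true;
}
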
@@ -1240,7 +1269,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
AliasAnalysis &AA = LookupAliasAnalysis();
- if (!TLI->has(LibFunc::memmove))
+ if (!TLI->has(LibFunc_memmove))
return false;
// See if the pointers alias.
@@ -1306,6 +1335,11 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
CS.getInstruction(), &AC, &DT) < ByValAlign)
return false;
+ // The address space of the memcpy source must match the byval argument
+ if (MDep->getSource()->getType()->getPointerAddressSpace() !=
+ ByValArg->getType()->getPointerAddressSpace())
+ return false;
+
// Verify that the copied-from memory doesn't change in between the memcpy and
// the byval call.
// memcpy(a <- b)
@@ -1375,7 +1409,6 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
}
PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
-
auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
@@ -1393,7 +1426,9 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
LookupAssumptionCache, LookupDomTree);
if (!MadeChange)
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
PA.preserve<MemoryDependenceAnalysis>();
return PA;
@@ -1414,10 +1449,10 @@ bool MemCpyOptPass::runImpl(
// If we don't have at least memset and memcpy, there is little point of doing
// anything here. These are required by a freestanding implementation, so if
// even they are disabled, there is no point in trying hard.
- if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
+ if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy))
return false;
- while (1) {
+ while (true) {
if (!iterateOnFunction(F))
break;
MadeChange = true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 6a64c6b3619c..acd3ef6791be 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -19,6 +19,8 @@
// thinks it safe to do so. This optimization helps with eg. hiding load
// latencies, triggering if-conversion, and reducing static code size.
//
+// NOTE: This code no longer performs load hoisting; it is subsumed by GVNHoist.
+//
//===----------------------------------------------------------------------===//
//
//
@@ -87,7 +89,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -118,16 +119,6 @@ private:
void removeInstruction(Instruction *Inst);
BasicBlock *getDiamondTail(BasicBlock *BB);
bool isDiamondHead(BasicBlock *BB);
- // Routines for hoisting loads
- bool isLoadHoistBarrierInRange(const Instruction &Start,
- const Instruction &End, LoadInst *LI,
- bool SafeToLoadUnconditionally);
- LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
- void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
- Instruction *ElseInst);
- bool isSafeToHoist(Instruction *I) const;
- bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
- bool mergeLoads(BasicBlock *BB);
// Routines for sinking stores
StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
@@ -188,169 +179,6 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
return true;
}
-///
-/// \brief True when instruction is a hoist barrier for a load
-///
-/// Whenever an instruction could possibly modify the value
-/// being loaded or protect against the load from happening
-/// it is considered a hoist barrier.
-///
-bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(
- const Instruction &Start, const Instruction &End, LoadInst *LI,
- bool SafeToLoadUnconditionally) {
- if (!SafeToLoadUnconditionally)
- for (const Instruction &Inst :
- make_range(Start.getIterator(), End.getIterator()))
- if (!isGuaranteedToTransferExecutionToSuccessor(&Inst))
- return true;
- MemoryLocation Loc = MemoryLocation::get(LI);
- return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
-}
-
-///
-/// \brief Decide if a load can be hoisted
-///
-/// When there is a load in \p BB to the same address as \p LI
-/// and it can be hoisted from \p BB, return that load.
-/// Otherwise return Null.
-///
-LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
- LoadInst *Load0) {
- BasicBlock *BB0 = Load0->getParent();
- BasicBlock *Head = BB0->getSinglePredecessor();
- bool SafeToLoadUnconditionally = isSafeToLoadUnconditionally(
- Load0->getPointerOperand(), Load0->getAlignment(),
- Load0->getModule()->getDataLayout(),
- /*ScanFrom=*/Head->getTerminator());
- for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
- ++BBI) {
- Instruction *Inst = &*BBI;
-
- // Only merge and hoist loads when their result in used only in BB
- auto *Load1 = dyn_cast<LoadInst>(Inst);
- if (!Load1 || Inst->isUsedOutsideOfBlock(BB1))
- continue;
-
- MemoryLocation Loc0 = MemoryLocation::get(Load0);
- MemoryLocation Loc1 = MemoryLocation::get(Load1);
- if (Load0->isSameOperationAs(Load1) && AA->isMustAlias(Loc0, Loc1) &&
- !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1,
- SafeToLoadUnconditionally) &&
- !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0,
- SafeToLoadUnconditionally)) {
- return Load1;
- }
- }
- return nullptr;
-}
-
-///
-/// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into
-/// \p BB
-///
-/// BB is the head of a diamond
-///
-void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
- Instruction *HoistCand,
- Instruction *ElseInst) {
- DEBUG(dbgs() << " Hoist Instruction into BB \n"; BB->dump();
- dbgs() << "Instruction Left\n"; HoistCand->dump(); dbgs() << "\n";
- dbgs() << "Instruction Right\n"; ElseInst->dump(); dbgs() << "\n");
- // Hoist the instruction.
- assert(HoistCand->getParent() != BB);
-
- // Intersect optional metadata.
- HoistCand->andIRFlags(ElseInst);
- HoistCand->dropUnknownNonDebugMetadata();
-
- // Prepend point for instruction insert
- Instruction *HoistPt = BB->getTerminator();
-
- // Merged instruction
- Instruction *HoistedInst = HoistCand->clone();
-
- // Hoist instruction.
- HoistedInst->insertBefore(HoistPt);
-
- HoistCand->replaceAllUsesWith(HoistedInst);
- removeInstruction(HoistCand);
- // Replace the else block instruction.
- ElseInst->replaceAllUsesWith(HoistedInst);
- removeInstruction(ElseInst);
-}
-
-///
-/// \brief Return true if no operand of \p I is defined in I's parent block
-///
-bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const {
- BasicBlock *Parent = I->getParent();
- for (Use &U : I->operands())
- if (auto *Instr = dyn_cast<Instruction>(&U))
- if (Instr->getParent() == Parent)
- return false;
- return true;
-}
-
-///
-/// \brief Merge two equivalent loads and GEPs and hoist into diamond head
-///
-bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0,
- LoadInst *L1) {
- // Only one definition?
- auto *A0 = dyn_cast<Instruction>(L0->getPointerOperand());
- auto *A1 = dyn_cast<Instruction>(L1->getPointerOperand());
- if (A0 && A1 && A0->isIdenticalTo(A1) && isSafeToHoist(A0) &&
- A0->hasOneUse() && (A0->getParent() == L0->getParent()) &&
- A1->hasOneUse() && (A1->getParent() == L1->getParent()) &&
- isa<GetElementPtrInst>(A0)) {
- DEBUG(dbgs() << "Hoist Instruction into BB \n"; BB->dump();
- dbgs() << "Instruction Left\n"; L0->dump(); dbgs() << "\n";
- dbgs() << "Instruction Right\n"; L1->dump(); dbgs() << "\n");
- hoistInstruction(BB, A0, A1);
- hoistInstruction(BB, L0, L1);
- return true;
- }
- return false;
-}
-
-///
-/// \brief Try to hoist two loads to same address into diamond header
-///
-/// Starting from a diamond head block, iterate over the instructions in one
-/// successor block and try to match a load in the second successor.
-///
-bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
- bool MergedLoads = false;
- assert(isDiamondHead(BB));
- BranchInst *BI = cast<BranchInst>(BB->getTerminator());
- BasicBlock *Succ0 = BI->getSuccessor(0);
- BasicBlock *Succ1 = BI->getSuccessor(1);
- // #Instructions in Succ1 for Compile Time Control
- int Size1 = Succ1->size();
- int NLoads = 0;
- for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
- BBI != BBE;) {
- Instruction *I = &*BBI;
- ++BBI;
-
- // Don't move non-simple (atomic, volatile) loads.
- auto *L0 = dyn_cast<LoadInst>(I);
- if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0))
- continue;
-
- ++NLoads;
- if (NLoads * Size1 >= MagicCompileTimeControl)
- break;
- if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) {
- bool Res = hoistLoad(BB, L0, L1);
- MergedLoads |= Res;
- // Don't attempt to hoist above loads that had not been hoisted.
- if (!Res)
- break;
- }
- }
- return MergedLoads;
-}
///
/// \brief True when instruction is a sink barrier for a store
@@ -534,7 +362,6 @@ bool MergedLoadStoreMotion::run(Function &F, MemoryDependenceResults *MD,
// Hoist equivalent loads and sink stores
// outside diamonds when possible
if (isDiamondHead(BB)) {
- Changed |= mergeLoads(BB);
Changed |= mergeStores(getDiamondTail(BB));
}
}
@@ -596,8 +423,8 @@ MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!Impl.run(F, MD, AA))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
PA.preserve<MemoryDependenceAnalysis>();
return PA;
diff --git a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index 0a3bf7b4c31b..c5bf2f28d185 100644
--- a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -156,20 +156,12 @@ PreservedAnalyses NaryReassociatePass::run(Function &F,
auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
- bool Changed = runImpl(F, AC, DT, SE, TLI, TTI);
-
- // FIXME: We need to invalidate this to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<ScalarEvolutionAnalysis>(F);
-
- if (!Changed)
+ if (!runImpl(F, AC, DT, SE, TLI, TTI))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<ScalarEvolutionAnalysis>();
- PA.preserve<TargetLibraryAnalysis>();
return PA;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 57e6e3ddad94..3d8ce888867e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -17,6 +17,27 @@
/// "A Sparse Algorithm for Predicated Global Value Numbering" from
/// Karthik Gargi.
///
+/// A brief overview of the algorithm: The algorithm is essentially the same as
+/// the standard RPO value numbering algorithm (a good reference is the paper
+/// "SCC based value numbering" by L. Taylor Simpson) with one major difference:
+/// The RPO algorithm proceeds, on every iteration, to process every reachable
+/// block and every instruction in that block. This is because the standard RPO
+/// algorithm does not track what things have the same value number, it only
+/// tracks what the value number of a given operation is (the mapping is
+/// operation -> value number). Thus, when a value number of an operation
+/// changes, it must reprocess everything to ensure all uses of a value number
+/// get updated properly. In contrast, the sparse algorithm we use *also*
+/// tracks what operations have a given value number (IE it also tracks the
+/// reverse mapping from value number -> operations with that value number), so
+/// that it only needs to reprocess the instructions that are affected when
+/// something's value number changes. The rest of the algorithm is devoted to
+/// performing symbolic evaluation, forward propagation, and simplification of
+/// operations based on the value numbers deduced so far.
+///
+/// We also do not perform elimination by using any published algorithm. All
+/// published algorithms are O(Instructions). Instead, we use a technique that
+/// is O(number of operations with the same value number), enabling us to skip
+/// trying to eliminate things that have unique value numbers.
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/NewGVN.h"
@@ -40,13 +61,10 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
@@ -55,24 +73,25 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/PredIteratorCache.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/MemorySSA.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/PredicateInfo.h"
+#include "llvm/Transforms/Utils/VNCoercion.h"
+#include <numeric>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace llvm;
using namespace PatternMatch;
using namespace llvm::GVNExpression;
-
+using namespace llvm::VNCoercion;
#define DEBUG_TYPE "newgvn"
STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted");
@@ -87,6 +106,15 @@ STATISTIC(NumGVNAvoidedSortedLeaderChanges,
"Number of avoided sorted leader changes");
STATISTIC(NumGVNNotMostDominatingLeader,
"Number of times a member dominated it's new classes' leader");
+STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated");
+DEBUG_COUNTER(VNCounter, "newgvn-vn",
+ "Controls which instructions are value numbered")
+
+// Currently store defining access refinement is too slow due to basicaa being
+// egregiously slow. This flag lets us keep it working while we work on this
+// issue.
+static cl::opt<bool> EnableStoreRefinement("enable-store-refinement",
+ cl::init(false), cl::Hidden);
//===----------------------------------------------------------------------===//
// GVN Pass
@@ -105,6 +133,77 @@ PHIExpression::~PHIExpression() = default;
}
}
+// Tarjan's SCC finding algorithm with Nuutila's improvements
+// SCCIterator is actually fairly complex for the simple thing we want.
+// It also wants to hand us SCC's that are unrelated to the phi node we ask
+// about, and have us process them there or risk redoing work.
+// Graph traits over a filter iterator also doesn't work that well here.
+// This SCC finder is specialized to walk use-def chains, and only follows
+// instructions, not generic values (arguments, etc.).
+struct TarjanSCC {
+
+ TarjanSCC() : Components(1) {}
+
+ void Start(const Instruction *Start) {
+ if (Root.lookup(Start) == 0)
+ FindSCC(Start);
+ }
+
+ const SmallPtrSetImpl<const Value *> &getComponentFor(const Value *V) const {
+ unsigned ComponentID = ValueToComponent.lookup(V);
+
+ assert(ComponentID > 0 &&
+ "Asking for a component for a value we never processed");
+ return Components[ComponentID];
+ }
+
+private:
+ void FindSCC(const Instruction *I) {
+ Root[I] = ++DFSNum;
+ // Store the DFS Number we had before it possibly gets incremented.
+ unsigned int OurDFS = DFSNum;
+ for (auto &Op : I->operands()) {
+ if (auto *InstOp = dyn_cast<Instruction>(Op)) {
+ if (Root.lookup(Op) == 0)
+ FindSCC(InstOp);
+ if (!InComponent.count(Op))
+ Root[I] = std::min(Root.lookup(I), Root.lookup(Op));
+ }
+ }
+    // See if we really were the root of a component, by seeing if we still
+    // have our DFSNumber. If we do, we are the root of the component, and we
+    // have completed a component. If we do not, we are not the root of a
+    // component, and belong on the component stack.
+ if (Root.lookup(I) == OurDFS) {
+ unsigned ComponentID = Components.size();
+ Components.resize(Components.size() + 1);
+ auto &Component = Components.back();
+ Component.insert(I);
+ DEBUG(dbgs() << "Component root is " << *I << "\n");
+ InComponent.insert(I);
+ ValueToComponent[I] = ComponentID;
+ // Pop a component off the stack and label it.
+ while (!Stack.empty() && Root.lookup(Stack.back()) >= OurDFS) {
+ auto *Member = Stack.back();
+ DEBUG(dbgs() << "Component member is " << *Member << "\n");
+ Component.insert(Member);
+ InComponent.insert(Member);
+ ValueToComponent[Member] = ComponentID;
+ Stack.pop_back();
+ }
+ } else {
+ // Part of a component, push to stack
+ Stack.push_back(I);
+ }
+ }
+ unsigned int DFSNum = 1;
+ SmallPtrSet<const Value *, 8> InComponent;
+ DenseMap<const Value *, unsigned int> Root;
+ SmallVector<const Value *, 8> Stack;
+  // Store the components as a vector of ptr sets, because we need the topo
+  // order of SCC's, but not individual member order.
+ SmallVector<SmallPtrSet<const Value *, 8>, 8> Components;
+ DenseMap<const Value *, unsigned> ValueToComponent;
+};
// Congruence classes represent the set of expressions/instructions
// that are all the same *during some scope in the function*.
// That is, because of the way we perform equality propagation, and
@@ -115,43 +214,152 @@ PHIExpression::~PHIExpression() = default;
// For any Value in the Member set, it is valid to replace any dominated member
// with that Value.
//
-// Every congruence class has a leader, and the leader is used to
-// symbolize instructions in a canonical way (IE every operand of an
-// instruction that is a member of the same congruence class will
-// always be replaced with leader during symbolization).
-// To simplify symbolization, we keep the leader as a constant if class can be
-// proved to be a constant value.
-// Otherwise, the leader is a randomly chosen member of the value set, it does
-// not matter which one is chosen.
-// Each congruence class also has a defining expression,
-// though the expression may be null. If it exists, it can be used for forward
-// propagation and reassociation of values.
-//
-struct CongruenceClass {
- using MemberSet = SmallPtrSet<Value *, 4>;
+// Every congruence class has a leader, and the leader is used to symbolize
+// instructions in a canonical way (IE every operand of an instruction that is a
+// member of the same congruence class will always be replaced with leader
+// during symbolization). To simplify symbolization, we keep the leader as a
+// constant if class can be proved to be a constant value. Otherwise, the
+// leader is the member of the value set with the smallest DFS number. Each
+// congruence class also has a defining expression, though the expression may be
+// null. If it exists, it can be used for forward propagation and reassociation
+// of values.
+
+// For memory, we also track a representative MemoryAccess, and a set of memory
+// members for MemoryPhis (which have no real instructions). Note that for
+// memory, it seems tempting to try to split the memory members into a
+// MemoryCongruenceClass or something. Unfortunately, this does not work
+// easily. The value numbering of a given memory expression depends on the
+// leader of the memory congruence class, and the leader of memory congruence
+// class depends on the value numbering of a given memory expression. This
+// leads to wasted propagation, and in some cases, missed optimization. For
+// example: If we had value numbered two stores together before, but now do not,
+// we move them to a new value congruence class. This in turn will move at one
+// of the memorydefs to a new memory congruence class. Which in turn, affects
+// the value numbering of the stores we just value numbered (because the memory
+// congruence class is part of the value number). So while theoretically
+// possible to split them up, it turns out to be *incredibly* complicated to get
+// it to work right, because of the interdependency. While structurally
+// slightly messier, it is algorithmically much simpler and faster to do what we
+// do here, and track them both at once in the same class.
+// Note: The default iterators for this class iterate over values
+class CongruenceClass {
+public:
+ using MemberType = Value;
+ using MemberSet = SmallPtrSet<MemberType *, 4>;
+ using MemoryMemberType = MemoryPhi;
+ using MemoryMemberSet = SmallPtrSet<const MemoryMemberType *, 2>;
+
+ explicit CongruenceClass(unsigned ID) : ID(ID) {}
+ CongruenceClass(unsigned ID, Value *Leader, const Expression *E)
+ : ID(ID), RepLeader(Leader), DefiningExpr(E) {}
+ unsigned getID() const { return ID; }
+ // True if this class has no members left. This is mainly used for assertion
+ // purposes, and for skipping empty classes.
+ bool isDead() const {
+ // If it's both dead from a value perspective, and dead from a memory
+ // perspective, it's really dead.
+ return empty() && memory_empty();
+ }
+ // Leader functions
+ Value *getLeader() const { return RepLeader; }
+ void setLeader(Value *Leader) { RepLeader = Leader; }
+ const std::pair<Value *, unsigned int> &getNextLeader() const {
+ return NextLeader;
+ }
+ void resetNextLeader() { NextLeader = {nullptr, ~0}; }
+
+ void addPossibleNextLeader(std::pair<Value *, unsigned int> LeaderPair) {
+ if (LeaderPair.second < NextLeader.second)
+ NextLeader = LeaderPair;
+ }
+
+ Value *getStoredValue() const { return RepStoredValue; }
+ void setStoredValue(Value *Leader) { RepStoredValue = Leader; }
+ const MemoryAccess *getMemoryLeader() const { return RepMemoryAccess; }
+ void setMemoryLeader(const MemoryAccess *Leader) { RepMemoryAccess = Leader; }
+
+ // Forward propagation info
+ const Expression *getDefiningExpr() const { return DefiningExpr; }
+ void setDefiningExpr(const Expression *E) { DefiningExpr = E; }
+
+ // Value member set
+ bool empty() const { return Members.empty(); }
+ unsigned size() const { return Members.size(); }
+ MemberSet::const_iterator begin() const { return Members.begin(); }
+ MemberSet::const_iterator end() const { return Members.end(); }
+ void insert(MemberType *M) { Members.insert(M); }
+ void erase(MemberType *M) { Members.erase(M); }
+ void swap(MemberSet &Other) { Members.swap(Other); }
+
+ // Memory member set
+ bool memory_empty() const { return MemoryMembers.empty(); }
+ unsigned memory_size() const { return MemoryMembers.size(); }
+ MemoryMemberSet::const_iterator memory_begin() const {
+ return MemoryMembers.begin();
+ }
+ MemoryMemberSet::const_iterator memory_end() const {
+ return MemoryMembers.end();
+ }
+ iterator_range<MemoryMemberSet::const_iterator> memory() const {
+ return make_range(memory_begin(), memory_end());
+ }
+ void memory_insert(const MemoryMemberType *M) { MemoryMembers.insert(M); }
+ void memory_erase(const MemoryMemberType *M) { MemoryMembers.erase(M); }
+
+ // Store count
+ unsigned getStoreCount() const { return StoreCount; }
+ void incStoreCount() { ++StoreCount; }
+ void decStoreCount() {
+ assert(StoreCount != 0 && "Store count went negative");
+ --StoreCount;
+ }
+
+  // Return true if two congruence classes are equivalent to each other. This
+  // means that every field but the ID number and the dead field are
+  // equivalent.
+ bool isEquivalentTo(const CongruenceClass *Other) const {
+ if (!Other)
+ return false;
+ if (this == Other)
+ return true;
+
+ if (std::tie(StoreCount, RepLeader, RepStoredValue, RepMemoryAccess) !=
+ std::tie(Other->StoreCount, Other->RepLeader, Other->RepStoredValue,
+ Other->RepMemoryAccess))
+ return false;
+ if (DefiningExpr != Other->DefiningExpr)
+ if (!DefiningExpr || !Other->DefiningExpr ||
+ *DefiningExpr != *Other->DefiningExpr)
+ return false;
+ // We need some ordered set
+ std::set<Value *> AMembers(Members.begin(), Members.end());
+ std::set<Value *> BMembers(Members.begin(), Members.end());
+ return AMembers == BMembers;
+ }
+
+private:
unsigned ID;
// Representative leader.
Value *RepLeader = nullptr;
+ // The most dominating leader after our current leader, because the member set
+ // is not sorted and is expensive to keep sorted all the time.
+ std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U};
+ // If this is represented by a store, the value of the store.
+ Value *RepStoredValue = nullptr;
+ // If this class contains MemoryDefs or MemoryPhis, this is the leading memory
+ // access.
+ const MemoryAccess *RepMemoryAccess = nullptr;
// Defining Expression.
const Expression *DefiningExpr = nullptr;
// Actual members of this class.
MemberSet Members;
-
- // True if this class has no members left. This is mainly used for assertion
- // purposes, and for skipping empty classes.
- bool Dead = false;
-
+ // This is the set of MemoryPhis that exist in the class. MemoryDefs and
+ // MemoryUses have real instructions representing them, so we only need to
+ // track MemoryPhis here.
+ MemoryMemberSet MemoryMembers;
// Number of stores in this congruence class.
// This is used so we can detect store equivalence changes properly.
int StoreCount = 0;
-
- // The most dominating leader after our current leader, because the member set
- // is not sorted and is expensive to keep sorted all the time.
- std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U};
-
- explicit CongruenceClass(unsigned ID) : ID(ID) {}
- CongruenceClass(unsigned ID, Value *Leader, const Expression *E)
- : ID(ID), RepLeader(Leader), DefiningExpr(E) {}
};
namespace llvm {
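
A hypothetical source-level analogue, not the pass itself, of what the leader machinery above enables during elimination: every dominated member of a congruence class can be rewritten in terms of one representative, and if that representative is a constant the whole class folds away.

#include <cassert>

static int beforeGVN(int X, int Y) {
  int A = X + Y;
  int B = Y + X;      // congruent to A: same class, different member
  return A * B;
}

static int afterGVN(int X, int Y) {
  int Leader = X + Y; // one leader stands in for every member of the class
  return Leader * Leader;
}

int main() {
  assert(beforeGVN(3, 4) == afterGVN(3, 4));
  return 0;
}
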
@@ -180,19 +388,34 @@ template <> struct DenseMapInfo<const Expression *> {
};
} // end namespace llvm
-class NewGVN : public FunctionPass {
+namespace {
+class NewGVN {
+ Function &F;
DominatorTree *DT;
- const DataLayout *DL;
- const TargetLibraryInfo *TLI;
AssumptionCache *AC;
+ const TargetLibraryInfo *TLI;
AliasAnalysis *AA;
MemorySSA *MSSA;
MemorySSAWalker *MSSAWalker;
+ const DataLayout &DL;
+ std::unique_ptr<PredicateInfo> PredInfo;
BumpPtrAllocator ExpressionAllocator;
ArrayRecycler<Value *> ArgRecycler;
+ TarjanSCC SCCFinder;
+
+ // Number of function arguments, used by ranking
+ unsigned int NumFuncArgs;
+
+ // RPOOrdering of basic blocks
+ DenseMap<const DomTreeNode *, unsigned> RPOOrdering;
// Congruence class info.
- CongruenceClass *InitialClass;
+
+ // This class is called INITIAL in the paper. It is the class everything
+  // starts out in, and represents any value. Being an optimistic analysis,
+ // anything in the TOP class has the value TOP, which is indeterminate and
+ // equivalent to everything.
+ CongruenceClass *TOPClass;
std::vector<CongruenceClass *> CongruenceClasses;
unsigned NextCongruenceNum;
@@ -200,13 +423,38 @@ class NewGVN : public FunctionPass {
DenseMap<Value *, CongruenceClass *> ValueToClass;
DenseMap<Value *, const Expression *> ValueToExpression;
+ // Mapping from predicate info we used to the instructions we used it with.
+ // In order to correctly ensure propagation, we must keep track of what
+ // comparisons we used, so that when the values of the comparisons change, we
+ // propagate the information to the places we used the comparison.
+ DenseMap<const Value *, SmallPtrSet<Instruction *, 2>> PredicateToUsers;
+ // Mapping from MemoryAccess we used to the MemoryAccess we used it with. Has
+ // the same reasoning as PredicateToUsers. When we skip MemoryAccesses for
+ // stores, we no longer can rely solely on the def-use chains of MemorySSA.
+ DenseMap<const MemoryAccess *, SmallPtrSet<MemoryAccess *, 2>> MemoryToUsers;
+
// A table storing which memorydefs/phis represent a memory state provably
// equivalent to another memory state.
// We could use the congruence class machinery, but the MemoryAccess's are
// abstract memory states, so they can only ever be equivalent to each other,
// and not to constants, etc.
- DenseMap<const MemoryAccess *, MemoryAccess *> MemoryAccessEquiv;
-
+ DenseMap<const MemoryAccess *, CongruenceClass *> MemoryAccessToClass;
+
+ // We could, if we wanted, build MemoryPhiExpressions and
+ // MemoryVariableExpressions, etc, and value number them the same way we value
+ // number phi expressions. For the moment, this seems like overkill. They
+ // can only exist in one of three states: they can be TOP (equal to
+ // everything), Equivalent to something else, or unique. Because we do not
+ // create expressions for them, we need to simulate leader change not just
+ // when they change class, but when they change state. Note: We can do the
+  // same thing for phis, and avoid having phi expressions if we wanted. We
+  // should eventually unify in one direction or the other, so this is a little
+  // bit of an experiment in which approach turns out easier to maintain.
+ enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique };
+ DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
+
+ enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
+ DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
// Expression to class mapping.
using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
ExpressionClassMap ExpressionToClass;
@@ -231,8 +479,6 @@ class NewGVN : public FunctionPass {
BitVector TouchedInstructions;
DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
- DenseMap<const DomTreeNode *, std::pair<unsigned, unsigned>>
- DominatedInstRange;
#ifndef NDEBUG
// Debugging for how many times each block and instruction got processed.
@@ -240,56 +486,42 @@ class NewGVN : public FunctionPass {
#endif
// DFS info.
- DenseMap<const BasicBlock *, std::pair<int, int>> DFSDomMap;
+ // This contains a mapping from Instructions to DFS numbers.
+ // The numbering starts at 1. An instruction with DFS number zero
+ // means that the instruction is dead.
DenseMap<const Value *, unsigned> InstrDFS;
+
+ // This contains the mapping DFS numbers to instructions.
SmallVector<Value *, 32> DFSToInstr;
// Deletion info.
SmallPtrSet<Instruction *, 8> InstructionsToErase;
public:
- static char ID; // Pass identification, replacement for typeid.
- NewGVN() : FunctionPass(ID) {
- initializeNewGVNPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
- bool runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
- TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA);
+ NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,
+ const DataLayout &DL)
+ : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL),
+ PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)) {}
+ bool runGVN();
private:
- // This transformation requires dominator postdominator info.
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
-
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-
// Expression handling.
- const Expression *createExpression(Instruction *, const BasicBlock *);
- const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *,
- const BasicBlock *);
- PHIExpression *createPHIExpression(Instruction *);
+ const Expression *createExpression(Instruction *);
+ const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *);
+ PHIExpression *createPHIExpression(Instruction *, bool &HasBackedge,
+ bool &AllConstant);
const VariableExpression *createVariableExpression(Value *);
const ConstantExpression *createConstantExpression(Constant *);
- const Expression *createVariableOrConstant(Value *V, const BasicBlock *B);
+ const Expression *createVariableOrConstant(Value *V);
const UnknownExpression *createUnknownExpression(Instruction *);
- const StoreExpression *createStoreExpression(StoreInst *, MemoryAccess *,
- const BasicBlock *);
+ const StoreExpression *createStoreExpression(StoreInst *,
+ const MemoryAccess *);
LoadExpression *createLoadExpression(Type *, Value *, LoadInst *,
- MemoryAccess *, const BasicBlock *);
-
- const CallExpression *createCallExpression(CallInst *, MemoryAccess *,
- const BasicBlock *);
- const AggregateValueExpression *
- createAggregateValueExpression(Instruction *, const BasicBlock *);
- bool setBasicExpressionInfo(Instruction *, BasicExpression *,
- const BasicBlock *);
+ const MemoryAccess *);
+ const CallExpression *createCallExpression(CallInst *, const MemoryAccess *);
+ const AggregateValueExpression *createAggregateValueExpression(Instruction *);
+ bool setBasicExpressionInfo(Instruction *, BasicExpression *);
// Congruence class handling.
CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) {
@@ -298,9 +530,21 @@ private:
return result;
}
+ CongruenceClass *createMemoryClass(MemoryAccess *MA) {
+ auto *CC = createCongruenceClass(nullptr, nullptr);
+ CC->setMemoryLeader(MA);
+ return CC;
+ }
+ CongruenceClass *ensureLeaderOfMemoryClass(MemoryAccess *MA) {
+ auto *CC = getMemoryClass(MA);
+ if (CC->getMemoryLeader() != MA)
+ CC = createMemoryClass(MA);
+ return CC;
+ }
+
CongruenceClass *createSingletonCongruenceClass(Value *Member) {
CongruenceClass *CClass = createCongruenceClass(Member, nullptr);
- CClass->Members.insert(Member);
+ CClass->insert(Member);
ValueToClass[Member] = CClass;
return CClass;
}
@@ -313,37 +557,49 @@ private:
// Symbolic evaluation.
const Expression *checkSimplificationResults(Expression *, Instruction *,
Value *);
- const Expression *performSymbolicEvaluation(Value *, const BasicBlock *);
- const Expression *performSymbolicLoadEvaluation(Instruction *,
- const BasicBlock *);
- const Expression *performSymbolicStoreEvaluation(Instruction *,
- const BasicBlock *);
- const Expression *performSymbolicCallEvaluation(Instruction *,
- const BasicBlock *);
- const Expression *performSymbolicPHIEvaluation(Instruction *,
- const BasicBlock *);
- bool setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To);
- const Expression *performSymbolicAggrValueEvaluation(Instruction *,
- const BasicBlock *);
+ const Expression *performSymbolicEvaluation(Value *);
+ const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
+ Instruction *, MemoryAccess *);
+ const Expression *performSymbolicLoadEvaluation(Instruction *);
+ const Expression *performSymbolicStoreEvaluation(Instruction *);
+ const Expression *performSymbolicCallEvaluation(Instruction *);
+ const Expression *performSymbolicPHIEvaluation(Instruction *);
+ const Expression *performSymbolicAggrValueEvaluation(Instruction *);
+ const Expression *performSymbolicCmpEvaluation(Instruction *);
+ const Expression *performSymbolicPredicateInfoEvaluation(Instruction *);
// Congruence finding.
- // Templated to allow them to work both on BB's and BB-edges.
- template <class T>
- Value *lookupOperandLeader(Value *, const User *, const T &) const;
+ bool someEquivalentDominates(const Instruction *, const Instruction *) const;
+ Value *lookupOperandLeader(Value *) const;
void performCongruenceFinding(Instruction *, const Expression *);
- void moveValueToNewCongruenceClass(Instruction *, CongruenceClass *,
- CongruenceClass *);
+ void moveValueToNewCongruenceClass(Instruction *, const Expression *,
+ CongruenceClass *, CongruenceClass *);
+ void moveMemoryToNewCongruenceClass(Instruction *, MemoryAccess *,
+ CongruenceClass *, CongruenceClass *);
+ Value *getNextValueLeader(CongruenceClass *) const;
+ const MemoryAccess *getNextMemoryLeader(CongruenceClass *) const;
+ bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To);
+ CongruenceClass *getMemoryClass(const MemoryAccess *MA) const;
+ const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const;
+ bool isMemoryAccessTop(const MemoryAccess *) const;
+
+ // Ranking
+ unsigned int getRank(const Value *) const;
+ bool shouldSwapOperands(const Value *, const Value *) const;
+
// Reachability handling.
void updateReachableEdge(BasicBlock *, BasicBlock *);
void processOutgoingEdges(TerminatorInst *, BasicBlock *);
- bool isOnlyReachableViaThisEdge(const BasicBlockEdge &) const;
- Value *findConditionEquivalence(Value *, BasicBlock *) const;
- MemoryAccess *lookupMemoryAccessEquiv(MemoryAccess *) const;
+ Value *findConditionEquivalence(Value *) const;
// Elimination.
struct ValueDFS;
- void convertDenseToDFSOrdered(CongruenceClass::MemberSet &,
- SmallVectorImpl<ValueDFS> &);
+ void convertClassToDFSOrdered(const CongruenceClass &,
+ SmallVectorImpl<ValueDFS> &,
+ DenseMap<const Value *, unsigned int> &,
+ SmallPtrSetImpl<Instruction *> &) const;
+ void convertClassToLoadsAndStores(const CongruenceClass &,
+ SmallVectorImpl<ValueDFS> &) const;
bool eliminateInstructions(Function &);
void replaceInstruction(Instruction *, Value *);
@@ -355,35 +611,58 @@ private:
// Various instruction touch utilities
void markUsersTouched(Value *);
- void markMemoryUsersTouched(MemoryAccess *);
- void markLeaderChangeTouched(CongruenceClass *CC);
+ void markMemoryUsersTouched(const MemoryAccess *);
+ void markMemoryDefTouched(const MemoryAccess *);
+ void markPredicateUsersTouched(Instruction *);
+ void markValueLeaderChangeTouched(CongruenceClass *CC);
+ void markMemoryLeaderChangeTouched(CongruenceClass *CC);
+ void addPredicateUsers(const PredicateBase *, Instruction *);
+ void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U);
+
+ // Main loop of value numbering
+ void iterateTouchedInstructions();
// Utilities.
void cleanupTables();
std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
void updateProcessedCount(Value *V);
void verifyMemoryCongruency() const;
+ void verifyIterationSettled(Function &F);
bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const;
-};
-
-char NewGVN::ID = 0;
+ BasicBlock *getBlockForValue(Value *V) const;
+ void deleteExpression(const Expression *E);
+ unsigned InstrToDFSNum(const Value *V) const {
+ assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses");
+ return InstrDFS.lookup(V);
+ }
-// createGVNPass - The public interface to this file.
-FunctionPass *llvm::createNewGVNPass() { return new NewGVN(); }
+ unsigned InstrToDFSNum(const MemoryAccess *MA) const {
+ return MemoryToDFSNum(MA);
+ }
+ Value *InstrFromDFSNum(unsigned DFSNum) { return DFSToInstr[DFSNum]; }
+ // Given a MemoryAccess, return the relevant instruction DFS number. Note:
+ // This deliberately takes a Value so it can be used with Uses, which will
+ // auto-convert to Values but not to MemoryAccesses.
+ unsigned MemoryToDFSNum(const Value *MA) const {
+ assert(isa<MemoryAccess>(MA) &&
+ "This should not be used with instructions");
+ return isa<MemoryUseOrDef>(MA)
+ ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
+ : InstrDFS.lookup(MA);
+ }
+ bool isCycleFree(const PHINode *PN);
+ template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
+ // Debug counter info. When verifying, we have to reset the value numbering
+ // debug counter to the same state it started in to get the same results.
+ std::pair<int, int> StartingVNCounter;
+};
+} // end anonymous namespace
template <typename T>
static bool equalsLoadStoreHelper(const T &LHS, const Expression &RHS) {
- if ((!isa<LoadExpression>(RHS) && !isa<StoreExpression>(RHS)) ||
- !LHS.BasicExpression::equals(RHS)) {
+ if (!isa<LoadExpression>(RHS) && !isa<StoreExpression>(RHS))
return false;
- } else if (const auto *L = dyn_cast<LoadExpression>(&RHS)) {
- if (LHS.getDefiningAccess() != L->getDefiningAccess())
- return false;
- } else if (const auto *S = dyn_cast<StoreExpression>(&RHS)) {
- if (LHS.getDefiningAccess() != S->getDefiningAccess())
- return false;
- }
- return true;
+ return LHS.MemoryExpression::equals(RHS);
}
bool LoadExpression::equals(const Expression &Other) const {
@@ -391,7 +670,13 @@ bool LoadExpression::equals(const Expression &Other) const {
}
bool StoreExpression::equals(const Expression &Other) const {
- return equalsLoadStoreHelper(*this, Other);
+ if (!equalsLoadStoreHelper(*this, Other))
+ return false;
+ // Make sure that store vs store includes the value operand.
+ if (const auto *S = dyn_cast<StoreExpression>(&Other))
+ if (getStoredValue() != S->getStoredValue())
+ return false;
+ return true;
}
#ifndef NDEBUG
@@ -400,16 +685,28 @@ static std::string getBlockName(const BasicBlock *B) {
}
#endif
-INITIALIZE_PASS_BEGIN(NewGVN, "newgvn", "Global Value Numbering", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false)
+// Get the basic block from an instruction/memory value.
+BasicBlock *NewGVN::getBlockForValue(Value *V) const {
+ if (auto *I = dyn_cast<Instruction>(V))
+ return I->getParent();
+ else if (auto *MP = dyn_cast<MemoryPhi>(V))
+ return MP->getBlock();
+ llvm_unreachable("Should have been able to figure out a block for our value");
+ return nullptr;
+}
-PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
+// Delete a definitely dead expression, so it can be reused by the expression
+// allocator. Some of these are not in creation functions, so we have to accept
+// const versions.
+void NewGVN::deleteExpression(const Expression *E) {
+ assert(isa<BasicExpression>(E));
+ auto *BE = cast<BasicExpression>(E);
+ const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler);
+ ExpressionAllocator.Deallocate(E);
+}
+
+PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
+ bool &AllConstant) {
BasicBlock *PHIBlock = I->getParent();
auto *PN = cast<PHINode>(I);
auto *E =
@@ -419,28 +716,32 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
E->setType(I->getType());
E->setOpcode(I->getOpcode());
- auto ReachablePhiArg = [&](const Use &U) {
- return ReachableBlocks.count(PN->getIncomingBlock(U));
- };
+ unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock));
- // Filter out unreachable operands
- auto Filtered = make_filter_range(PN->operands(), ReachablePhiArg);
+ // Filter out unreachable phi operands.
+ auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) {
+ return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock});
+ });
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use &U) -> Value * {
+ auto *BB = PN->getIncomingBlock(U);
+ auto *DTN = DT->getNode(BB);
+ if (RPOOrdering.lookup(DTN) >= PHIRPO)
+ HasBackedge = true;
+ AllConstant &= isa<UndefValue>(U) || isa<Constant>(U);
+
// Don't try to transform self-defined phis.
if (U == PN)
return PN;
- const BasicBlockEdge BBE(PN->getIncomingBlock(U), PHIBlock);
- return lookupOperandLeader(U, I, BBE);
+ return lookupOperandLeader(U);
});
return E;
}
// Set basic expression info (Arguments, type, opcode) for Expression
// E from Instruction I in block B.
-bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E,
- const BasicBlock *B) {
+bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) {
bool AllConstant = true;
if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
E->setType(GEP->getSourceElementType());
@@ -452,7 +753,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E,
// Transform the operand array into an operand leader array, and keep track of
// whether all members are constant.
std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) {
- auto Operand = lookupOperandLeader(O, I, B);
+ auto Operand = lookupOperandLeader(O);
AllConstant &= isa<Constant>(Operand);
return Operand;
});
@@ -461,8 +762,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E,
}
const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
- Value *Arg1, Value *Arg2,
- const BasicBlock *B) {
+ Value *Arg1, Value *Arg2) {
auto *E = new (ExpressionAllocator) BasicExpression(2);
E->setType(T);
@@ -473,13 +773,13 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
// of their operands get the same value number by sorting the operand value
// numbers. Since all commutative instructions have two operands it is more
// efficient to sort by hand rather than using, say, std::sort.
- if (Arg1 > Arg2)
+ if (shouldSwapOperands(Arg1, Arg2))
std::swap(Arg1, Arg2);
}
- E->op_push_back(lookupOperandLeader(Arg1, nullptr, B));
- E->op_push_back(lookupOperandLeader(Arg2, nullptr, B));
+ E->op_push_back(lookupOperandLeader(Arg1));
+ E->op_push_back(lookupOperandLeader(Arg2));
- Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), *DL, TLI,
+ Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI,
DT, AC);
if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V))
return SimplifiedE;
@@ -502,40 +802,32 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E,
NumGVNOpsSimplified++;
assert(isa<BasicExpression>(E) &&
"We should always have had a basic expression here");
-
- cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
- ExpressionAllocator.Deallocate(E);
+ deleteExpression(E);
return createConstantExpression(C);
} else if (isa<Argument>(V) || isa<GlobalVariable>(V)) {
if (I)
DEBUG(dbgs() << "Simplified " << *I << " to "
<< " variable " << *V << "\n");
- cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
- ExpressionAllocator.Deallocate(E);
+ deleteExpression(E);
return createVariableExpression(V);
}
CongruenceClass *CC = ValueToClass.lookup(V);
- if (CC && CC->DefiningExpr) {
+ if (CC && CC->getDefiningExpr()) {
if (I)
DEBUG(dbgs() << "Simplified " << *I << " to "
<< " expression " << *V << "\n");
NumGVNOpsSimplified++;
- assert(isa<BasicExpression>(E) &&
- "We should always have had a basic expression here");
- cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
- ExpressionAllocator.Deallocate(E);
- return CC->DefiningExpr;
+ deleteExpression(E);
+ return CC->getDefiningExpr();
}
return nullptr;
}
-const Expression *NewGVN::createExpression(Instruction *I,
- const BasicBlock *B) {
-
+const Expression *NewGVN::createExpression(Instruction *I) {
auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands());
- bool AllConstant = setBasicExpressionInfo(I, E, B);
+ bool AllConstant = setBasicExpressionInfo(I, E);
if (I->isCommutative()) {
// Ensure that commutative instructions that only differ by a permutation
@@ -543,7 +835,7 @@ const Expression *NewGVN::createExpression(Instruction *I,
// numbers. Since all commutative instructions have two operands it is more
// efficient to sort by hand rather than using, say, std::sort.
assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
- if (E->getOperand(0) > E->getOperand(1))
+ if (shouldSwapOperands(E->getOperand(0), E->getOperand(1)))
E->swapOperands(0, 1);
}
@@ -559,48 +851,43 @@ const Expression *NewGVN::createExpression(Instruction *I,
// Sort the operand value numbers so x<y and y>x get the same value
// number.
CmpInst::Predicate Predicate = CI->getPredicate();
- if (E->getOperand(0) > E->getOperand(1)) {
+ if (shouldSwapOperands(E->getOperand(0), E->getOperand(1))) {
E->swapOperands(0, 1);
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
E->setOpcode((CI->getOpcode() << 8) | Predicate);
// TODO: 25% of our time is spent in SimplifyCmpInst with pointer operands
- // TODO: Since we noop bitcasts, we may need to check types before
- // simplifying, so that we don't end up simplifying based on a wrong
- // type assumption. We should clean this up so we can use constants of the
- // wrong type
-
assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() &&
"Wrong types on cmp instruction");
- if ((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&
- E->getOperand(1)->getType() == I->getOperand(1)->getType())) {
- Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1),
- *DL, TLI, DT, AC);
- if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
- return SimplifiedE;
- }
+ assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&
+ E->getOperand(1)->getType() == I->getOperand(1)->getType()));
+ Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1),
+ DL, TLI, DT, AC);
+ if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
+ return SimplifiedE;
} else if (isa<SelectInst>(I)) {
if (isa<Constant>(E->getOperand(0)) ||
- (E->getOperand(1)->getType() == I->getOperand(1)->getType() &&
- E->getOperand(2)->getType() == I->getOperand(2)->getType())) {
+ E->getOperand(0) == E->getOperand(1)) {
+ assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() &&
+ E->getOperand(2)->getType() == I->getOperand(2)->getType());
Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1),
- E->getOperand(2), *DL, TLI, DT, AC);
+ E->getOperand(2), DL, TLI, DT, AC);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
}
} else if (I->isBinaryOp()) {
Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1),
- *DL, TLI, DT, AC);
+ DL, TLI, DT, AC);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (auto *BI = dyn_cast<BitCastInst>(I)) {
- Value *V = SimplifyInstruction(BI, *DL, TLI, DT, AC);
+ Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (isa<GetElementPtrInst>(I)) {
Value *V = SimplifyGEPInst(E->getType(),
ArrayRef<Value *>(E->op_begin(), E->op_end()),
- *DL, TLI, DT, AC);
+ DL, TLI, DT, AC);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (AllConstant) {
@@ -615,7 +902,7 @@ const Expression *NewGVN::createExpression(Instruction *I,
for (Value *Arg : E->operands())
C.emplace_back(cast<Constant>(Arg));
- if (Value *V = ConstantFoldInstOperands(I, C, *DL, TLI))
+ if (Value *V = ConstantFoldInstOperands(I, C, DL, TLI))
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
}
@@ -623,18 +910,18 @@ const Expression *NewGVN::createExpression(Instruction *I,
}
const AggregateValueExpression *
-NewGVN::createAggregateValueExpression(Instruction *I, const BasicBlock *B) {
+NewGVN::createAggregateValueExpression(Instruction *I) {
if (auto *II = dyn_cast<InsertValueInst>(I)) {
auto *E = new (ExpressionAllocator)
AggregateValueExpression(I->getNumOperands(), II->getNumIndices());
- setBasicExpressionInfo(I, E, B);
+ setBasicExpressionInfo(I, E);
E->allocateIntOperands(ExpressionAllocator);
std::copy(II->idx_begin(), II->idx_end(), int_op_inserter(E));
return E;
} else if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
auto *E = new (ExpressionAllocator)
AggregateValueExpression(I->getNumOperands(), EI->getNumIndices());
- setBasicExpressionInfo(EI, E, B);
+ setBasicExpressionInfo(EI, E);
E->allocateIntOperands(ExpressionAllocator);
std::copy(EI->idx_begin(), EI->idx_end(), int_op_inserter(E));
return E;
@@ -648,12 +935,10 @@ const VariableExpression *NewGVN::createVariableExpression(Value *V) {
return E;
}
-const Expression *NewGVN::createVariableOrConstant(Value *V,
- const BasicBlock *B) {
- auto Leader = lookupOperandLeader(V, nullptr, B);
- if (auto *C = dyn_cast<Constant>(Leader))
+const Expression *NewGVN::createVariableOrConstant(Value *V) {
+ if (auto *C = dyn_cast<Constant>(V))
return createConstantExpression(C);
- return createVariableExpression(Leader);
+ return createVariableExpression(V);
}
const ConstantExpression *NewGVN::createConstantExpression(Constant *C) {
@@ -669,40 +954,90 @@ const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) {
}
const CallExpression *NewGVN::createCallExpression(CallInst *CI,
- MemoryAccess *HV,
- const BasicBlock *B) {
+ const MemoryAccess *MA) {
// FIXME: Add operand bundles for calls.
auto *E =
- new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, HV);
- setBasicExpressionInfo(CI, E, B);
+ new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA);
+ setBasicExpressionInfo(CI, E);
return E;
}
+// Return true if some equivalent of instruction Inst dominates instruction U.
+bool NewGVN::someEquivalentDominates(const Instruction *Inst,
+ const Instruction *U) const {
+ auto *CC = ValueToClass.lookup(Inst);
+ // This must be an instruction because we are only called from phi nodes
+ // in the case that the value it needs to check against is an instruction.
+
+ // The most likely candidates for dominance are the leader and the next leader.
+ // The leader or next leader will dominate in all cases where there is an
+ // equivalent that is higher up in the dom tree.
+ // We can't *only* check them, however, because the
+ // dominator tree could have an infinite number of non-dominating siblings
+ // with instructions that are in the right congruence class.
+ // A
+ // B C D E F G
+ // |
+ // H
+ // Instruction U could be in H, with equivalents in every other sibling.
+ // Depending on the rpo order picked, the leader could be the equivalent in
+ // any of these siblings.
+ if (!CC)
+ return false;
+ if (DT->dominates(cast<Instruction>(CC->getLeader()), U))
+ return true;
+ if (CC->getNextLeader().first &&
+ DT->dominates(cast<Instruction>(CC->getNextLeader().first), U))
+ return true;
+ return llvm::any_of(*CC, [&](const Value *Member) {
+ return Member != CC->getLeader() &&
+ DT->dominates(cast<Instruction>(Member), U);
+ });
+}
+
// See if we have a congruence class and leader for this operand, and if so,
// return it. Otherwise, return the operand itself.
-template <class T>
-Value *NewGVN::lookupOperandLeader(Value *V, const User *U, const T &B) const {
+Value *NewGVN::lookupOperandLeader(Value *V) const {
CongruenceClass *CC = ValueToClass.lookup(V);
- if (CC && (CC != InitialClass))
- return CC->RepLeader;
+ if (CC) {
+ // Everything in TOP is represented by undef, as it can be any value.
+ // We do have to make sure we get the type right though, so we can't set the
+ // RepLeader to undef.
+ if (CC == TOPClass)
+ return UndefValue::get(V->getType());
+ return CC->getStoredValue() ? CC->getStoredValue() : CC->getLeader();
+ }
+
return V;
}
-MemoryAccess *NewGVN::lookupMemoryAccessEquiv(MemoryAccess *MA) const {
- MemoryAccess *Result = MemoryAccessEquiv.lookup(MA);
- return Result ? Result : MA;
+const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const {
+ auto *CC = getMemoryClass(MA);
+ assert(CC->getMemoryLeader() &&
+ "Every MemoryAccess should be mapped to a "
+ "congruence class with a represenative memory "
+ "access");
+ return CC->getMemoryLeader();
+}
+
+// Return true if the MemoryAccess is really equivalent to everything. This is
+// equivalent to the lattice value "TOP" in most lattices. This is the initial
+// state of all MemoryAccesses.
+bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const {
+ return getMemoryClass(MA) == TOPClass;
}
LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp,
- LoadInst *LI, MemoryAccess *DA,
- const BasicBlock *B) {
- auto *E = new (ExpressionAllocator) LoadExpression(1, LI, DA);
+ LoadInst *LI,
+ const MemoryAccess *MA) {
+ auto *E =
+ new (ExpressionAllocator) LoadExpression(1, LI, lookupMemoryLeader(MA));
E->allocateOperands(ArgRecycler, ExpressionAllocator);
E->setType(LoadType);
// Give store and loads same opcode so they value number together.
E->setOpcode(0);
- E->op_push_back(lookupOperandLeader(PointerOp, LI, B));
+ E->op_push_back(PointerOp);
if (LI)
E->setAlignment(LI->getAlignment());
@@ -713,16 +1048,16 @@ LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp,
}
const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI,
- MemoryAccess *DA,
- const BasicBlock *B) {
- auto *E =
- new (ExpressionAllocator) StoreExpression(SI->getNumOperands(), SI, DA);
+ const MemoryAccess *MA) {
+ auto *StoredValueLeader = lookupOperandLeader(SI->getValueOperand());
+ auto *E = new (ExpressionAllocator)
+ StoreExpression(SI->getNumOperands(), SI, StoredValueLeader, MA);
E->allocateOperands(ArgRecycler, ExpressionAllocator);
E->setType(SI->getValueOperand()->getType());
// Give store and loads same opcode so they value number together.
E->setOpcode(0);
- E->op_push_back(lookupOperandLeader(SI->getPointerOperand(), SI, B));
+ E->op_push_back(lookupOperandLeader(SI->getPointerOperand()));
// TODO: Value number heap versions. We may be able to discover
// things alias analysis can't on its own (IE that a store and a
@@ -730,44 +1065,140 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI,
return E;
}
-// Utility function to check whether the congruence class has a member other
-// than the given instruction.
-bool hasMemberOtherThanUs(const CongruenceClass *CC, Instruction *I) {
- // Either it has more than one store, in which case it must contain something
- // other than us (because it's indexed by value), or if it only has one store
- // right now, that member should not be us.
- return CC->StoreCount > 1 || CC->Members.count(I) == 0;
-}
-
-const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
- const BasicBlock *B) {
+const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) {
// Unlike loads, we never try to eliminate stores, so we do not check whether
// they are simple in order to avoid value numbering them.
auto *SI = cast<StoreInst>(I);
- MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI);
- // See if we are defined by a previous store expression, it already has a
- // value, and it's the same value as our current store. FIXME: Right now, we
- // only do this for simple stores, we should expand to cover memcpys, etc.
+ auto *StoreAccess = MSSA->getMemoryAccess(SI);
+ // Get the expression, if any, for the RHS of the MemoryDef.
+ const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess();
+ if (EnableStoreRefinement)
+ StoreRHS = MSSAWalker->getClobberingMemoryAccess(StoreAccess);
+ // If we bypassed the use-def chains, make sure we add a use.
+ if (StoreRHS != StoreAccess->getDefiningAccess())
+ addMemoryUsers(StoreRHS, StoreAccess);
+
+ StoreRHS = lookupMemoryLeader(StoreRHS);
+ // If we are defined by ourselves, use the live on entry def.
+ if (StoreRHS == StoreAccess)
+ StoreRHS = MSSA->getLiveOnEntryDef();
+
if (SI->isSimple()) {
- // Get the expression, if any, for the RHS of the MemoryDef.
- MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
- cast<MemoryDef>(StoreAccess)->getDefiningAccess());
- const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
- CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
+ // See if we are defined by a previous store expression, it already has a
+ // value, and it's the same value as our current store. FIXME: Right now, we
+ // only do this for simple stores, we should expand to cover memcpys, etc.
+ const auto *LastStore = createStoreExpression(SI, StoreRHS);
+ const auto *LastCC = ExpressionToClass.lookup(LastStore);
// Basically, check if the congruence class the store is in is defined by a
// store that isn't us, and has the same value. MemorySSA takes care of
// ensuring the store has the same memory state as us already.
- if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
- CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B) &&
- hasMemberOtherThanUs(CC, I))
- return createStoreExpression(SI, StoreRHS, B);
+ // The RepStoredValue gets nulled if all the stores disappear in a class, so
+ // we don't need to check if the class contains a store besides us.
+ if (LastCC &&
+ LastCC->getStoredValue() == lookupOperandLeader(SI->getValueOperand()))
+ return LastStore;
+ deleteExpression(LastStore);
+ // Also check if our value operand is defined by a load of the same memory
+ // location, and the memory state is the same as it was then (otherwise, it
+ // could have been overwritten later; see test32 in
+ // transforms/DeadStoreElimination/simple.ll).
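+ // For example (an illustrative sketch, not IR from the test suite):
+ //   %v = load i32, i32* %p
+ //   ...                        ; no clobber of %p in between
+ //   store i32 %v, i32* %p      ; writes back what is already in memory, so
+ //                              ; the store value numbers to %v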
+ if (auto *LI =
+ dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) {
+ if ((lookupOperandLeader(LI->getPointerOperand()) ==
+ lookupOperandLeader(SI->getPointerOperand())) &&
+ (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) ==
+ StoreRHS))
+ return createVariableExpression(LI);
+ }
}
- return createStoreExpression(SI, StoreAccess, B);
+ // If the store is not equivalent to anything, value number it as a store that
+ // produces a unique memory state (instead of using its MemoryUse, we use
+ // its MemoryDef).
+ return createStoreExpression(SI, StoreAccess);
}
-const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
- const BasicBlock *B) {
+// See if we can extract the value a load would produce from a dependent load,
+// store, or memory intrinsic.
+const Expression *
+NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
+ LoadInst *LI, Instruction *DepInst,
+ MemoryAccess *DefiningAccess) {
+ assert((!LI || LI->isSimple()) && "Not a simple load");
+ if (auto *DepSI = dyn_cast<StoreInst>(DepInst)) {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ // Also don't need to coerce if they are the same type, we will just
+ // propagate it.
+ if (LI->isAtomic() > DepSI->isAtomic() ||
+ LoadType == DepSI->getValueOperand()->getType())
+ return nullptr;
+ int Offset = analyzeLoadFromClobberingStore(LoadType, LoadPtr, DepSI, DL);
+ if (Offset >= 0) {
+ if (auto *C = dyn_cast<Constant>(
+ lookupOperandLeader(DepSI->getValueOperand()))) {
+ DEBUG(dbgs() << "Coercing load from store " << *DepSI << " to constant "
+ << *C << "\n");
+ return createConstantExpression(
+ getConstantStoreValueForLoad(C, Offset, LoadType, DL));
+ }
+ }
+
+ } else if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (LI->isAtomic() > DepLI->isAtomic())
+ return nullptr;
+ int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI, DL);
+ if (Offset >= 0) {
+ // We can coerce a constant load into a load
+ if (auto *C = dyn_cast<Constant>(lookupOperandLeader(DepLI)))
+ if (auto *PossibleConstant =
+ getConstantLoadValueForLoad(C, Offset, LoadType, DL)) {
+ DEBUG(dbgs() << "Coercing load from load " << *LI << " to constant "
+ << *PossibleConstant << "\n");
+ return createConstantExpression(PossibleConstant);
+ }
+ }
+
+ } else if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
+ int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI, DL);
+ if (Offset >= 0) {
+ if (auto *PossibleConstant =
+ getConstantMemInstValueForLoad(DepMI, Offset, LoadType, DL)) {
+ DEBUG(dbgs() << "Coercing load from meminst " << *DepMI
+ << " to constant " << *PossibleConstant << "\n");
+ return createConstantExpression(PossibleConstant);
+ }
+ }
+ }
+
+ // All of the below are only true if the loaded pointer is produced
+ // by the dependent instruction.
+ if (LoadPtr != lookupOperandLeader(DepInst) &&
+ !AA->isMustAlias(LoadPtr, DepInst))
+ return nullptr;
+ // If this load really doesn't depend on anything, then we must be loading an
+ // undef value. This can happen when loading from a fresh allocation with no
+ // intervening stores, for example. Note that this is only true in the case
+ // that the result of the allocation is pointer equal to the load ptr.
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) {
+ return createConstantExpression(UndefValue::get(LoadType));
+ }
+ // If this load occurs right after a lifetime begin,
+ // the loaded value is undefined.
+ else if (auto *II = dyn_cast<IntrinsicInst>(DepInst)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ return createConstantExpression(UndefValue::get(LoadType));
+ }
+ // If this load follows a calloc (which zero initializes memory),
+ // then the loaded value is zero
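+ // An illustrative sketch (hypothetical IR, assuming %q must-aliases %p and
+ // there is no intervening store):
+ //   %p = call i8* @calloc(i64 1, i64 4)
+ //   %q = bitcast i8* %p to i32*
+ //   %v = load i32, i32* %q     ; %v value numbers to the constant 0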
+ else if (isCallocLikeFn(DepInst, TLI)) {
+ return createConstantExpression(Constant::getNullValue(LoadType));
+ }
+
+ return nullptr;
+}
+
+const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) {
auto *LI = cast<LoadInst>(I);
// We can eliminate in favor of non-simple loads, but we won't be able to
@@ -775,7 +1206,7 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
if (!LI->isSimple())
return nullptr;
- Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand(), I, B);
+ Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand());
// Load of undef is undef.
if (isa<UndefValue>(LoadAddressLeader))
return createConstantExpression(UndefValue::get(LI->getType()));
@@ -788,61 +1219,233 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
// If the defining instruction is not reachable, replace with undef.
if (!ReachableBlocks.count(DefiningInst->getParent()))
return createConstantExpression(UndefValue::get(LI->getType()));
+ // This will handle stores and memory insts. We only do this if the
+ // defining access has a different type, or it is a pointer produced by
+ // certain memory operations that cause the memory to have a fixed value
+ // (IE things like calloc).
+ if (const auto *CoercionResult =
+ performSymbolicLoadCoercion(LI->getType(), LoadAddressLeader, LI,
+ DefiningInst, DefiningAccess))
+ return CoercionResult;
}
}
- const Expression *E =
- createLoadExpression(LI->getType(), LI->getPointerOperand(), LI,
- lookupMemoryAccessEquiv(DefiningAccess), B);
+ const Expression *E = createLoadExpression(LI->getType(), LoadAddressLeader,
+ LI, DefiningAccess);
return E;
}
+const Expression *
+NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) {
+ auto *PI = PredInfo->getPredicateInfoFor(I);
+ if (!PI)
+ return nullptr;
+
+ DEBUG(dbgs() << "Found predicate info from instruction !\n");
+
+ auto *PWC = dyn_cast<PredicateWithCondition>(PI);
+ if (!PWC)
+ return nullptr;
+
+ auto *CopyOf = I->getOperand(0);
+ auto *Cond = PWC->Condition;
+
+ // If this is a copy of the condition, it must be either true or false
+ // depending on the predicate info type and edge.
+ if (CopyOf == Cond) {
+ // We should not need to add predicate users because the predicate info is
+ // already a use of this operand.
+ if (isa<PredicateAssume>(PI))
+ return createConstantExpression(ConstantInt::getTrue(Cond->getType()));
+ if (auto *PBranch = dyn_cast<PredicateBranch>(PI)) {
+ if (PBranch->TrueEdge)
+ return createConstantExpression(ConstantInt::getTrue(Cond->getType()));
+ return createConstantExpression(ConstantInt::getFalse(Cond->getType()));
+ }
+ if (auto *PSwitch = dyn_cast<PredicateSwitch>(PI))
+ return createConstantExpression(cast<Constant>(PSwitch->CaseValue));
+ }
+
+ // Not a copy of the condition, so see what the predicates tell us about this
+ // value. First, though, we check to make sure the value is actually a copy
+ // of one of the condition operands. It's possible, in certain cases, for it
+ // to be a copy of a predicateinfo copy. In particular, if two branch
+ // operations use the same condition, and one branch dominates the other, we
+ // will end up with a copy of a copy. This is currently a small deficiency in
+ // predicateinfo. What will end up happening here is that we will value
+ // number both copies the same anyway.
+
+ // Everything below relies on the condition being a comparison.
+ auto *Cmp = dyn_cast<CmpInst>(Cond);
+ if (!Cmp)
+ return nullptr;
+
+ if (CopyOf != Cmp->getOperand(0) && CopyOf != Cmp->getOperand(1)) {
+ DEBUG(dbgs() << "Copy is not of any condition operands!");
+ return nullptr;
+ }
+ Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0));
+ Value *SecondOp = lookupOperandLeader(Cmp->getOperand(1));
+ bool SwappedOps = false;
+ // Sort the ops
+ if (shouldSwapOperands(FirstOp, SecondOp)) {
+ std::swap(FirstOp, SecondOp);
+ SwappedOps = true;
+ }
+ CmpInst::Predicate Predicate =
+ SwappedOps ? Cmp->getSwappedPredicate() : Cmp->getPredicate();
+
+ if (isa<PredicateAssume>(PI)) {
+ // If the comparison is true when the operands are equal, then we know the
+ // operands are equal, because assumes must always be true.
+ if (CmpInst::isTrueWhenEqual(Predicate)) {
+ addPredicateUsers(PI, I);
+ return createVariableOrConstant(FirstOp);
+ }
+ }
+ if (const auto *PBranch = dyn_cast<PredicateBranch>(PI)) {
+ // If we are *not* a copy of the comparison, we may be equal to the other
+ // operand when the predicate implies something about equality of
+ // operations. In particular, if the comparison is true/false when the
+ // operands are equal, and we are on the right edge, we know this operation
+ // is equal to something.
+ if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) ||
+ (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) {
+ addPredicateUsers(PI, I);
+ return createVariableOrConstant(FirstOp);
+ }
+ // Handle the special case of floating point.
+ if (((PBranch->TrueEdge && Predicate == CmpInst::FCMP_OEQ) ||
+ (!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) &&
+ isa<ConstantFP>(FirstOp) && !cast<ConstantFP>(FirstOp)->isZero()) {
+ addPredicateUsers(PI, I);
+ return createConstantExpression(cast<Constant>(FirstOp));
+ }
+ }
+ return nullptr;
+}
+
// Evaluate read only and pure calls, and create an expression result.
-const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I,
- const BasicBlock *B) {
+const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) {
auto *CI = cast<CallInst>(I);
- if (AA->doesNotAccessMemory(CI))
- return createCallExpression(CI, nullptr, B);
- if (AA->onlyReadsMemory(CI)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ // Intrinsics with the returned attribute are copies of arguments.
+ if (auto *ReturnedValue = II->getReturnedArgOperand()) {
+ if (II->getIntrinsicID() == Intrinsic::ssa_copy)
+ if (const auto *Result = performSymbolicPredicateInfoEvaluation(I))
+ return Result;
+ return createVariableOrConstant(ReturnedValue);
+ }
+ }
+ if (AA->doesNotAccessMemory(CI)) {
+ return createCallExpression(CI, TOPClass->getMemoryLeader());
+ } else if (AA->onlyReadsMemory(CI)) {
MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(CI);
- return createCallExpression(CI, lookupMemoryAccessEquiv(DefiningAccess), B);
+ return createCallExpression(CI, DefiningAccess);
}
return nullptr;
}
-// Update the memory access equivalence table to say that From is equal to To,
+// Retrieve the memory class for a given MemoryAccess.
+CongruenceClass *NewGVN::getMemoryClass(const MemoryAccess *MA) const {
+
+ auto *Result = MemoryAccessToClass.lookup(MA);
+ assert(Result && "Should have found memory class");
+ return Result;
+}
+
+// Update the MemoryAccess equivalence table to say that From is equal to To,
// and return true if this is different from what already existed in the table.
-bool NewGVN::setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To) {
- DEBUG(dbgs() << "Setting " << *From << " equivalent to ");
- if (!To)
- DEBUG(dbgs() << "itself");
- else
- DEBUG(dbgs() << *To);
+bool NewGVN::setMemoryClass(const MemoryAccess *From,
+ CongruenceClass *NewClass) {
+ assert(NewClass &&
+ "Every MemoryAccess should be getting mapped to a non-null class");
+ DEBUG(dbgs() << "Setting " << *From);
+ DEBUG(dbgs() << " equivalent to congruence class ");
+ DEBUG(dbgs() << NewClass->getID() << " with current MemoryAccess leader ");
+ DEBUG(dbgs() << *NewClass->getMemoryLeader());
DEBUG(dbgs() << "\n");
- auto LookupResult = MemoryAccessEquiv.find(From);
+
+ auto LookupResult = MemoryAccessToClass.find(From);
bool Changed = false;
// If it's already in the table, see if the value changed.
- if (LookupResult != MemoryAccessEquiv.end()) {
- if (To && LookupResult->second != To) {
+ if (LookupResult != MemoryAccessToClass.end()) {
+ auto *OldClass = LookupResult->second;
+ if (OldClass != NewClass) {
+ // If this is a phi, we have to handle memory member updates.
+ if (auto *MP = dyn_cast<MemoryPhi>(From)) {
+ OldClass->memory_erase(MP);
+ NewClass->memory_insert(MP);
+ // This may have killed the class if it had no non-memory members
+ if (OldClass->getMemoryLeader() == From) {
+ if (OldClass->memory_empty()) {
+ OldClass->setMemoryLeader(nullptr);
+ } else {
+ OldClass->setMemoryLeader(getNextMemoryLeader(OldClass));
+ DEBUG(dbgs() << "Memory class leader change for class "
+ << OldClass->getID() << " to "
+ << *OldClass->getMemoryLeader()
+ << " due to removal of a memory member " << *From
+ << "\n");
+ markMemoryLeaderChangeTouched(OldClass);
+ }
+ }
+ }
// It wasn't equivalent before, and now it is.
- LookupResult->second = To;
- Changed = true;
- } else if (!To) {
- // It used to be equivalent to something, and now it's not.
- MemoryAccessEquiv.erase(LookupResult);
+ LookupResult->second = NewClass;
Changed = true;
}
- } else {
- assert(!To &&
- "Memory equivalence should never change from nothing to something");
}
return Changed;
}
+
+// Determine if a phi is cycle-free. That means the values in the phi don't
+// depend on any expressions that can change value as a result of the phi.
+// For example, a non-cycle free phi would be v = phi(0, v+1).
+bool NewGVN::isCycleFree(const PHINode *PN) {
+ // In order to compute cycle-freeness, we do SCC finding on the phi, and see
+ // what kind of SCC it ends up in. If it is a singleton, it is cycle-free.
+ // If it is not in a singleton, it is only cycle free if the other members are
+ // all phi nodes (as they do not compute anything, they are copies). TODO:
+ // There are likely a few other intrinsics or expressions that could be
+ // included here, but this happens so infrequently already that it is not
+ // likely to be worth it.
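+ // For example (illustrative): two loop-carried phis that only feed each
+ // other, such as
+ //   %a = phi i32 [ %x, %entry ], [ %b, %latch ]
+ //   %b = phi i32 [ %a, %body ], ...
+ // form an all-phi SCC and are cycle free, while v = phi(0, v+1) pulls the
+ // add into its SCC and is not.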
+ auto PCS = PhiCycleState.lookup(PN);
+ if (PCS == PCS_Unknown) {
+ SCCFinder.Start(PN);
+ auto &SCC = SCCFinder.getComponentFor(PN);
+ // It's cycle free if its size is 1 or the SCC contains *only* phi nodes.
+ if (SCC.size() == 1)
+ PhiCycleState.insert({PN, PCS_CycleFree});
+ else {
+ bool AllPhis =
+ llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); });
+ PCS = AllPhis ? PCS_CycleFree : PCS_Cycle;
+ for (auto *Member : SCC)
+ if (auto *MemberPhi = dyn_cast<PHINode>(Member))
+ PhiCycleState.insert({MemberPhi, PCS});
+ }
+ }
+ if (PCS == PCS_Cycle)
+ return false;
+ return true;
+}
+
// Evaluate PHI nodes symbolically, and create an expression result.
-const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
- const BasicBlock *B) {
- auto *E = cast<PHIExpression>(createPHIExpression(I));
+const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) {
+ // True if one of the incoming phi edges is a backedge.
+ bool HasBackedge = false;
+ // AllConstant tracks whether all of the *original* phi operands were
+ // constant. This is really shorthand for "this phi cannot cycle due to
+ // forward propagation", as any change in value of the phi is guaranteed not
+ // to later change the value of the phi.
+ // IE it can't be v = phi(undef, v+1).
+ bool AllConstant = true;
+ auto *E =
+ cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant));
// We match the semantics of SimplifyPhiNode from InstructionSimplify here.
// See if all arguments are the same.
@@ -861,14 +1464,15 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
if (Filtered.begin() == Filtered.end()) {
DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef"
<< "\n");
- E->deallocateOperands(ArgRecycler);
- ExpressionAllocator.Deallocate(E);
+ deleteExpression(E);
return createConstantExpression(UndefValue::get(I->getType()));
}
+ unsigned NumOps = 0;
Value *AllSameValue = *(Filtered.begin());
++Filtered.begin();
// Can't use std::equal here, sadly, because filter.begin moves.
- if (llvm::all_of(Filtered, [AllSameValue](const Value *V) {
+ if (llvm::all_of(Filtered, [AllSameValue, &NumOps](const Value *V) {
+ ++NumOps;
return V == AllSameValue;
})) {
// In LLVM's non-standard representation of phi nodes, it's possible to have
@@ -881,27 +1485,32 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
// We also special case undef, so that if we have an undef, we can't use the
// common value unless it dominates the phi block.
if (HasUndef) {
+ // If we have undef and at least one other value, this is really a
+ // multivalued phi, and we need to know if it's cycle free in order to
+ // evaluate whether we can ignore the undef. The other parts of this are
+ // just shortcuts. If there is no backedge, or all operands are
+ // constants, or all operands other than the undef are ignored, it also
+ // must be cycle free.
+ if (!AllConstant && HasBackedge && NumOps > 0 &&
+ !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I)))
+ return E;
+
// Only have to check for instructions
if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue))
- if (!DT->dominates(AllSameInst, I))
+ if (!someEquivalentDominates(AllSameInst, I))
return E;
}
NumGVNPhisAllSame++;
DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue
<< "\n");
- E->deallocateOperands(ArgRecycler);
- ExpressionAllocator.Deallocate(E);
- if (auto *C = dyn_cast<Constant>(AllSameValue))
- return createConstantExpression(C);
- return createVariableExpression(AllSameValue);
+ deleteExpression(E);
+ return createVariableOrConstant(AllSameValue);
}
return E;
}
-const Expression *
-NewGVN::performSymbolicAggrValueEvaluation(Instruction *I,
- const BasicBlock *B) {
+const Expression *NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) {
if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
@@ -931,19 +1540,130 @@ NewGVN::performSymbolicAggrValueEvaluation(Instruction *I,
// expression.
assert(II->getNumArgOperands() == 2 &&
"Expect two args for recognised intrinsics.");
- return createBinaryExpression(Opcode, EI->getType(),
- II->getArgOperand(0),
- II->getArgOperand(1), B);
+ return createBinaryExpression(
+ Opcode, EI->getType(), II->getArgOperand(0), II->getArgOperand(1));
}
}
}
- return createAggregateValueExpression(I, B);
+ return createAggregateValueExpression(I);
+}
+const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) {
+ auto *CI = dyn_cast<CmpInst>(I);
+ // See if our operands are equal to those of a previous predicate, and if so,
+ // if it implies true or false.
+ auto Op0 = lookupOperandLeader(CI->getOperand(0));
+ auto Op1 = lookupOperandLeader(CI->getOperand(1));
+ auto OurPredicate = CI->getPredicate();
+ if (shouldSwapOperands(Op0, Op1)) {
+ std::swap(Op0, Op1);
+ OurPredicate = CI->getSwappedPredicate();
+ }
+
+ // Avoid processing the same info twice
+ const PredicateBase *LastPredInfo = nullptr;
+ // See if we know something about the comparison itself, like it is the target
+ // of an assume.
+ auto *CmpPI = PredInfo->getPredicateInfoFor(I);
+ if (dyn_cast_or_null<PredicateAssume>(CmpPI))
+ return createConstantExpression(ConstantInt::getTrue(CI->getType()));
+
+ if (Op0 == Op1) {
+ // This condition does not depend on predicates, no need to add users
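+ // For example (illustrative): if %a and %b are in the same congruence
+ // class, "icmp eq i32 %a, %b" folds to true here and "icmp slt i32 %a, %b"
+ // folds to false.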
+ if (CI->isTrueWhenEqual())
+ return createConstantExpression(ConstantInt::getTrue(CI->getType()));
+ else if (CI->isFalseWhenEqual())
+ return createConstantExpression(ConstantInt::getFalse(CI->getType()));
+ }
+
+ // NOTE: Because we are comparing both operands here and below, and using
+ // previous comparisons, we rely on the fact that predicateinfo knows to mark
+ // comparisons that use renamed operands as users of the earlier comparisons.
+ // It is *not* enough to just mark predicateinfo renamed operands as users of
+ // the earlier comparisons, because the *other* operand may have changed in a
+ // previous iteration.
+ // Example:
+ // icmp slt %a, %b
+ // %b.0 = ssa.copy(%b)
+ // false branch:
+ // icmp slt %c, %b.0
+
+ // %c and %a may start out equal, and thus, the code below will say the second
+ // icmp is false. %c may become equal to something else, and in that case the
+ // second icmp *must* be reexamined, but it would not be if only the renamed
+ // operands are considered users of the icmp.
+
+ // *Currently* we only check one level of comparisons back, and only mark one
+ // level back as touched when changes happen. If you modify this code to look
+ // back farther through comparisons, you *must* mark the appropriate
+ // comparisons as users in PredicateInfo.cpp, or you will cause bugs.
+ // See if we know something just from the operands themselves.
+
+ // See if our operands have predicate info, so that we may be able to derive
+ // something from a previous comparison.
+ for (const auto &Op : CI->operands()) {
+ auto *PI = PredInfo->getPredicateInfoFor(Op);
+ if (const auto *PBranch = dyn_cast_or_null<PredicateBranch>(PI)) {
+ if (PI == LastPredInfo)
+ continue;
+ LastPredInfo = PI;
+
+ // TODO: Along the false edge, we may know more things too, like that an
+ // icmp of the same operands is false.
+ // TODO: We only handle actual comparison conditions below, not and/or.
+ auto *BranchCond = dyn_cast<CmpInst>(PBranch->Condition);
+ if (!BranchCond)
+ continue;
+ auto *BranchOp0 = lookupOperandLeader(BranchCond->getOperand(0));
+ auto *BranchOp1 = lookupOperandLeader(BranchCond->getOperand(1));
+ auto BranchPredicate = BranchCond->getPredicate();
+ if (shouldSwapOperands(BranchOp0, BranchOp1)) {
+ std::swap(BranchOp0, BranchOp1);
+ BranchPredicate = BranchCond->getSwappedPredicate();
+ }
+ if (BranchOp0 == Op0 && BranchOp1 == Op1) {
+ if (PBranch->TrueEdge) {
+ // If we know the previous predicate is true and we are on the true
+ // edge, then we may be implied true or false.
+ if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate,
+ BranchPredicate)) {
+ addPredicateUsers(PI, I);
+ return createConstantExpression(
+ ConstantInt::getTrue(CI->getType()));
+ }
+
+ if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate,
+ BranchPredicate)) {
+ addPredicateUsers(PI, I);
+ return createConstantExpression(
+ ConstantInt::getFalse(CI->getType()));
+ }
+
+ } else {
+ // Just handle the ne and eq cases, where if we have the same
+ // operands, we may know something.
+ if (BranchPredicate == OurPredicate) {
+ addPredicateUsers(PI, I);
+ // Same predicate, same ops, we know it was false, so this is false.
+ return createConstantExpression(
+ ConstantInt::getFalse(CI->getType()));
+ } else if (BranchPredicate ==
+ CmpInst::getInversePredicate(OurPredicate)) {
+ addPredicateUsers(PI, I);
+ // Inverse predicate, we know the other was false, so this is true.
+ return createConstantExpression(
+ ConstantInt::getTrue(CI->getType()));
+ }
+ }
+ }
+ }
+ }
+ // Create expression will take care of simplifyCmpInst
+ return createExpression(I);
}
// Substitute and symbolize the value before value numbering.
-const Expression *NewGVN::performSymbolicEvaluation(Value *V,
- const BasicBlock *B) {
+const Expression *NewGVN::performSymbolicEvaluation(Value *V) {
const Expression *E = nullptr;
if (auto *C = dyn_cast<Constant>(V))
E = createConstantExpression(C);
@@ -957,24 +1677,27 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V,
switch (I->getOpcode()) {
case Instruction::ExtractValue:
case Instruction::InsertValue:
- E = performSymbolicAggrValueEvaluation(I, B);
+ E = performSymbolicAggrValueEvaluation(I);
break;
case Instruction::PHI:
- E = performSymbolicPHIEvaluation(I, B);
+ E = performSymbolicPHIEvaluation(I);
break;
case Instruction::Call:
- E = performSymbolicCallEvaluation(I, B);
+ E = performSymbolicCallEvaluation(I);
break;
case Instruction::Store:
- E = performSymbolicStoreEvaluation(I, B);
+ E = performSymbolicStoreEvaluation(I);
break;
case Instruction::Load:
- E = performSymbolicLoadEvaluation(I, B);
+ E = performSymbolicLoadEvaluation(I);
break;
case Instruction::BitCast: {
- E = createExpression(I, B);
+ E = createExpression(I);
+ } break;
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ E = performSymbolicCmpEvaluation(I);
} break;
-
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -993,8 +1716,6 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- case Instruction::ICmp:
- case Instruction::FCmp:
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -1011,7 +1732,7 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V,
case Instruction::InsertElement:
case Instruction::ShuffleVector:
case Instruction::GetElementPtr:
- E = createExpression(I, B);
+ E = createExpression(I);
break;
default:
return nullptr;
@@ -1020,129 +1741,297 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V,
return E;
}
-// There is an edge from 'Src' to 'Dst'. Return true if every path from
-// the entry block to 'Dst' passes via this edge. In particular 'Dst'
-// must not be reachable via another edge from 'Src'.
-bool NewGVN::isOnlyReachableViaThisEdge(const BasicBlockEdge &E) const {
-
- // While in theory it is interesting to consider the case in which Dst has
- // more than one predecessor, because Dst might be part of a loop which is
- // only reachable from Src, in practice it is pointless since at the time
- // GVN runs all such loops have preheaders, which means that Dst will have
- // been changed to have only one predecessor, namely Src.
- const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
- const BasicBlock *Src = E.getStart();
- assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
- (void)Src;
- return Pred != nullptr;
-}
-
void NewGVN::markUsersTouched(Value *V) {
// Now mark the users as touched.
for (auto *User : V->users()) {
assert(isa<Instruction>(User) && "Use of value not within an instruction?");
- TouchedInstructions.set(InstrDFS[User]);
+ TouchedInstructions.set(InstrToDFSNum(User));
}
}
-void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) {
- for (auto U : MA->users()) {
- if (auto *MUD = dyn_cast<MemoryUseOrDef>(U))
- TouchedInstructions.set(InstrDFS[MUD->getMemoryInst()]);
- else
- TouchedInstructions.set(InstrDFS[U]);
+void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) {
+ DEBUG(dbgs() << "Adding memory user " << *U << " to " << *To << "\n");
+ MemoryToUsers[To].insert(U);
+}
+
+void NewGVN::markMemoryDefTouched(const MemoryAccess *MA) {
+ TouchedInstructions.set(MemoryToDFSNum(MA));
+}
+
+void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) {
+ if (isa<MemoryUse>(MA))
+ return;
+ for (auto U : MA->users())
+ TouchedInstructions.set(MemoryToDFSNum(U));
+ const auto Result = MemoryToUsers.find(MA);
+ if (Result != MemoryToUsers.end()) {
+ for (auto *User : Result->second)
+ TouchedInstructions.set(MemoryToDFSNum(User));
+ MemoryToUsers.erase(Result);
+ }
+}
+
+// Add I to the set of users of a given predicate.
+void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) {
+ if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
+ PredicateToUsers[PBranch->Condition].insert(I);
+ else if (auto *PAssume = dyn_cast<PredicateBranch>(PB))
+ PredicateToUsers[PAssume->Condition].insert(I);
+}
+
+// Touch all the predicates that depend on this instruction.
+void NewGVN::markPredicateUsersTouched(Instruction *I) {
+ const auto Result = PredicateToUsers.find(I);
+ if (Result != PredicateToUsers.end()) {
+ for (auto *User : Result->second)
+ TouchedInstructions.set(InstrToDFSNum(User));
+ PredicateToUsers.erase(Result);
}
}
+// Mark users affected by a memory leader change.
+void NewGVN::markMemoryLeaderChangeTouched(CongruenceClass *CC) {
+ for (auto M : CC->memory())
+ markMemoryDefTouched(M);
+}
+
// Touch the instructions that need to be updated after a congruence class has a
// leader change, and mark changed values.
-void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) {
- for (auto M : CC->Members) {
+void NewGVN::markValueLeaderChangeTouched(CongruenceClass *CC) {
+ for (auto M : *CC) {
if (auto *I = dyn_cast<Instruction>(M))
- TouchedInstructions.set(InstrDFS[I]);
+ TouchedInstructions.set(InstrToDFSNum(I));
LeaderChanges.insert(M);
}
}
+// Given a range of things that have instruction DFS numbers, this will return
+// the member of the range with the smallest DFS number.
+template <class T, class Range>
+T *NewGVN::getMinDFSOfRange(const Range &R) const {
+ std::pair<T *, unsigned> MinDFS = {nullptr, ~0U};
+ for (const auto X : R) {
+ auto DFSNum = InstrToDFSNum(X);
+ if (DFSNum < MinDFS.second)
+ MinDFS = {X, DFSNum};
+ }
+ return MinDFS.first;
+}
+
+// This function returns the MemoryAccess that should be the next leader of
+// congruence class CC, under the assumption that the current leader is going to
+// disappear.
+const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const {
+ // TODO: If this ends up too slow, we can maintain a next memory leader like we
+ // do for regular leaders.
+ // Make sure there will be a leader to find
+ assert((CC->getStoreCount() > 0 || !CC->memory_empty()) &&
+ "Can't get next leader if there is none");
+ if (CC->getStoreCount() > 0) {
+ if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first))
+ return MSSA->getMemoryAccess(NL);
+ // Find the store with the minimum DFS number.
+ auto *V = getMinDFSOfRange<Value>(make_filter_range(
+ *CC, [&](const Value *V) { return isa<StoreInst>(V); }));
+ return MSSA->getMemoryAccess(cast<StoreInst>(V));
+ }
+ assert(CC->getStoreCount() == 0);
+
+ // Given our assertion, hitting this part must mean
+ // !OldClass->memory_empty()
+ if (CC->memory_size() == 1)
+ return *CC->memory_begin();
+ return getMinDFSOfRange<const MemoryPhi>(CC->memory());
+}
+
+// This function returns the next value leader of a congruence class, under the
+// assumption that the current leader is going away. This should end up being
+// the next most dominating member.
+Value *NewGVN::getNextValueLeader(CongruenceClass *CC) const {
+ // We don't need to sort members if there is only 1, and we don't care about
+ // sorting the TOP class because everything either gets out of it or is
+ // unreachable.
+
+ if (CC->size() == 1 || CC == TOPClass) {
+ return *(CC->begin());
+ } else if (CC->getNextLeader().first) {
+ ++NumGVNAvoidedSortedLeaderChanges;
+ return CC->getNextLeader().first;
+ } else {
+ ++NumGVNSortedLeaderChanges;
+ // NOTE: If this ends up too slow, we can maintain a dual structure for
+ // member testing/insertion, or keep things mostly sorted, and sort only
+ // here, or use SparseBitVector or ....
+ return getMinDFSOfRange<Value>(*CC);
+ }
+}
+
+// Move a MemoryAccess, currently in OldClass, to NewClass, including updates to
+// the memory members, etc for the move.
+//
+// The invariants of this function are:
+//
+// I must be moving to NewClass from OldClass. The StoreCount of OldClass and
+// NewClass is expected to have been updated for I already if it is a store.
+// The OldClass memory leader has not been updated yet if I was the leader.
+void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I,
+ MemoryAccess *InstMA,
+ CongruenceClass *OldClass,
+ CongruenceClass *NewClass) {
+ // If the leader is I, and we had a representative MemoryAccess, it should
+ // be the MemoryAccess of OldClass.
+ assert((!InstMA || !OldClass->getMemoryLeader() ||
+ OldClass->getLeader() != I ||
+ OldClass->getMemoryLeader() == InstMA) &&
+ "Representative MemoryAccess mismatch");
+ // First, see what happens to the new class
+ if (!NewClass->getMemoryLeader()) {
+ // Should be a new class, or a store becoming a leader of a new class.
+ assert(NewClass->size() == 1 ||
+ (isa<StoreInst>(I) && NewClass->getStoreCount() == 1));
+ NewClass->setMemoryLeader(InstMA);
+ // Mark it touched if we didn't just create a singleton
+ DEBUG(dbgs() << "Memory class leader change for class " << NewClass->getID()
+ << " due to new memory instruction becoming leader\n");
+ markMemoryLeaderChangeTouched(NewClass);
+ }
+ setMemoryClass(InstMA, NewClass);
+ // Now, fixup the old class if necessary
+ if (OldClass->getMemoryLeader() == InstMA) {
+ if (OldClass->getStoreCount() != 0 || !OldClass->memory_empty()) {
+ OldClass->setMemoryLeader(getNextMemoryLeader(OldClass));
+ DEBUG(dbgs() << "Memory class leader change for class "
+ << OldClass->getID() << " to "
+ << *OldClass->getMemoryLeader()
+ << " due to removal of old leader " << *InstMA << "\n");
+ markMemoryLeaderChangeTouched(OldClass);
+ } else
+ OldClass->setMemoryLeader(nullptr);
+ }
+}
+
// Move a value, currently in OldClass, to be part of NewClass
-// Update OldClass for the move (including changing leaders, etc)
-void NewGVN::moveValueToNewCongruenceClass(Instruction *I,
+// Update OldClass and NewClass for the move (including changing leaders, etc).
+void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
CongruenceClass *OldClass,
CongruenceClass *NewClass) {
- DEBUG(dbgs() << "New congruence class for " << I << " is " << NewClass->ID
- << "\n");
-
- if (I == OldClass->NextLeader.first)
- OldClass->NextLeader = {nullptr, ~0U};
+ if (I == OldClass->getNextLeader().first)
+ OldClass->resetNextLeader();
// It's possible, though unlikely, for us to discover equivalences such
// that the current leader does not dominate the old one.
// This statistic tracks how often this happens.
// We assert on phi nodes when this happens, currently, for debugging, because
// we want to make sure we name phi node cycles properly.
- if (isa<Instruction>(NewClass->RepLeader) && NewClass->RepLeader &&
- I != NewClass->RepLeader &&
- DT->properlyDominates(
- I->getParent(),
- cast<Instruction>(NewClass->RepLeader)->getParent())) {
- ++NumGVNNotMostDominatingLeader;
- assert(!isa<PHINode>(I) &&
- "New class for instruction should not be dominated by instruction");
- }
-
- if (NewClass->RepLeader != I) {
- auto DFSNum = InstrDFS.lookup(I);
- if (DFSNum < NewClass->NextLeader.second)
- NewClass->NextLeader = {I, DFSNum};
+ if (isa<Instruction>(NewClass->getLeader()) && NewClass->getLeader() &&
+ I != NewClass->getLeader()) {
+ auto *IBB = I->getParent();
+ auto *NCBB = cast<Instruction>(NewClass->getLeader())->getParent();
+ bool Dominated =
+ IBB == NCBB && InstrToDFSNum(I) < InstrToDFSNum(NewClass->getLeader());
+ Dominated = Dominated || DT->properlyDominates(IBB, NCBB);
+ if (Dominated) {
+ ++NumGVNNotMostDominatingLeader;
+ assert(
+ !isa<PHINode>(I) &&
+ "New class for instruction should not be dominated by instruction");
+ }
}
- OldClass->Members.erase(I);
- NewClass->Members.insert(I);
- if (isa<StoreInst>(I)) {
- --OldClass->StoreCount;
- assert(OldClass->StoreCount >= 0);
- ++NewClass->StoreCount;
- assert(NewClass->StoreCount > 0);
+ if (NewClass->getLeader() != I)
+ NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)});
+
+ OldClass->erase(I);
+ NewClass->insert(I);
+ // Handle our special casing of stores.
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ OldClass->decStoreCount();
+ // Okay, so when do we want to make a store a leader of a class?
+ // If we have a store defined by an earlier load, we want the earlier load
+ // to lead the class.
+ // If we have a store defined by something else, we want the store to lead
+ // the class so everything else gets the "something else" as a value.
+ // If we have a store as the single member of the class, we want the store
+ // as the leader
+ if (NewClass->getStoreCount() == 0 && !NewClass->getStoredValue()) {
+ // If it's a store expression we are using, it means we are not equivalent
+ // to something earlier.
+ if (isa<StoreExpression>(E)) {
+ assert(lookupOperandLeader(SI->getValueOperand()) !=
+ NewClass->getLeader());
+ NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand()));
+ markValueLeaderChangeTouched(NewClass);
+ // Shift the new class leader to be the store
+ DEBUG(dbgs() << "Changing leader of congruence class "
+ << NewClass->getID() << " from " << *NewClass->getLeader()
+ << " to " << *SI << " because store joined class\n");
+ // If we changed the leader, we have to mark it changed because we don't
+ // know what it will do to symbolic evaluation.
+ NewClass->setLeader(SI);
+ }
+ // We rely on the code below handling the MemoryAccess change.
+ }
+ NewClass->incStoreCount();
}
-
+ // True if there are no memory instructions left in a class that had memory
+ // instructions before.
+
+ // If it's not a memory use, set the MemoryAccess equivalence
+ auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I));
+ bool InstWasMemoryLeader = InstMA && OldClass->getMemoryLeader() == InstMA;
+ if (InstMA)
+ moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass);
ValueToClass[I] = NewClass;
// See if we destroyed the class or need to swap leaders.
- if (OldClass->Members.empty() && OldClass != InitialClass) {
- if (OldClass->DefiningExpr) {
- OldClass->Dead = true;
- DEBUG(dbgs() << "Erasing expression " << OldClass->DefiningExpr
+ if (OldClass->empty() && OldClass != TOPClass) {
+ if (OldClass->getDefiningExpr()) {
+ DEBUG(dbgs() << "Erasing expression " << OldClass->getDefiningExpr()
<< " from table\n");
- ExpressionToClass.erase(OldClass->DefiningExpr);
+ ExpressionToClass.erase(OldClass->getDefiningExpr());
}
- } else if (OldClass->RepLeader == I) {
+ } else if (OldClass->getLeader() == I) {
// When the leader changes, the value numbering of
// everything may change due to symbolization changes, so we need to
// reprocess.
- DEBUG(dbgs() << "Leader change!\n");
+ DEBUG(dbgs() << "Value class leader change for class " << OldClass->getID()
+ << "\n");
++NumGVNLeaderChanges;
- // We don't need to sort members if there is only 1, and we don't care about
- // sorting the initial class because everything either gets out of it or is
- // unreachable.
- if (OldClass->Members.size() == 1 || OldClass == InitialClass) {
- OldClass->RepLeader = *(OldClass->Members.begin());
- } else if (OldClass->NextLeader.first) {
- ++NumGVNAvoidedSortedLeaderChanges;
- OldClass->RepLeader = OldClass->NextLeader.first;
- OldClass->NextLeader = {nullptr, ~0U};
- } else {
- ++NumGVNSortedLeaderChanges;
- // TODO: If this ends up to slow, we can maintain a dual structure for
- // member testing/insertion, or keep things mostly sorted, and sort only
- // here, or ....
- std::pair<Value *, unsigned> MinDFS = {nullptr, ~0U};
- for (const auto X : OldClass->Members) {
- auto DFSNum = InstrDFS.lookup(X);
- if (DFSNum < MinDFS.second)
- MinDFS = {X, DFSNum};
- }
- OldClass->RepLeader = MinDFS.first;
+ // Destroy the stored value if there are no more stores to represent it.
+ // Note that this is basically clean up for the expression removal that
+ // happens below. If we remove stores from a class, we may leave it as a
+ // class of equivalent memory phis.
+ if (OldClass->getStoreCount() == 0) {
+ if (OldClass->getStoredValue())
+ OldClass->setStoredValue(nullptr);
+ }
+ // If we destroy the old access leader and it's a store, we have to
+ // effectively destroy the congruence class. When it comes to scalars,
+ // anything with the same value is as good as any other. That means that
+ // one leader is as good as another, and as long as you have some leader for
+ // the value, you are good. When it comes to *memory states*, only one
+ // particular thing really represents the definition of a given memory
+ // state. Once it goes away, we need to re-evaluate which pieces of memory
+ // are really still equivalent. The best way to do this is to re-value
+ // number things. The only way to really make that happen is to destroy the
+ // rest of the class. In order to effectively destroy the class, we reset
+ // ExpressionToClass for each by using the ValueToExpression mapping. The
+ // members later get marked as touched due to the leader change. We will
+ // create new congruence classes, and the pieces that are still equivalent
+ // will end back together in a new class. If this becomes too expensive, it
+ // is possible to use a versioning scheme for the congruence classes to
+ // avoid the expressions finding this old class. Note that the situation is
+ // different for memory phis, because they are evaluated anew each time, and
+ // they become equal not by hashing, but by seeing if all operands are the
+ // same (or only one is reachable).
+ if (OldClass->getStoreCount() > 0 && InstWasMemoryLeader) {
+ DEBUG(dbgs() << "Kicking everything out of class " << OldClass->getID()
+ << " because MemoryAccess leader changed");
+ for (auto Member : *OldClass)
+ ExpressionToClass.erase(ValueToExpression.lookup(Member));
}
- markLeaderChangeTouched(OldClass);
+ OldClass->setLeader(getNextValueLeader(OldClass));
+ OldClass->resetNextLeader();
+ markValueLeaderChangeTouched(OldClass);
}
}
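
   The "next leader" bookkeeping used above (resetNextLeader / addPossibleNextLeader)
   amounts to caching the member with the smallest DFS number so that a leader change
   rarely needs a full scan of the class. A minimal standalone sketch of that idea
   follows; the type and method names are invented for illustration and are not part
   of NewGVN's CongruenceClass.

    #include <climits>
    #include <utility>

    // Toy stand-in for a class's next-leader cache: remember the candidate with
    // the lowest DFS number seen so far; a full member scan is only needed when
    // the cache has been reset.
    struct NextLeaderCache {
      std::pair<const void *, unsigned> Best{nullptr, UINT_MAX};

      // Called whenever a non-leader member joins the class.
      void offer(const void *V, unsigned DFSNum) {
        if (DFSNum < Best.second)
          Best = {V, DFSNum};
      }

      void reset() { Best = {nullptr, UINT_MAX}; }
      const void *candidate() const { return Best.first; }
    };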
@@ -1150,12 +2039,12 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I,
void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
ValueToExpression[I] = E;
// This is guaranteed to return something, since it will at least find
- // INITIAL.
+ // TOP.
CongruenceClass *IClass = ValueToClass[I];
assert(IClass && "Should have found a IClass");
// Dead classes should have been eliminated from the mapping.
- assert(!IClass->Dead && "Found a dead class");
+ assert(!IClass->isDead() && "Found a dead class");
CongruenceClass *EClass;
if (const auto *VE = dyn_cast<VariableExpression>(E)) {
@@ -1171,79 +2060,52 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
// Constants and variables should always be made the leader.
if (const auto *CE = dyn_cast<ConstantExpression>(E)) {
- NewClass->RepLeader = CE->getConstantValue();
+ NewClass->setLeader(CE->getConstantValue());
} else if (const auto *SE = dyn_cast<StoreExpression>(E)) {
StoreInst *SI = SE->getStoreInst();
- NewClass->RepLeader =
- lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
+ NewClass->setLeader(SI);
+ NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand()));
+ // The memory leader will be filled in properly by the
+ // moveValueToNewCongruenceClass call.
} else {
- NewClass->RepLeader = I;
+ NewClass->setLeader(I);
}
assert(!isa<VariableExpression>(E) &&
"VariableExpression should have been handled already");
EClass = NewClass;
DEBUG(dbgs() << "Created new congruence class for " << *I
- << " using expression " << *E << " at " << NewClass->ID
- << " and leader " << *(NewClass->RepLeader) << "\n");
- DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n");
+ << " using expression " << *E << " at " << NewClass->getID()
+ << " and leader " << *(NewClass->getLeader()));
+ if (NewClass->getStoredValue())
+ DEBUG(dbgs() << " and stored value " << *(NewClass->getStoredValue()));
+ DEBUG(dbgs() << "\n");
} else {
EClass = lookupResult.first->second;
if (isa<ConstantExpression>(E))
- assert(isa<Constant>(EClass->RepLeader) &&
+ assert((isa<Constant>(EClass->getLeader()) ||
+ (EClass->getStoredValue() &&
+ isa<Constant>(EClass->getStoredValue()))) &&
"Any class with a constant expression should have a "
"constant leader");
assert(EClass && "Somehow don't have an eclass");
- assert(!EClass->Dead && "We accidentally looked up a dead class");
+ assert(!EClass->isDead() && "We accidentally looked up a dead class");
}
}
bool ClassChanged = IClass != EClass;
bool LeaderChanged = LeaderChanges.erase(I);
if (ClassChanged || LeaderChanged) {
- DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E
+ DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E
<< "\n");
-
if (ClassChanged)
- moveValueToNewCongruenceClass(I, IClass, EClass);
+ moveValueToNewCongruenceClass(I, E, IClass, EClass);
markUsersTouched(I);
- if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
- // If this is a MemoryDef, we need to update the equivalence table. If
- // we determined the expression is congruent to a different memory
- // state, use that different memory state. If we determined it didn't,
- // we update that as well. Right now, we only support store
- // expressions.
- if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
- EClass->Members.size() != 1) {
- auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
- setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
- } else {
- setMemoryAccessEquivTo(MA, nullptr);
- }
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
markMemoryUsersTouched(MA);
- }
- } else if (auto *SI = dyn_cast<StoreInst>(I)) {
- // There is, sadly, one complicating thing for stores. Stores do not
- // produce values, only consume them. However, in order to make loads and
- // stores value number the same, we ignore the value operand of the store.
- // But the value operand will still be the leader of our class, and thus, it
- // may change. Because the store is a use, the store will get reprocessed,
- // but nothing will change about it, and so nothing above will catch it
- // (since the class will not change). In order to make sure everything ends
- // up okay, we need to recheck the leader of the class. Since stores of
- // different values value number differently due to different memorydefs, we
- // are guaranteed the leader is always the same between stores in the same
- // class.
- DEBUG(dbgs() << "Checking store leader\n");
- auto ProperLeader =
- lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
- if (EClass->RepLeader != ProperLeader) {
- DEBUG(dbgs() << "Store leader changed, fixing\n");
- EClass->RepLeader = ProperLeader;
- markLeaderChangeTouched(EClass);
- markMemoryUsersTouched(MSSA->getMemoryAccess(SI));
- }
+ if (auto *CI = dyn_cast<CmpInst>(I))
+ markPredicateUsersTouched(CI);
}
}
@@ -1267,11 +2129,11 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
// they are the only thing that depend on new edges. Anything using their
// values will get propagated to if necessary.
if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To))
- TouchedInstructions.set(InstrDFS[MemPhi]);
+ TouchedInstructions.set(InstrToDFSNum(MemPhi));
auto BI = To->begin();
while (isa<PHINode>(BI)) {
- TouchedInstructions.set(InstrDFS[&*BI]);
+ TouchedInstructions.set(InstrToDFSNum(&*BI));
++BI;
}
}
@@ -1280,8 +2142,8 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
// Given a predicate condition (from a switch, cmp, or whatever) and a block,
// see if we know some constant value for it already.
-Value *NewGVN::findConditionEquivalence(Value *Cond, BasicBlock *B) const {
- auto Result = lookupOperandLeader(Cond, nullptr, B);
+Value *NewGVN::findConditionEquivalence(Value *Cond) const {
+ auto Result = lookupOperandLeader(Cond);
if (isa<Constant>(Result))
return Result;
return nullptr;
@@ -1293,10 +2155,10 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
BranchInst *BR;
if ((BR = dyn_cast<BranchInst>(TI)) && BR->isConditional()) {
Value *Cond = BR->getCondition();
- Value *CondEvaluated = findConditionEquivalence(Cond, B);
+ Value *CondEvaluated = findConditionEquivalence(Cond);
if (!CondEvaluated) {
if (auto *I = dyn_cast<Instruction>(Cond)) {
- const Expression *E = createExpression(I, B);
+ const Expression *E = createExpression(I);
if (const auto *CE = dyn_cast<ConstantExpression>(E)) {
CondEvaluated = CE->getConstantValue();
}
@@ -1329,13 +2191,13 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
Value *SwitchCond = SI->getCondition();
- Value *CondEvaluated = findConditionEquivalence(SwitchCond, B);
+ Value *CondEvaluated = findConditionEquivalence(SwitchCond);
// See if we were able to turn this switch statement into a constant.
if (CondEvaluated && isa<ConstantInt>(CondEvaluated)) {
auto *CondVal = cast<ConstantInt>(CondEvaluated);
// We should be able to get case value for this.
- auto CaseVal = SI->findCaseValue(CondVal);
- if (CaseVal.getCaseSuccessor() == SI->getDefaultDest()) {
+ auto Case = *SI->findCaseValue(CondVal);
+ if (Case.getCaseSuccessor() == SI->getDefaultDest()) {
// We proved the value is outside of the range of the case.
// We can't do anything other than mark the default dest as reachable,
// and go home.
@@ -1343,7 +2205,7 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
return;
}
// Now get where it goes and mark it reachable.
- BasicBlock *TargetBlock = CaseVal.getCaseSuccessor();
+ BasicBlock *TargetBlock = Case.getCaseSuccessor();
updateReachableEdge(B, TargetBlock);
} else {
for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
@@ -1361,45 +2223,66 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
}
// This also may be a memory defining terminator, in which case, set it
- // equivalent to nothing.
- if (MemoryAccess *MA = MSSA->getMemoryAccess(TI))
- setMemoryAccessEquivTo(MA, nullptr);
+ // equivalent only to itself.
+ //
+ auto *MA = MSSA->getMemoryAccess(TI);
+ if (MA && !isa<MemoryUse>(MA)) {
+ auto *CC = ensureLeaderOfMemoryClass(MA);
+ if (setMemoryClass(MA, CC))
+ markMemoryUsersTouched(MA);
+ }
}
}
-// The algorithm initially places the values of the routine in the INITIAL
-// congruence
-// class. The leader of INITIAL is the undetermined value `TOP`.
-// When the algorithm has finished, values still in INITIAL are unreachable.
+// The algorithm initially places the values of the routine in the TOP
+// congruence class. The leader of TOP is the undetermined value `undef`.
+// When the algorithm has finished, values still in TOP are unreachable.
void NewGVN::initializeCongruenceClasses(Function &F) {
- // FIXME now i can't remember why this is 2
- NextCongruenceNum = 2;
- // Initialize all other instructions to be in INITIAL class.
- CongruenceClass::MemberSet InitialValues;
- InitialClass = createCongruenceClass(nullptr, nullptr);
+ NextCongruenceNum = 0;
+
+ // Note that even though we use the live on entry def as a representative
+ // MemoryAccess, it is *not* the same as the actual live on entry def. We
+ // have no real equivalent to undef for MemoryAccesses, and so we really
+ // should be checking whether the MemoryAccess is top if we want to know if it
+ // is equivalent to everything. Otherwise, what this really signifies is that
+ // the access reaches all the way back to the beginning of the function.
+
+ // Initialize all other instructions to be in TOP class.
+ TOPClass = createCongruenceClass(nullptr, nullptr);
+ TOPClass->setMemoryLeader(MSSA->getLiveOnEntryDef());
+ // The live on entry def gets put into its own class.
+ MemoryAccessToClass[MSSA->getLiveOnEntryDef()] =
+ createMemoryClass(MSSA->getLiveOnEntryDef());
+
for (auto &B : F) {
- if (auto *MP = MSSA->getMemoryAccess(&B))
- MemoryAccessEquiv.insert({MP, MSSA->getLiveOnEntryDef()});
+ // All MemoryAccesses are equivalent to live on entry to start. They must
+ // be initialized to something so that initial changes are noticed. For
+ // the maximal answer, we initialize them all to be the same as
+ // liveOnEntry.
+ auto *MemoryBlockDefs = MSSA->getBlockDefs(&B);
+ if (MemoryBlockDefs)
+ for (const auto &Def : *MemoryBlockDefs) {
+ MemoryAccessToClass[&Def] = TOPClass;
+ auto *MD = dyn_cast<MemoryDef>(&Def);
+ // Insert the memory phis into the member list.
+ if (!MD) {
+ const MemoryPhi *MP = cast<MemoryPhi>(&Def);
+ TOPClass->memory_insert(MP);
+ MemoryPhiState.insert({MP, MPS_TOP});
+ }
- for (auto &I : B) {
- InitialValues.insert(&I);
- ValueToClass[&I] = InitialClass;
- // All memory accesses are equivalent to live on entry to start. They must
- // be initialized to something so that initial changes are noticed. For
- // the maximal answer, we initialize them all to be the same as
- // liveOnEntry. Note that to save time, we only initialize the
- // MemoryDef's for stores and all MemoryPhis to be equal. Right now, no
- // other expression can generate a memory equivalence. If we start
- // handling memcpy/etc, we can expand this.
- if (isa<StoreInst>(&I)) {
- MemoryAccessEquiv.insert(
- {MSSA->getMemoryAccess(&I), MSSA->getLiveOnEntryDef()});
- ++InitialClass->StoreCount;
- assert(InitialClass->StoreCount > 0);
+ if (MD && isa<StoreInst>(MD->getMemoryInst()))
+ TOPClass->incStoreCount();
}
+ for (auto &I : B) {
+ // Don't insert void terminators into the class. We don't value number
+ // them, and they just end up sitting in TOP.
+ if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
+ continue;
+ TOPClass->insert(&I);
+ ValueToClass[&I] = TOPClass;
}
}
- InitialClass->Members.swap(InitialValues);
// Initialize arguments to be in their own unique congruence classes
for (auto &FA : F.args())
@@ -1408,8 +2291,8 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
void NewGVN::cleanupTables() {
for (unsigned i = 0, e = CongruenceClasses.size(); i != e; ++i) {
- DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->ID << " has "
- << CongruenceClasses[i]->Members.size() << " members\n");
+ DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->getID()
+ << " has " << CongruenceClasses[i]->size() << " members\n");
// Make sure we delete the congruence class (probably worth switching to
// a unique_ptr at some point.
delete CongruenceClasses[i];
@@ -1427,15 +2310,14 @@ void NewGVN::cleanupTables() {
#ifndef NDEBUG
ProcessedCount.clear();
#endif
- DFSDomMap.clear();
InstrDFS.clear();
InstructionsToErase.clear();
-
DFSToInstr.clear();
BlockInstRange.clear();
TouchedInstructions.clear();
- DominatedInstRange.clear();
- MemoryAccessEquiv.clear();
+ MemoryAccessToClass.clear();
+ PredicateToUsers.clear();
+ MemoryToUsers.clear();
}
std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
@@ -1447,6 +2329,16 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
}
for (auto &I : *B) {
+ // There's no need to call isInstructionTriviallyDead more than once on
+ // an instruction. Therefore, once we know that an instruction is dead
+ // we change its DFS number so that it doesn't get value numbered.
+ if (isInstructionTriviallyDead(&I, TLI)) {
+ InstrDFS[&I] = 0;
+ DEBUG(dbgs() << "Skipping trivially dead instruction " << I << "\n");
+ markInstructionForDeletion(&I);
+ continue;
+ }
+
InstrDFS[&I] = End++;
DFSToInstr.emplace_back(&I);
}
@@ -1462,7 +2354,7 @@ void NewGVN::updateProcessedCount(Value *V) {
if (ProcessedCount.count(V) == 0) {
ProcessedCount.insert({V, 1});
} else {
- ProcessedCount[V] += 1;
+ ++ProcessedCount[V];
assert(ProcessedCount[V] < 100 &&
"Seem to have processed the same Value a lot");
}
@@ -1472,26 +2364,33 @@ void NewGVN::updateProcessedCount(Value *V) {
void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
// If all the arguments are the same, the MemoryPhi has the same value as the
// argument.
- // Filter out unreachable blocks from our operands.
+ // Filter out unreachable blocks and self phis from our operands.
+ const BasicBlock *PHIBlock = MP->getBlock();
auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
- return ReachableBlocks.count(MP->getIncomingBlock(U));
+ return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP &&
+ !isMemoryAccessTop(cast<MemoryAccess>(U)) &&
+ ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
});
-
- assert(Filtered.begin() != Filtered.end() &&
- "We should not be processing a MemoryPhi in a completely "
- "unreachable block");
+ // If all that is left is nothing, our MemoryPhi is undef. We keep it in
+ // TOPClass. Note: The only case where this should happen is if we have at
+ // least one self-argument.
+ if (Filtered.begin() == Filtered.end()) {
+ if (setMemoryClass(MP, TOPClass))
+ markMemoryUsersTouched(MP);
+ return;
+ }
// Transform the remaining operands into operand leaders.
// FIXME: mapped_iterator should have a range version.
auto LookupFunc = [&](const Use &U) {
- return lookupMemoryAccessEquiv(cast<MemoryAccess>(U));
+ return lookupMemoryLeader(cast<MemoryAccess>(U));
};
auto MappedBegin = map_iterator(Filtered.begin(), LookupFunc);
auto MappedEnd = map_iterator(Filtered.end(), LookupFunc);
// and now check if all the elements are equal.
// Sadly, we can't use std::equals since these are random access iterators.
- MemoryAccess *AllSameValue = *MappedBegin;
+ const auto *AllSameValue = *MappedBegin;
++MappedBegin;
bool AllEqual = std::all_of(
MappedBegin, MappedEnd,
@@ -1501,8 +2400,18 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
DEBUG(dbgs() << "Memory Phi value numbered to " << *AllSameValue << "\n");
else
DEBUG(dbgs() << "Memory Phi value numbered to itself\n");
-
- if (setMemoryAccessEquivTo(MP, AllEqual ? AllSameValue : nullptr))
+ // If it's equal to something, it's in that class. Otherwise, it has to be in
+ // a class where it is the leader (other things may be equivalent to it, but
+ // it needs to start off in its own class, which means it must have been the
+ // leader, and it can't have stopped being the leader because it was never
+ // removed).
+ CongruenceClass *CC =
+ AllEqual ? getMemoryClass(AllSameValue) : ensureLeaderOfMemoryClass(MP);
+ auto OldState = MemoryPhiState.lookup(MP);
+ assert(OldState != MPS_Invalid && "Invalid memory phi state");
+ auto NewState = AllEqual ? MPS_Equivalent : MPS_Unique;
+ MemoryPhiState[MP] = NewState;
+ if (setMemoryClass(MP, CC) || OldState != NewState)
markMemoryUsersTouched(MP);
}
@@ -1510,21 +2419,25 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
// congruence finding, and updating mappings.
void NewGVN::valueNumberInstruction(Instruction *I) {
DEBUG(dbgs() << "Processing instruction " << *I << "\n");
- if (isInstructionTriviallyDead(I, TLI)) {
- DEBUG(dbgs() << "Skipping unused instruction\n");
- markInstructionForDeletion(I);
- return;
- }
if (!I->isTerminator()) {
- const auto *Symbolized = performSymbolicEvaluation(I, I->getParent());
+ const Expression *Symbolized = nullptr;
+ if (DebugCounter::shouldExecute(VNCounter)) {
+ Symbolized = performSymbolicEvaluation(I);
+ } else {
+ // Mark the instruction as unused so we don't value number it again.
+ InstrDFS[I] = 0;
+ }
// If we couldn't come up with a symbolic expression, use the unknown
// expression
- if (Symbolized == nullptr)
+ if (Symbolized == nullptr) {
Symbolized = createUnknownExpression(I);
+ }
+
performCongruenceFinding(I, Symbolized);
} else {
// Handle terminators that return values. All of them produce values we
- // don't currently understand.
+ // don't currently understand. We don't place non-value producing
+ // terminators in a class.
if (!I->getType()->isVoidTy()) {
auto *Symbolized = createUnknownExpression(I);
performCongruenceFinding(I, Symbolized);
@@ -1539,72 +2452,102 @@ bool NewGVN::singleReachablePHIPath(const MemoryAccess *First,
const MemoryAccess *Second) const {
if (First == Second)
return true;
-
- if (auto *FirstDef = dyn_cast<MemoryUseOrDef>(First)) {
- auto *DefAccess = FirstDef->getDefiningAccess();
- return singleReachablePHIPath(DefAccess, Second);
- } else {
- auto *MP = cast<MemoryPhi>(First);
- auto ReachableOperandPred = [&](const Use &U) {
- return ReachableBlocks.count(MP->getIncomingBlock(U));
- };
- auto FilteredPhiArgs =
- make_filter_range(MP->operands(), ReachableOperandPred);
- SmallVector<const Value *, 32> OperandList;
- std::copy(FilteredPhiArgs.begin(), FilteredPhiArgs.end(),
- std::back_inserter(OperandList));
- bool Okay = OperandList.size() == 1;
- if (!Okay)
- Okay = std::equal(OperandList.begin(), OperandList.end(),
- OperandList.begin());
- if (Okay)
- return singleReachablePHIPath(cast<MemoryAccess>(OperandList[0]), Second);
+ if (MSSA->isLiveOnEntryDef(First))
return false;
+
+ const auto *EndDef = First;
+ for (auto *ChainDef : optimized_def_chain(First)) {
+ if (ChainDef == Second)
+ return true;
+ if (MSSA->isLiveOnEntryDef(ChainDef))
+ return false;
+ EndDef = ChainDef;
}
+ auto *MP = cast<MemoryPhi>(EndDef);
+ auto ReachableOperandPred = [&](const Use &U) {
+ return ReachableEdges.count({MP->getIncomingBlock(U), MP->getBlock()});
+ };
+ auto FilteredPhiArgs =
+ make_filter_range(MP->operands(), ReachableOperandPred);
+ SmallVector<const Value *, 32> OperandList;
+ std::copy(FilteredPhiArgs.begin(), FilteredPhiArgs.end(),
+ std::back_inserter(OperandList));
+ bool Okay = OperandList.size() == 1;
+ if (!Okay)
+ Okay =
+ std::equal(OperandList.begin(), OperandList.end(), OperandList.begin());
+ if (Okay)
+ return singleReachablePHIPath(cast<MemoryAccess>(OperandList[0]), Second);
+ return false;
}
 // Verify that the memory equivalence table makes sense relative to the
// congruence classes. Note that this checking is not perfect, and is currently
-// subject to very rare false negatives. It is only useful for testing/debugging.
+// subject to very rare false negatives. It is only useful for
+// testing/debugging.
void NewGVN::verifyMemoryCongruency() const {
- // Anything equivalent in the memory access table should be in the same
+#ifndef NDEBUG
+ // Verify that the memory equivalence table and the memory member sets match.
+ for (const auto *CC : CongruenceClasses) {
+ if (CC == TOPClass || CC->isDead())
+ continue;
+ if (CC->getStoreCount() != 0) {
+ assert((CC->getStoredValue() || !isa<StoreInst>(CC->getLeader())) &&
+ "Any class with a store as a "
+ "leader should have a "
+ "representative stored value\n");
+ assert(CC->getMemoryLeader() &&
+ "Any congruence class with a store should "
+ "have a representative access\n");
+ }
+
+ if (CC->getMemoryLeader())
+ assert(MemoryAccessToClass.lookup(CC->getMemoryLeader()) == CC &&
+ "Representative MemoryAccess does not appear to be reverse "
+ "mapped properly");
+ for (auto M : CC->memory())
+ assert(MemoryAccessToClass.lookup(M) == CC &&
+ "Memory member does not appear to be reverse mapped properly");
+ }
+
+ // Anything equivalent in the MemoryAccess table should be in the same
// congruence class.
// Filter out the unreachable and trivially dead entries, because they may
// never have been updated if the instructions were not processed.
auto ReachableAccessPred =
- [&](const std::pair<const MemoryAccess *, MemoryAccess *> Pair) {
+ [&](const std::pair<const MemoryAccess *, CongruenceClass *> Pair) {
bool Result = ReachableBlocks.count(Pair.first->getBlock());
if (!Result)
return false;
+ if (MSSA->isLiveOnEntryDef(Pair.first))
+ return true;
if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first))
return !isInstructionTriviallyDead(MemDef->getMemoryInst());
+ if (MemoryToDFSNum(Pair.first) == 0)
+ return false;
return true;
};
- auto Filtered = make_filter_range(MemoryAccessEquiv, ReachableAccessPred);
+ auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred);
for (auto KV : Filtered) {
- assert(KV.first != KV.second &&
- "We added a useless equivalence to the memory equivalence table");
- // Unreachable instructions may not have changed because we never process
- // them.
- if (!ReachableBlocks.count(KV.first->getBlock()))
- continue;
+ assert(KV.second != TOPClass &&
+ "Memory not unreachable but ended up in TOP");
if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
- auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second);
+ auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader());
if (FirstMUD && SecondMUD)
assert((singleReachablePHIPath(FirstMUD, SecondMUD) ||
- ValueToClass.lookup(FirstMUD->getMemoryInst()) ==
- ValueToClass.lookup(SecondMUD->getMemoryInst())) &&
- "The instructions for these memory operations should have "
- "been in the same congruence class or reachable through"
- "a single argument phi");
+ ValueToClass.lookup(FirstMUD->getMemoryInst()) ==
+ ValueToClass.lookup(SecondMUD->getMemoryInst())) &&
+ "The instructions for these memory operations should have "
+ "been in the same congruence class or reachable through"
+ "a single argument phi");
} else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) {
-
// We can only sanely verify that MemoryDefs in the operand list all have
// the same class.
auto ReachableOperandPred = [&](const Use &U) {
- return ReachableBlocks.count(FirstMP->getIncomingBlock(U)) &&
+ return ReachableEdges.count(
+ {FirstMP->getIncomingBlock(U), FirstMP->getBlock()}) &&
isa<MemoryDef>(U);
};
@@ -1622,19 +2565,127 @@ void NewGVN::verifyMemoryCongruency() const {
"All MemoryPhi arguments should be in the same class");
}
}
+#endif
+}
+
+// Verify that the sparse propagation we did actually found the maximal fixpoint.
+// We do this by storing the value to class mapping, touching all instructions,
+// and redoing the iteration to see if anything changed.
+void NewGVN::verifyIterationSettled(Function &F) {
+#ifndef NDEBUG
+ DEBUG(dbgs() << "Beginning iteration verification\n");
+ if (DebugCounter::isCounterSet(VNCounter))
+ DebugCounter::setCounterValue(VNCounter, StartingVNCounter);
+
+ // Note that we have to store the actual classes, as we may change existing
+ // classes during iteration. This is because our memory iteration propagation
+ // is not perfect, and so may waste a little work. But it should generate
+ // exactly the same congruence classes we have now, with different IDs.
+ std::map<const Value *, CongruenceClass> BeforeIteration;
+
+ for (auto &KV : ValueToClass) {
+ if (auto *I = dyn_cast<Instruction>(KV.first))
+ // Skip unused/dead instructions.
+ if (InstrToDFSNum(I) == 0)
+ continue;
+ BeforeIteration.insert({KV.first, *KV.second});
+ }
+
+ TouchedInstructions.set();
+ TouchedInstructions.reset(0);
+ iterateTouchedInstructions();
+ DenseSet<std::pair<const CongruenceClass *, const CongruenceClass *>>
+ EqualClasses;
+ for (const auto &KV : ValueToClass) {
+ if (auto *I = dyn_cast<Instruction>(KV.first))
+ // Skip unused/dead instructions.
+ if (InstrToDFSNum(I) == 0)
+ continue;
+ // We could sink these uses, but I think this adds a bit of clarity here as
+ // to what we are comparing.
+ auto *BeforeCC = &BeforeIteration.find(KV.first)->second;
+ auto *AfterCC = KV.second;
+ // Note that the classes can't change at this point, so we memoize the set
+ // that are equal.
+ if (!EqualClasses.count({BeforeCC, AfterCC})) {
+ assert(BeforeCC->isEquivalentTo(AfterCC) &&
+ "Value number changed after main loop completed!");
+ EqualClasses.insert({BeforeCC, AfterCC});
+ }
+ }
+#endif
+}
+
+// This is the main value numbering loop, it iterates over the initial touched
+// instruction set, propagating value numbers, marking things touched, etc,
+// until the set of touched instructions is completely empty.
+void NewGVN::iterateTouchedInstructions() {
+ unsigned int Iterations = 0;
+ // Figure out where TouchedInstructions starts.
+ int FirstInstr = TouchedInstructions.find_first();
+ // Nothing set, nothing to iterate, just return.
+ if (FirstInstr == -1)
+ return;
+ BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
+ while (TouchedInstructions.any()) {
+ ++Iterations;
+ // Walk through all the instructions in all the blocks in RPO.
+ // TODO: As we hit a new block, we should push and pop equalities into a
+ // table lookupOperandLeader can use, to catch things PredicateInfo
+ // might miss, like edge-only equivalences.
+ for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
+ InstrNum = TouchedInstructions.find_next(InstrNum)) {
+
+ // This instruction was found to be dead. We don't bother looking
+ // at it again.
+ if (InstrNum == 0) {
+ TouchedInstructions.reset(InstrNum);
+ continue;
+ }
+
+ Value *V = InstrFromDFSNum(InstrNum);
+ BasicBlock *CurrBlock = getBlockForValue(V);
+
+ // If we hit a new block, do reachability processing.
+ if (CurrBlock != LastBlock) {
+ LastBlock = CurrBlock;
+ bool BlockReachable = ReachableBlocks.count(CurrBlock);
+ const auto &CurrInstRange = BlockInstRange.lookup(CurrBlock);
+
+ // If it's not reachable, erase any touched instructions and move on.
+ if (!BlockReachable) {
+ TouchedInstructions.reset(CurrInstRange.first, CurrInstRange.second);
+ DEBUG(dbgs() << "Skipping instructions in block "
+ << getBlockName(CurrBlock)
+ << " because it is unreachable\n");
+ continue;
+ }
+ updateProcessedCount(CurrBlock);
+ }
+
+ if (auto *MP = dyn_cast<MemoryPhi>(V)) {
+ DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n");
+ valueNumberMemoryPhi(MP);
+ } else if (auto *I = dyn_cast<Instruction>(V)) {
+ valueNumberInstruction(I);
+ } else {
+ llvm_unreachable("Should have been a MemoryPhi or Instruction");
+ }
+ updateProcessedCount(V);
+ // Reset after processing (because we may mark ourselves as touched when
+ // we propagate equalities).
+ TouchedInstructions.reset(InstrNum);
+ }
+ }
+ NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations);
}
// This is the main transformation entry point.
-bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
- TargetLibraryInfo *_TLI, AliasAnalysis *_AA,
- MemorySSA *_MSSA) {
+bool NewGVN::runGVN() {
+ if (DebugCounter::isCounterSet(VNCounter))
+ StartingVNCounter = DebugCounter::getCounterValue(VNCounter);
bool Changed = false;
- DT = _DT;
- AC = _AC;
- TLI = _TLI;
- AA = _AA;
- MSSA = _MSSA;
- DL = &F.getParent()->getDataLayout();
+ NumFuncArgs = F.arg_size();
MSSAWalker = MSSA->getWalker();
// Count number of instructions for sizing of hash tables, and come
@@ -1642,15 +2693,14 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
unsigned ICount = 1;
// Add an empty instruction to account for the fact that we start at 1
DFSToInstr.emplace_back(nullptr);
- // Note: We want RPO traversal of the blocks, which is not quite the same as
- // dominator tree order, particularly with regard whether backedges get
- // visited first or second, given a block with multiple successors.
+ // Note: We want ideal RPO traversal of the blocks, which is not quite the
+ // same as dominator tree order, particularly with regard to whether backedges
+ // get visited first or second, given a block with multiple successors.
// If we visit in the wrong order, we will end up performing N times as many
// iterations.
 // The dominator tree does guarantee that, for a given dom tree node, its
// parent must occur before it in the RPO ordering. Thus, we only need to sort
// the siblings.
- DenseMap<const DomTreeNode *, unsigned> RPOOrdering;
ReversePostOrderTraversal<Function *> RPOT(&F);
unsigned Counter = 0;
for (auto &B : RPOT) {
@@ -1663,7 +2713,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
auto *Node = DT->getNode(B);
if (Node->getChildren().size() > 1)
std::sort(Node->begin(), Node->end(),
- [&RPOOrdering](const DomTreeNode *A, const DomTreeNode *B) {
+ [&](const DomTreeNode *A, const DomTreeNode *B) {
return RPOOrdering[A] < RPOOrdering[B];
});
}
@@ -1689,7 +2739,6 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
}
TouchedInstructions.resize(ICount);
- DominatedInstRange.reserve(F.size());
// Ensure we don't end up resizing the expressionToClass map, as
// that can be quite expensive. At most, we have one expression per
// instruction.
@@ -1701,62 +2750,10 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
ReachableBlocks.insert(&F.getEntryBlock());
initializeCongruenceClasses(F);
-
- unsigned int Iterations = 0;
- // We start out in the entry block.
- BasicBlock *LastBlock = &F.getEntryBlock();
- while (TouchedInstructions.any()) {
- ++Iterations;
- // Walk through all the instructions in all the blocks in RPO.
- for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
- InstrNum = TouchedInstructions.find_next(InstrNum)) {
- assert(InstrNum != 0 && "Bit 0 should never be set, something touched an "
- "instruction not in the lookup table");
- Value *V = DFSToInstr[InstrNum];
- BasicBlock *CurrBlock = nullptr;
-
- if (auto *I = dyn_cast<Instruction>(V))
- CurrBlock = I->getParent();
- else if (auto *MP = dyn_cast<MemoryPhi>(V))
- CurrBlock = MP->getBlock();
- else
- llvm_unreachable("DFSToInstr gave us an unknown type of instruction");
-
- // If we hit a new block, do reachability processing.
- if (CurrBlock != LastBlock) {
- LastBlock = CurrBlock;
- bool BlockReachable = ReachableBlocks.count(CurrBlock);
- const auto &CurrInstRange = BlockInstRange.lookup(CurrBlock);
-
- // If it's not reachable, erase any touched instructions and move on.
- if (!BlockReachable) {
- TouchedInstructions.reset(CurrInstRange.first, CurrInstRange.second);
- DEBUG(dbgs() << "Skipping instructions in block "
- << getBlockName(CurrBlock)
- << " because it is unreachable\n");
- continue;
- }
- updateProcessedCount(CurrBlock);
- }
-
- if (auto *MP = dyn_cast<MemoryPhi>(V)) {
- DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n");
- valueNumberMemoryPhi(MP);
- } else if (auto *I = dyn_cast<Instruction>(V)) {
- valueNumberInstruction(I);
- } else {
- llvm_unreachable("Should have been a MemoryPhi or Instruction");
- }
- updateProcessedCount(V);
- // Reset after processing (because we may mark ourselves as touched when
- // we propagate equalities).
- TouchedInstructions.reset(InstrNum);
- }
- }
- NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations);
-#ifndef NDEBUG
+ iterateTouchedInstructions();
verifyMemoryCongruency();
-#endif
+ verifyIterationSettled(F);
+
Changed |= eliminateInstructions(F);
// Delete all instructions marked for deletion.
@@ -1783,36 +2780,6 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
return Changed;
}
-bool NewGVN::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
- return runGVN(F, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
- &getAnalysis<AAResultsWrapperPass>().getAAResults(),
- &getAnalysis<MemorySSAWrapperPass>().getMSSA());
-}
-
-PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) {
- NewGVN Impl;
-
- // Apparently the order in which we get these results matter for
- // the old GVN (see Chandler's comment in GVN.cpp). I'll keep
- // the same order here, just in case.
- auto &AC = AM.getResult<AssumptionAnalysis>(F);
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
- auto &AA = AM.getResult<AAManager>(F);
- auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
- bool Changed = Impl.runGVN(F, &DT, &AC, &TLI, &AA, &MSSA);
- if (!Changed)
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<GlobalsAA>();
- return PA;
-}
-
// Return true if V is a value that will always be available (IE can
// be placed anywhere) in the function. We don't do globals here
// because they are often worse to put in place.
@@ -1821,21 +2788,15 @@ static bool alwaysAvailable(Value *V) {
return isa<Constant>(V) || isa<Argument>(V);
}
-// Get the basic block from an instruction/value.
-static BasicBlock *getBlockForValue(Value *V) {
- if (auto *I = dyn_cast<Instruction>(V))
- return I->getParent();
- return nullptr;
-}
-
struct NewGVN::ValueDFS {
int DFSIn = 0;
int DFSOut = 0;
int LocalNum = 0;
- // Only one of these will be set.
- Value *Val = nullptr;
+ // Only one of Def and U will be set.
+ // The bool in the Def tells us whether the Def is the stored value of a
+ // store.
+ PointerIntPair<Value *, 1, bool> Def;
Use *U = nullptr;
-
bool operator<(const ValueDFS &Other) const {
// It's not enough that any given field be less than - we have sets
// of fields that need to be evaluated together to give a proper ordering.
@@ -1875,89 +2836,151 @@ struct NewGVN::ValueDFS {
// but .val and .u.
// It does not matter what order we replace these operands in.
// You will always end up with the same IR, and this is guaranteed.
- return std::tie(DFSIn, DFSOut, LocalNum, Val, U) <
- std::tie(Other.DFSIn, Other.DFSOut, Other.LocalNum, Other.Val,
+ return std::tie(DFSIn, DFSOut, LocalNum, Def, U) <
+ std::tie(Other.DFSIn, Other.DFSOut, Other.LocalNum, Other.Def,
Other.U);
}
};
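
   The DFSIn/DFSOut pairs carried by ValueDFS come from a DFS numbering of the
   dominator tree, where domination is an interval-nesting test; that is what the
   elimination stack's scope checks later rely on. A one-line illustrative helper
   (invented name, not part of the pass) showing the test:

    // A dominates B in the dominator tree iff B's DFS interval nests inside A's.
    static bool dfsIntervalDominates(int AIn, int AOut, int BIn, int BOut) {
      return AIn <= BIn && BOut <= AOut;
    }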
-void NewGVN::convertDenseToDFSOrdered(
- CongruenceClass::MemberSet &Dense,
- SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
+// This function converts the set of members for a congruence class from values
+// to sets of defs and uses with associated DFS info. The total number of
+// reachable uses for each value is stored in UseCounts, and instructions that
+// seem dead (have no non-dead uses) are stored in ProbablyDead.
+void NewGVN::convertClassToDFSOrdered(
+ const CongruenceClass &Dense, SmallVectorImpl<ValueDFS> &DFSOrderedSet,
+ DenseMap<const Value *, unsigned int> &UseCounts,
+ SmallPtrSetImpl<Instruction *> &ProbablyDead) const {
for (auto D : Dense) {
// First add the value.
BasicBlock *BB = getBlockForValue(D);
// Constants are handled prior to ever calling this function, so
// we should only be left with instructions as members.
assert(BB && "Should have figured out a basic block for value");
- ValueDFS VD;
-
- std::pair<int, int> DFSPair = DFSDomMap[BB];
- assert(DFSPair.first != -1 && DFSPair.second != -1 && "Invalid DFS Pair");
- VD.DFSIn = DFSPair.first;
- VD.DFSOut = DFSPair.second;
- VD.Val = D;
- // If it's an instruction, use the real local dfs number.
- if (auto *I = dyn_cast<Instruction>(D))
- VD.LocalNum = InstrDFS[I];
- else
- llvm_unreachable("Should have been an instruction");
-
- DFSOrderedSet.emplace_back(VD);
-
- // Now add the users.
- for (auto &U : D->uses()) {
+ ValueDFS VDDef;
+ DomTreeNode *DomNode = DT->getNode(BB);
+ VDDef.DFSIn = DomNode->getDFSNumIn();
+ VDDef.DFSOut = DomNode->getDFSNumOut();
+ // If it's a store, use the leader of the value operand, if it's always
+ // available, or the value operand. TODO: We could do dominance checks to
+ // find a dominating leader, but not worth it ATM.
+ if (auto *SI = dyn_cast<StoreInst>(D)) {
+ auto Leader = lookupOperandLeader(SI->getValueOperand());
+ if (alwaysAvailable(Leader)) {
+ VDDef.Def.setPointer(Leader);
+ } else {
+ VDDef.Def.setPointer(SI->getValueOperand());
+ VDDef.Def.setInt(true);
+ }
+ } else {
+ VDDef.Def.setPointer(D);
+ }
+ assert(isa<Instruction>(D) &&
+ "The dense set member should always be an instruction");
+ VDDef.LocalNum = InstrToDFSNum(D);
+ DFSOrderedSet.emplace_back(VDDef);
+ Instruction *Def = cast<Instruction>(D);
+ unsigned int UseCount = 0;
+ // Now add the uses.
+ for (auto &U : Def->uses()) {
if (auto *I = dyn_cast<Instruction>(U.getUser())) {
- ValueDFS VD;
+ // Don't try to replace into dead uses
+ if (InstructionsToErase.count(I))
+ continue;
+ ValueDFS VDUse;
// Put the phi node uses in the incoming block.
BasicBlock *IBlock;
if (auto *P = dyn_cast<PHINode>(I)) {
IBlock = P->getIncomingBlock(U);
// Make phi node users appear last in the incoming block
// they are from.
- VD.LocalNum = InstrDFS.size() + 1;
+ VDUse.LocalNum = InstrDFS.size() + 1;
} else {
IBlock = I->getParent();
- VD.LocalNum = InstrDFS[I];
+ VDUse.LocalNum = InstrToDFSNum(I);
}
- std::pair<int, int> DFSPair = DFSDomMap[IBlock];
- VD.DFSIn = DFSPair.first;
- VD.DFSOut = DFSPair.second;
- VD.U = &U;
- DFSOrderedSet.emplace_back(VD);
+
+ // Skip uses in unreachable blocks, as we're going
+ // to delete them.
+ if (ReachableBlocks.count(IBlock) == 0)
+ continue;
+
+ DomTreeNode *DomNode = DT->getNode(IBlock);
+ VDUse.DFSIn = DomNode->getDFSNumIn();
+ VDUse.DFSOut = DomNode->getDFSNumOut();
+ VDUse.U = &U;
+ ++UseCount;
+ DFSOrderedSet.emplace_back(VDUse);
}
}
+
+ // If there are no uses, it's probably dead (but it may have side-effects,
+ // so it is not definitely dead). Otherwise, store the number of uses so we
+ // can track whether it becomes dead later.
+ if (UseCount == 0)
+ ProbablyDead.insert(Def);
+ else
+ UseCounts[Def] = UseCount;
}
}
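
   The UseCounts/ProbablyDead pair built here drives a small reference-count style
   protocol during elimination: when a use is redirected away from a def, its count
   drops and it may become a deletion candidate; when a dominating leader gains a
   use, it must come back off the candidate list. A standalone sketch of that
   bookkeeping follows; the type and keys are toys invented for the sketch.

    #include <unordered_map>
    #include <unordered_set>

    struct DeadCandidateTracker {
      std::unordered_map<const void *, unsigned> UseCounts;
      std::unordered_set<const void *> ProbablyDead;

      // A use of Def went away (e.g. its user was deleted or rewritten).
      void useRemoved(const void *Def) {
        auto It = UseCounts.find(Def);
        if (It != UseCounts.end() && It->second > 0 && --It->second == 0)
          ProbablyDead.insert(Def);
      }

      // Def gained a use (e.g. it became the dominating leader of a rewrite).
      void useAdded(const void *Def) {
        if (UseCounts[Def]++ == 0)
          ProbablyDead.erase(Def);
      }
    };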
-static void patchReplacementInstruction(Instruction *I, Value *Repl) {
- // Patch the replacement so that it is not more restrictive than the value
- // being replaced.
- auto *Op = dyn_cast<BinaryOperator>(I);
- auto *ReplOp = dyn_cast<BinaryOperator>(Repl);
+// This function converts the set of members for a congruence class from values
+// to the set of defs for loads and stores, with associated DFS info.
+void NewGVN::convertClassToLoadsAndStores(
+ const CongruenceClass &Dense,
+ SmallVectorImpl<ValueDFS> &LoadsAndStores) const {
+ for (auto D : Dense) {
+ if (!isa<LoadInst>(D) && !isa<StoreInst>(D))
+ continue;
- if (Op && ReplOp)
- ReplOp->andIRFlags(Op);
+ BasicBlock *BB = getBlockForValue(D);
+ ValueDFS VD;
+ DomTreeNode *DomNode = DT->getNode(BB);
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.Def.setPointer(D);
- if (auto *ReplInst = dyn_cast<Instruction>(Repl)) {
- // FIXME: If both the original and replacement value are part of the
- // same control-flow region (meaning that the execution of one
- // guarentees the executation of the other), then we can combine the
- // noalias scopes here and do better than the general conservative
- // answer used in combineMetadata().
+ // If it's an instruction, use the real local dfs number.
+ if (auto *I = dyn_cast<Instruction>(D))
+ VD.LocalNum = InstrToDFSNum(I);
+ else
+ llvm_unreachable("Should have been an instruction");
- // In general, GVN unifies expressions over different control-flow
- // regions, and so we need a conservative combination of the noalias
- // scopes.
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group};
- combineMetadata(ReplInst, I, KnownIDs);
+ LoadsAndStores.emplace_back(VD);
}
}
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
+ auto *ReplInst = dyn_cast<Instruction>(Repl);
+ if (!ReplInst)
+ return;
+
+ // Patch the replacement so that it is not more restrictive than the value
+ // being replaced.
+ // Note that if 'I' is a load being replaced by some operation,
+ // for example, by an arithmetic operation, then andIRFlags()
+ // would just erase all math flags from the original arithmetic
+ // operation, which is clearly not wanted and not needed.
+ if (!isa<LoadInst>(I))
+ ReplInst->andIRFlags(I);
+
+ // FIXME: If both the original and replacement value are part of the
+ // same control-flow region (meaning that the execution of one
+ // guarantees the execution of the other), then we can combine the
+ // noalias scopes here and do better than the general conservative
+ // answer used in combineMetadata().
+
+ // In general, GVN unifies expressions over different control-flow
+ // regions, and so we need a conservative combination of the noalias
+ // scopes.
+ static const unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group};
+ combineMetadata(ReplInst, I, KnownIDs);
+}
+
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
@@ -1967,10 +2990,6 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) {
DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
++NumGVNBlocksDeleted;
- // Check to see if there are non-terminating instructions to delete.
- if (isa<TerminatorInst>(BB->begin()))
- return;
-
// Delete the instructions backwards, as it has a reduced likelihood of having
// to update as many def-use and use-def chains. Start after the terminator.
auto StartPoint = BB->rbegin();
@@ -1987,6 +3006,11 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) {
Inst.eraseFromParent();
++NumGVNInstrDeleted;
}
+ // Now insert something that simplifycfg will turn into an unreachable.
+ Type *Int8Ty = Type::getInt8Ty(BB->getContext());
+ new StoreInst(UndefValue::get(Int8Ty),
+ Constant::getNullValue(Int8Ty->getPointerTo()),
+ BB->getTerminator());
}
void NewGVN::markInstructionForDeletion(Instruction *I) {
@@ -2086,59 +3110,59 @@ bool NewGVN::eliminateInstructions(Function &F) {
}
}
}
- DomTreeNode *Node = DT->getNode(&B);
- if (Node)
- DFSDomMap[&B] = {Node->getDFSNumIn(), Node->getDFSNumOut()};
}
- for (CongruenceClass *CC : CongruenceClasses) {
- // FIXME: We should eventually be able to replace everything still
- // in the initial class with undef, as they should be unreachable.
- // Right now, initial still contains some things we skip value
- // numbering of (UNREACHABLE's, for example).
- if (CC == InitialClass || CC->Dead)
+ // Map to store the use counts
+ DenseMap<const Value *, unsigned int> UseCounts;
+ for (CongruenceClass *CC : reverse(CongruenceClasses)) {
+ // Track the equivalent store info so we can decide whether to try
+ // dead store elimination.
+ SmallVector<ValueDFS, 8> PossibleDeadStores;
+ SmallPtrSet<Instruction *, 8> ProbablyDead;
+ if (CC->isDead() || CC->empty())
continue;
- assert(CC->RepLeader && "We should have had a leader");
+ // Everything still in the TOP class is unreachable or dead.
+ if (CC == TOPClass) {
+#ifndef NDEBUG
+ for (auto M : *CC)
+ assert((!ReachableBlocks.count(cast<Instruction>(M)->getParent()) ||
+ InstructionsToErase.count(cast<Instruction>(M))) &&
+ "Everything in TOP should be unreachable or dead at this "
+ "point");
+#endif
+ continue;
+ }
+ assert(CC->getLeader() && "We should have had a leader");
// If this is a leader that is always available, and it's a
// constant or has no equivalences, just replace everything with
// it. We then update the congruence class with whatever members
// are left.
- if (alwaysAvailable(CC->RepLeader)) {
- SmallPtrSet<Value *, 4> MembersLeft;
- for (auto M : CC->Members) {
-
+ Value *Leader =
+ CC->getStoredValue() ? CC->getStoredValue() : CC->getLeader();
+ if (alwaysAvailable(Leader)) {
+ CongruenceClass::MemberSet MembersLeft;
+ for (auto M : *CC) {
Value *Member = M;
-
// Void things have no uses we can replace.
- if (Member == CC->RepLeader || Member->getType()->isVoidTy()) {
+ if (Member == Leader || !isa<Instruction>(Member) ||
+ Member->getType()->isVoidTy()) {
MembersLeft.insert(Member);
continue;
}
-
- DEBUG(dbgs() << "Found replacement " << *(CC->RepLeader) << " for "
- << *Member << "\n");
- // Due to equality propagation, these may not always be
- // instructions, they may be real values. We don't really
- // care about trying to replace the non-instructions.
- if (auto *I = dyn_cast<Instruction>(Member)) {
- assert(CC->RepLeader != I &&
- "About to accidentally remove our leader");
- replaceInstruction(I, CC->RepLeader);
- AnythingReplaced = true;
-
- continue;
- } else {
- MembersLeft.insert(I);
- }
+ DEBUG(dbgs() << "Found replacement " << *(Leader) << " for " << *Member
+ << "\n");
+ auto *I = cast<Instruction>(Member);
+ assert(Leader != I && "About to accidentally remove our leader");
+ replaceInstruction(I, Leader);
+ AnythingReplaced = true;
}
- CC->Members.swap(MembersLeft);
-
+ CC->swap(MembersLeft);
} else {
- DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n");
+ DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID()
+ << "\n");
// If this is a singleton, we can skip it.
- if (CC->Members.size() != 1) {
-
+ if (CC->size() != 1) {
// This is a stack because equality replacement/etc may place
// constants in the middle of the member list, and we want to use
// those constant values in preference to the current leader, over
@@ -2147,24 +3171,19 @@ bool NewGVN::eliminateInstructions(Function &F) {
// Convert the members to DFS ordered sets and then merge them.
SmallVector<ValueDFS, 8> DFSOrderedSet;
- convertDenseToDFSOrdered(CC->Members, DFSOrderedSet);
+ convertClassToDFSOrdered(*CC, DFSOrderedSet, UseCounts, ProbablyDead);
// Sort the whole thing.
std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
-
for (auto &VD : DFSOrderedSet) {
int MemberDFSIn = VD.DFSIn;
int MemberDFSOut = VD.DFSOut;
- Value *Member = VD.Val;
- Use *MemberUse = VD.U;
-
- if (Member) {
- // We ignore void things because we can't get a value from them.
- // FIXME: We could actually use this to kill dead stores that are
- // dominated by equivalent earlier stores.
- if (Member->getType()->isVoidTy())
- continue;
- }
+ Value *Def = VD.Def.getPointer();
+ bool FromStore = VD.Def.getInt();
+ Use *U = VD.U;
+ // We ignore void things because we can't get a value from them.
+ if (Def && Def->getType()->isVoidTy())
+ continue;
if (EliminationStack.empty()) {
DEBUG(dbgs() << "Elimination Stack is empty\n");
@@ -2189,69 +3208,240 @@ bool NewGVN::eliminateInstructions(Function &F) {
// start using, we also push.
// Otherwise, we walk along, processing members who are
// dominated by this scope, and eliminate them.
- bool ShouldPush =
- Member && (EliminationStack.empty() || isa<Constant>(Member));
+ bool ShouldPush = Def && EliminationStack.empty();
bool OutOfScope =
!EliminationStack.isInScope(MemberDFSIn, MemberDFSOut);
if (OutOfScope || ShouldPush) {
// Sync to our current scope.
EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut);
- ShouldPush |= Member && EliminationStack.empty();
+ bool ShouldPush = Def && EliminationStack.empty();
if (ShouldPush) {
- EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut);
+ EliminationStack.push_back(Def, MemberDFSIn, MemberDFSOut);
+ }
+ }
+
+ // Skip the Defs; we only want to eliminate on their uses. But mark
+ // dominated defs as dead.
+ if (Def) {
+ // For anything in this case, what and how we value number
+ // guarantees that any side-effects that would have occurred (i.e.
+ // throwing, etc) can be proven to either still occur (because it's
+ // dominated by something that has the same side-effects), or never
+ // occur. Otherwise, we would not have been able to prove it value
+ // equivalent to something else. For these things, we can just mark
+ // it all dead. Note that this is different from the "ProbablyDead"
+ // set, which may not be dominated by anything, and thus, are only
+ // easy to prove dead if they are also side-effect free. Note that
+ // because stores are put in terms of the stored value, we skip
+ // stored values here. If the stored value is really dead, it will
+ // still be marked for deletion when we process it in its own class.
+ if (!EliminationStack.empty() && Def != EliminationStack.back() &&
+ isa<Instruction>(Def) && !FromStore)
+ markInstructionForDeletion(cast<Instruction>(Def));
+ continue;
+ }
+ // At this point, we know it is a Use we are trying to possibly
+ // replace.
+
+ assert(isa<Instruction>(U->get()) &&
+ "Current def should have been an instruction");
+ assert(isa<Instruction>(U->getUser()) &&
+ "Current user should have been an instruction");
+
+ // If the thing we are replacing into is already marked to be dead,
+ // this use is dead. Note that this is true regardless of whether
+ // we have anything dominating the use or not. We do this here
+ // because we are already walking all the uses anyway.
+ Instruction *InstUse = cast<Instruction>(U->getUser());
+ if (InstructionsToErase.count(InstUse)) {
+ auto &UseCount = UseCounts[U->get()];
+ if (--UseCount == 0) {
+ ProbablyDead.insert(cast<Instruction>(U->get()));
}
}
// If we get to this point, and the stack is empty we must have a use
- // with nothing we can use to eliminate it, just skip it.
+ // with nothing we can use to eliminate this use, so just skip it.
if (EliminationStack.empty())
continue;
- // Skip the Value's, we only want to eliminate on their uses.
- if (Member)
- continue;
- Value *Result = EliminationStack.back();
+ Value *DominatingLeader = EliminationStack.back();
// Don't replace our existing users with ourselves.
- if (MemberUse->get() == Result)
+ if (U->get() == DominatingLeader)
continue;
-
- DEBUG(dbgs() << "Found replacement " << *Result << " for "
- << *MemberUse->get() << " in " << *(MemberUse->getUser())
- << "\n");
+ DEBUG(dbgs() << "Found replacement " << *DominatingLeader << " for "
+ << *U->get() << " in " << *(U->getUser()) << "\n");
// If we replaced something in an instruction, handle the patching of
- // metadata.
- if (auto *ReplacedInst = dyn_cast<Instruction>(MemberUse->get()))
- patchReplacementInstruction(ReplacedInst, Result);
-
- assert(isa<Instruction>(MemberUse->getUser()));
- MemberUse->set(Result);
+ // metadata. Skip this if we are replacing predicateinfo with its
+ // original operand, as we already know we can just drop it.
+ auto *ReplacedInst = cast<Instruction>(U->get());
+ auto *PI = PredInfo->getPredicateInfoFor(ReplacedInst);
+ if (!PI || DominatingLeader != PI->OriginalOp)
+ patchReplacementInstruction(ReplacedInst, DominatingLeader);
+ U->set(DominatingLeader);
+ // This is now a use of the dominating leader, which means if the
+ // dominating leader was dead, it's now live!
+ auto &LeaderUseCount = UseCounts[DominatingLeader];
+ // It's about to be alive again.
+ if (LeaderUseCount == 0 && isa<Instruction>(DominatingLeader))
+ ProbablyDead.erase(cast<Instruction>(DominatingLeader));
+ ++LeaderUseCount;
AnythingReplaced = true;
}
}
}
+ // At this point, anything still in the ProbablyDead set is actually dead if
+ // it would be trivially dead.
+ for (auto *I : ProbablyDead)
+ if (wouldInstructionBeTriviallyDead(I))
+ markInstructionForDeletion(I);
+
// Cleanup the congruence class.
- SmallPtrSet<Value *, 4> MembersLeft;
- for (Value *Member : CC->Members) {
- if (Member->getType()->isVoidTy()) {
+ CongruenceClass::MemberSet MembersLeft;
+ for (auto *Member : *CC)
+ if (!isa<Instruction>(Member) ||
+ !InstructionsToErase.count(cast<Instruction>(Member)))
MembersLeft.insert(Member);
- continue;
- }
-
- if (auto *MemberInst = dyn_cast<Instruction>(Member)) {
- if (isInstructionTriviallyDead(MemberInst)) {
- // TODO: Don't mark loads of undefs.
- markInstructionForDeletion(MemberInst);
- continue;
+ CC->swap(MembersLeft);
+
+ // If we have possible dead stores to look at, try to eliminate them.
+ if (CC->getStoreCount() > 0) {
+ convertClassToLoadsAndStores(*CC, PossibleDeadStores);
+ std::sort(PossibleDeadStores.begin(), PossibleDeadStores.end());
+ ValueDFSStack EliminationStack;
+ for (auto &VD : PossibleDeadStores) {
+ int MemberDFSIn = VD.DFSIn;
+ int MemberDFSOut = VD.DFSOut;
+ Instruction *Member = cast<Instruction>(VD.Def.getPointer());
+ if (EliminationStack.empty() ||
+ !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut)) {
+ // Sync to our current scope.
+ EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut);
+ if (EliminationStack.empty()) {
+ EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut);
+ continue;
+ }
}
+ // We already did load elimination, so nothing to do here.
+ if (isa<LoadInst>(Member))
+ continue;
+ assert(!EliminationStack.empty());
+ Instruction *Leader = cast<Instruction>(EliminationStack.back());
+ (void)Leader;
+ assert(DT->dominates(Leader->getParent(), Member->getParent()));
+ // Member is dominated by Leader, and thus dead
+ DEBUG(dbgs() << "Marking dead store " << *Member
+ << " that is dominated by " << *Leader << "\n");
+ markInstructionForDeletion(Member);
+ CC->erase(Member);
+ ++NumGVNDeadStores;
}
- MembersLeft.insert(Member);
}
- CC->Members.swap(MembersLeft);
}
return AnythingReplaced;
}
+
+// This function provides global ranking of operations so that we can place them
+// in a canonical order. Note that rank alone is not necessarily enough for a
+// complete ordering, as constants all have the same rank. However, generally,
+// we will simplify an operation with all constants so that it doesn't matter
+// what order they appear in.
+unsigned int NewGVN::getRank(const Value *V) const {
+ // Prefer undef to anything else
+ if (isa<UndefValue>(V))
+ return 0;
+ if (isa<Constant>(V))
+ return 1;
+ else if (auto *A = dyn_cast<Argument>(V))
+ return 2 + A->getArgNo();
+
+ // Need to shift the instruction DFS by number of arguments + 3 to account for
+ // the constant and argument ranking above.
+ unsigned Result = InstrToDFSNum(V);
+ if (Result > 0)
+ return 3 + NumFuncArgs + Result;
+ // Unreachable or something else, just return a really large number.
+ return ~0;
+}
+
+// This is a function that says whether two commutative operations should
+// have their order swapped when canonicalizing.
+bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const {
+ // Because we only care about a total ordering, and don't rewrite expressions
+ // in this order, we order by rank, which will give a strict weak ordering to
+ // everything but constants, and then we order by pointer address.
+ return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B);
+}
+
+class NewGVNLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ NewGVNLegacyPass() : FunctionPass(ID) {
+ initializeNewGVNLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+
+bool NewGVNLegacyPass::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+ return NewGVN(F, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ &getAnalysis<AAResultsWrapperPass>().getAAResults(),
+ &getAnalysis<MemorySSAWrapperPass>().getMSSA(),
+ F.getParent()->getDataLayout())
+ .runGVN();
+}
+
+INITIALIZE_PASS_BEGIN(NewGVNLegacyPass, "newgvn", "Global Value Numbering",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_END(NewGVNLegacyPass, "newgvn", "Global Value Numbering", false,
+ false)
+
+char NewGVNLegacyPass::ID = 0;
+
+// createGVNPass - The public interface to this file.
+FunctionPass *llvm::createNewGVNPass() { return new NewGVNLegacyPass(); }
+
+PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) {
+ // Apparently the order in which we get these results matters for
+ // the old GVN (see Chandler's comment in GVN.cpp). I'll keep
+ // the same order here, just in case.
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ bool Changed =
+ NewGVN(F, &DT, &AC, &TLI, &AA, &MSSA, F.getParent()->getDataLayout())
+ .runGVN();
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
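
Editor's note: the getRank/shouldSwapOperands pair added above canonicalizes commutative operands with a (rank, pointer) comparison. The following is a minimal standalone C++ sketch of that ordering idea only; the toy Value struct and its Rank field are illustrative stand-ins, not the LLVM types.

#include <cassert>
#include <utility>

struct Value { unsigned Rank; };  // hypothetical stand-in for llvm::Value

static unsigned getRank(const Value *V) { return V->Rank; }

// Order by (rank, address); request a swap when the left operand compares
// greater, so the lower-ranked operand ends up first.
static bool shouldSwapOperands(const Value *A, const Value *B) {
  return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B);
}

int main() {
  Value Constant{1}, Inst{7};
  const Value *LHS = &Inst, *RHS = &Constant;
  if (shouldSwapOperands(LHS, RHS))
    std::swap(LHS, RHS);
  assert(getRank(LHS) <= getRank(RHS));  // operands now in canonical order
  return 0;
}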
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 1a7ddc9585ba..1bfecea2f61e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -66,7 +66,7 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
// Add attribute "readnone" so that backend can use a native sqrt instruction
// for this call. Insert a FP compare instruction and a conditional branch
// at the end of CurrBB.
- Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
+ Call->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
CurrBB.getTerminator()->eraseFromParent();
Builder.SetInsertPoint(&CurrBB);
Value *FCmp = Builder.CreateFCmpOEQ(Call, Call);
@@ -98,14 +98,14 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
// Skip if function either has local linkage or is not a known library
// function.
- LibFunc::Func LibFunc;
+ LibFunc LF;
if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
- !TLI->getLibFunc(CalledFunc->getName(), LibFunc))
+ !TLI->getLibFunc(CalledFunc->getName(), LF))
continue;
- switch (LibFunc) {
- case LibFunc::sqrtf:
- case LibFunc::sqrt:
+ switch (LF) {
+ case LibFunc_sqrtf:
+ case LibFunc_sqrt:
if (TTI->haveFastSqrt(Call->getType()) &&
optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
break;
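
Editor's note: optimizeSQRT above guards a fast sqrt with an ordered self-compare (fcmp oeq) and only falls back to the library call when the fast result is NaN. Below is a standalone sketch of that control flow at the C++ level, using __builtin_sqrt as an assumed stand-in for the native instruction the backend would emit.

#include <cmath>
#include <cstdio>

// Stand-in for the native sqrt instruction the readnone call lowers to.
static double nativeSqrt(double X) { return __builtin_sqrt(X); }

static double partiallyInlinedSqrt(double X) {
  double R = nativeSqrt(X);
  if (R == R)            // fcmp oeq R, R: only false when R is NaN
    return R;            // fast path, no libm call
  return std::sqrt(X);   // slow path: full library call for the edge cases
}

int main() {
  std::printf("%f %f\n", partiallyInlinedSqrt(2.0), partiallyInlinedSqrt(-1.0));
  return 0;
}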
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 65c814d7a63b..3dcab6090789 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -1069,8 +1069,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
///
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
- SmallVectorImpl<Value*> &Factors,
- const SmallVectorImpl<ValueEntry> &Ops) {
+ SmallVectorImpl<Value*> &Factors) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
if (!BO) {
Factors.push_back(V);
@@ -1078,8 +1077,8 @@ static void FindSingleUseMultiplyFactors(Value *V,
}
// Otherwise, add the LHS and RHS to the list of factors.
- FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops);
- FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops);
+ FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
+ FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
}
/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
@@ -1499,7 +1498,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
// Compute all of the factors of this added value.
SmallVector<Value*, 8> Factors;
- FindSingleUseMultiplyFactors(BOp, Factors, Ops);
+ FindSingleUseMultiplyFactors(BOp, Factors);
assert(Factors.size() > 1 && "Bad linearize!");
// Add one to FactorOccurrences for each unique factor in this op.
@@ -2236,8 +2235,8 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
ValueRankMap.clear();
if (MadeChange) {
- // FIXME: This should also 'preserve the CFG'.
- auto PA = PreservedAnalyses();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
return PA;
}
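
Editor's note: FindSingleUseMultiplyFactors now takes only the value and the factor list, since the Ops parameter was unused. Below is a standalone sketch of the recursive factor walk on a toy expression tree (the Expr struct is illustrative, not llvm::Value).

#include <vector>

struct Expr {
  bool IsMul;        // is this a reassociable multiply?
  Expr *LHS, *RHS;   // only meaningful when IsMul is true
};

// Descend through multiplies and collect the leaf operands as factors.
static void findMultiplyFactors(Expr *V, std::vector<Expr *> &Factors) {
  if (!V->IsMul) {
    Factors.push_back(V);
    return;
  }
  findMultiplyFactors(V->RHS, Factors);
  findMultiplyFactors(V->LHS, Factors);
}

int main() {
  Expr A{false, nullptr, nullptr}, B{false, nullptr, nullptr},
       C{false, nullptr, nullptr};
  Expr AB{true, &A, &B}, ABC{true, &AB, &C};   // (A * B) * C
  std::vector<Expr *> Factors;
  findMultiplyFactors(&ABC, Factors);          // yields {C, B, A}
  return Factors.size() == 3 ? 0 : 1;
}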
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 1de742050cb3..f344eb151464 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -365,6 +365,11 @@ findBaseDefiningValueOfVector(Value *I) {
// for particular sufflevector patterns.
return BaseDefiningValueResult(I, false);
+ // The behavior of getelementptr instructions is the same for vector and
+ // non-vector data types.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ return findBaseDefiningValue(GEP->getPointerOperand());
+
// A PHI or Select is a base defining value. The outer findBasePointer
// algorithm is responsible for constructing a base value for this BDV.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
@@ -634,7 +639,7 @@ static BDVState meetBDVStateImpl(const BDVState &LHS, const BDVState &RHS) {
// Values of type BDVState form a lattice, and this function implements the meet
// operation.
-static BDVState meetBDVState(BDVState LHS, BDVState RHS) {
+static BDVState meetBDVState(const BDVState &LHS, const BDVState &RHS) {
BDVState Result = meetBDVStateImpl(LHS, RHS);
assert(Result == meetBDVStateImpl(RHS, LHS) &&
"Math is wrong: meet does not commute!");
@@ -1123,14 +1128,14 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
// Create new attribute set containing only attributes which can be transferred
// from original call to the safepoint.
-static AttributeSet legalizeCallAttributes(AttributeSet AS) {
- AttributeSet Ret;
+static AttributeList legalizeCallAttributes(AttributeList AS) {
+ AttributeList Ret;
for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
unsigned Index = AS.getSlotIndex(Slot);
- if (Index == AttributeSet::ReturnIndex ||
- Index == AttributeSet::FunctionIndex) {
+ if (Index == AttributeList::ReturnIndex ||
+ Index == AttributeList::FunctionIndex) {
for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) {
@@ -1148,7 +1153,7 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) {
Ret = Ret.addAttributes(
AS.getContext(), Index,
- AttributeSet::get(AS.getContext(), Index, AttrBuilder(Attr)));
+ AttributeList::get(AS.getContext(), Index, AttrBuilder(Attr)));
}
}
@@ -1299,12 +1304,11 @@ static StringRef getDeoptLowering(CallSite CS) {
const char *DeoptLowering = "deopt-lowering";
if (CS.hasFnAttr(DeoptLowering)) {
// FIXME: CallSite has a *really* confusing interface around attributes
- // with values.
- const AttributeSet &CSAS = CS.getAttributes();
- if (CSAS.hasAttribute(AttributeSet::FunctionIndex,
- DeoptLowering))
- return CSAS.getAttribute(AttributeSet::FunctionIndex,
- DeoptLowering).getValueAsString();
+ // with values.
+ const AttributeList &CSAS = CS.getAttributes();
+ if (CSAS.hasAttribute(AttributeList::FunctionIndex, DeoptLowering))
+ return CSAS.getAttribute(AttributeList::FunctionIndex, DeoptLowering)
+ .getValueAsString();
Function *F = CS.getCalledFunction();
assert(F && F->hasFnAttribute(DeoptLowering));
return F->getFnAttribute(DeoptLowering).getValueAsString();
@@ -1388,7 +1392,6 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// Create the statepoint given all the arguments
Instruction *Token = nullptr;
- AttributeSet ReturnAttrs;
if (CS.isCall()) {
CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
CallInst *Call = Builder.CreateGCStatepointCall(
@@ -1400,11 +1403,12 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// Currently we will fail on parameter attributes and on certain
// function attributes.
- AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
+ AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
// In case if we can handle this set of attributes - set up function attrs
// directly on statepoint and return attrs later for gc_result intrinsic.
- Call->setAttributes(NewAttrs.getFnAttributes());
- ReturnAttrs = NewAttrs.getRetAttributes();
+ Call->setAttributes(AttributeList::get(Call->getContext(),
+ AttributeList::FunctionIndex,
+ NewAttrs.getFnAttributes()));
Token = Call;
@@ -1428,11 +1432,12 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// Currently we will fail on parameter attributes and on certain
// function attributes.
- AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
+ AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
// In case if we can handle this set of attributes - set up function attrs
// directly on statepoint and return attrs later for gc_result intrinsic.
- Invoke->setAttributes(NewAttrs.getFnAttributes());
- ReturnAttrs = NewAttrs.getRetAttributes();
+ Invoke->setAttributes(AttributeList::get(Invoke->getContext(),
+ AttributeList::FunctionIndex,
+ NewAttrs.getFnAttributes()));
Token = Invoke;
@@ -1478,7 +1483,9 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
StringRef Name =
CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
- GCResult->setAttributes(CS.getAttributes().getRetAttributes());
+ GCResult->setAttributes(
+ AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
+ CS.getAttributes().getRetAttributes()));
// We cannot RAUW or delete CS.getInstruction() because it could be in the
// live set of some other safepoint, in which case that safepoint's
@@ -1615,8 +1622,10 @@ static void relocationViaAlloca(
// Emit alloca for "LiveValue" and record it in "allocaMap" and
// "PromotableAllocas"
+ const DataLayout &DL = F.getParent()->getDataLayout();
auto emitAllocaFor = [&](Value *LiveValue) {
- AllocaInst *Alloca = new AllocaInst(LiveValue->getType(), "",
+ AllocaInst *Alloca = new AllocaInst(LiveValue->getType(),
+ DL.getAllocaAddrSpace(), "",
F.getEntryBlock().getFirstNonPHI());
AllocaMap[LiveValue] = Alloca;
PromotableAllocas.push_back(Alloca);
@@ -1873,7 +1882,7 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
"non noop cast is found during rematerialization");
Type *SrcTy = CI->getOperand(0)->getType();
- Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy);
+ Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, CI);
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
// Cost of the address calculation
@@ -2304,7 +2313,7 @@ static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
if (!R.empty())
AH.setAttributes(AH.getAttributes().removeAttributes(
- Ctx, Index, AttributeSet::get(Ctx, Index, R)));
+ Ctx, Index, AttributeList::get(Ctx, Index, R)));
}
void
@@ -2316,7 +2325,7 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1);
if (isa<PointerType>(F.getReturnType()))
- RemoveNonValidAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
+ RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex);
}
void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
@@ -2351,7 +2360,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
if (isa<PointerType>(CS.getArgument(i)->getType()))
RemoveNonValidAttrAtIndex(Ctx, CS, i + 1);
if (isa<PointerType>(CS.getType()))
- RemoveNonValidAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
+ RemoveNonValidAttrAtIndex(Ctx, CS, AttributeList::ReturnIndex);
}
}
}
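
Editor's note: meetBDVState above follows the common pattern of an unchecked lattice meet wrapped by a version that asserts the meet commutes. Below is a standalone sketch of that pattern over a toy three-element lattice; the State enum is illustrative, not the pass's BDVState.

#include <algorithm>
#include <cassert>

enum class State { Unknown = 0, Base = 1, Conflict = 2 };  // toy lattice

// Toy meet: the more pessimistic (larger) state wins.
static State meetImpl(State LHS, State RHS) { return std::max(LHS, RHS); }

static State meet(State LHS, State RHS) {
  State Result = meetImpl(LHS, RHS);
  assert(Result == meetImpl(RHS, LHS) && "meet does not commute!");
  return Result;
}

int main() {
  return meet(State::Base, State::Conflict) == State::Conflict ? 0 : 1;
}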
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index ede381c4c243..8908dae2f545 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -140,6 +140,14 @@ public:
return nullptr;
}
+ /// getBlockAddress - If this is a constant with a BlockAddress value, return
+ /// it, otherwise return null.
+ BlockAddress *getBlockAddress() const {
+ if (isConstant())
+ return dyn_cast<BlockAddress>(getConstant());
+ return nullptr;
+ }
+
void markForcedConstant(Constant *V) {
assert(isUnknown() && "Can't force a defined value!");
Val.setInt(forcedconstant);
@@ -306,20 +314,14 @@ public:
return MRVFunctionsTracked;
}
- void markOverdefined(Value *V) {
- assert(!V->getType()->isStructTy() &&
- "structs should use markAnythingOverdefined");
- markOverdefined(ValueState[V], V);
- }
-
- /// markAnythingOverdefined - Mark the specified value overdefined. This
+ /// markOverdefined - Mark the specified value overdefined. This
/// works with both scalars and structs.
- void markAnythingOverdefined(Value *V) {
+ void markOverdefined(Value *V) {
if (auto *STy = dyn_cast<StructType>(V->getType()))
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
markOverdefined(getStructValueState(V, i), V);
else
- markOverdefined(V);
+ markOverdefined(ValueState[V], V);
}
// isStructLatticeConstant - Return true if all the lattice values
@@ -513,12 +515,12 @@ private:
void visitCmpInst(CmpInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
- void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
+ void visitLandingPadInst(LandingPadInst &I) { markOverdefined(&I); }
void visitFuncletPadInst(FuncletPadInst &FPI) {
- markAnythingOverdefined(&FPI);
+ markOverdefined(&FPI);
}
void visitCatchSwitchInst(CatchSwitchInst &CPI) {
- markAnythingOverdefined(&CPI);
+ markOverdefined(&CPI);
visitTerminatorInst(CPI);
}
@@ -538,16 +540,16 @@ private:
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitFenceInst (FenceInst &I) { /*returns void*/ }
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
- markAnythingOverdefined(&I);
+ markOverdefined(&I);
}
void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); }
void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
- void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
+ void visitVAArgInst (Instruction &I) { markOverdefined(&I); }
void visitInstruction(Instruction &I) {
// If a new instruction is added to LLVM that we don't handle.
DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n');
- markAnythingOverdefined(&I); // Just in case
+ markOverdefined(&I); // Just in case
}
};
@@ -602,14 +604,36 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
return;
}
- Succs[SI->findCaseValue(CI).getSuccessorIndex()] = true;
+ Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true;
return;
}
- // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
- if (isa<IndirectBrInst>(&TI)) {
- // Just mark all destinations executable!
- Succs.assign(TI.getNumSuccessors(), true);
+ // If this is an indirect branch whose address is a blockaddress, we mark
+ // the target as executable.
+ if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) {
+ // Casts are folded by visitCastInst.
+ LatticeVal IBRValue = getValueState(IBR->getAddress());
+ BlockAddress *Addr = IBRValue.getBlockAddress();
+ if (!Addr) { // Overdefined or unknown condition?
+ // All destinations are executable!
+ if (!IBRValue.isUnknown())
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ BasicBlock* T = Addr->getBasicBlock();
+ assert(Addr->getFunction() == T->getParent() &&
+ "Block address of a different function ?");
+ for (unsigned i = 0; i < IBR->getNumSuccessors(); ++i) {
+ // This is the target.
+ if (IBR->getDestination(i) == T) {
+ Succs[i] = true;
+ return;
+ }
+ }
+
+ // If we didn't find our destination in the IBR successor list, then we
+ // have undefined behavior. It's ok to assume no successor is executable.
return;
}
@@ -659,13 +683,21 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
if (!CI)
return !SCValue.isUnknown();
- return SI->findCaseValue(CI).getCaseSuccessor() == To;
+ return SI->findCaseValue(CI)->getCaseSuccessor() == To;
}
- // Just mark all destinations executable!
- // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
- if (isa<IndirectBrInst>(TI))
- return true;
+ // If this is an indirect branch whose address is a blockaddress, we mark
+ // the target as executable.
+ if (auto *IBR = dyn_cast<IndirectBrInst>(TI)) {
+ LatticeVal IBRValue = getValueState(IBR->getAddress());
+ BlockAddress *Addr = IBRValue.getBlockAddress();
+
+ if (!Addr)
+ return !IBRValue.isUnknown();
+
+ // At this point, the indirectbr is branching on a blockaddress.
+ return Addr->getBasicBlock() == To;
+ }
DEBUG(dbgs() << "Unknown terminator instruction: " << *TI << '\n');
llvm_unreachable("SCCP: Don't know how to handle this terminator!");
@@ -693,7 +725,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// If this PN returns a struct, just mark the result overdefined.
// TODO: We could do a lot better than this if code actually uses this.
if (PN.getType()->isStructTy())
- return markAnythingOverdefined(&PN);
+ return markOverdefined(&PN);
if (getValueState(&PN).isOverdefined())
return; // Quick exit
@@ -803,7 +835,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
// If this returns a struct, mark all elements over defined, we don't track
// structs in structs.
if (EVI.getType()->isStructTy())
- return markAnythingOverdefined(&EVI);
+ return markOverdefined(&EVI);
// If this is extracting from more than one level of struct, we don't know.
if (EVI.getNumIndices() != 1)
@@ -828,7 +860,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
// If this has more than one index, we can't handle it, drive all results to
// undef.
if (IVI.getNumIndices() != 1)
- return markAnythingOverdefined(&IVI);
+ return markOverdefined(&IVI);
Value *Aggr = IVI.getAggregateOperand();
unsigned Idx = *IVI.idx_begin();
@@ -857,7 +889,7 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
// If this select returns a struct, just mark the result overdefined.
// TODO: We could do a lot better than this if code actually uses this.
if (I.getType()->isStructTy())
- return markAnythingOverdefined(&I);
+ return markOverdefined(&I);
LatticeVal CondValue = getValueState(I.getCondition());
if (CondValue.isUnknown())
@@ -910,9 +942,16 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
// Otherwise, one of our operands is overdefined. Try to produce something
// better than overdefined with some tricks.
-
- // If this is an AND or OR with 0 or -1, it doesn't matter that the other
- // operand is overdefined.
+ // If this is 0 / Y, it doesn't matter that the second operand is
+ // overdefined, and we can replace it with zero.
+ if (I.getOpcode() == Instruction::UDiv || I.getOpcode() == Instruction::SDiv)
+ if (V1State.isConstant() && V1State.getConstant()->isNullValue())
+ return markConstant(IV, &I, V1State.getConstant());
+
+ // If this is:
+ // -> AND/MUL with 0
+ // -> OR with -1
+ // it doesn't matter that the other operand is overdefined.
if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Mul ||
I.getOpcode() == Instruction::Or) {
LatticeVal *NonOverdefVal = nullptr;
@@ -1021,7 +1060,7 @@ void SCCPSolver::visitStoreInst(StoreInst &SI) {
void SCCPSolver::visitLoadInst(LoadInst &I) {
// If this load is of a struct, just mark the result overdefined.
if (I.getType()->isStructTy())
- return markAnythingOverdefined(&I);
+ return markOverdefined(&I);
LatticeVal PtrVal = getValueState(I.getOperand(0));
if (PtrVal.isUnknown()) return; // The pointer is not resolved yet!
@@ -1107,7 +1146,7 @@ CallOverdefined:
}
// Otherwise, we don't know anything about this call, mark it overdefined.
- return markAnythingOverdefined(I);
+ return markOverdefined(I);
}
// If this is a local function that doesn't have its address taken, mark its
@@ -1483,6 +1522,31 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
return true;
}
+ if (auto *IBR = dyn_cast<IndirectBrInst>(TI)) {
+ // Indirect branch with no successors? It's ok to assume it branches
+ // to no target.
+ if (IBR->getNumSuccessors() < 1)
+ continue;
+
+ if (!getValueState(IBR->getAddress()).isUnknown())
+ continue;
+
+ // If the input to SCCP is actually a branch on undef, fix the undef to
+ // the first successor of the indirect branch.
+ if (isa<UndefValue>(IBR->getAddress())) {
+ IBR->setAddress(BlockAddress::get(IBR->getSuccessor(0)));
+ markEdgeExecutable(&BB, IBR->getSuccessor(0));
+ return true;
+ }
+
+ // Otherwise, it is a branch on a symbolic value which is currently
+ // considered to be undef. Handle this by forcing the input value to the
+ // branch to the first successor.
+ markForcedConstant(IBR->getAddress(),
+ BlockAddress::get(IBR->getSuccessor(0)));
+ return true;
+ }
+
if (auto *SI = dyn_cast<SwitchInst>(TI)) {
if (!SI->getNumCases() || !getValueState(SI->getCondition()).isUnknown())
continue;
@@ -1490,12 +1554,12 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// If the input to SCCP is actually switch on undef, fix the undef to
// the first constant.
if (isa<UndefValue>(SI->getCondition())) {
- SI->setCondition(SI->case_begin().getCaseValue());
- markEdgeExecutable(&BB, SI->case_begin().getCaseSuccessor());
+ SI->setCondition(SI->case_begin()->getCaseValue());
+ markEdgeExecutable(&BB, SI->case_begin()->getCaseSuccessor());
return true;
}
- markForcedConstant(SI->getCondition(), SI->case_begin().getCaseValue());
+ markForcedConstant(SI->getCondition(), SI->case_begin()->getCaseValue());
return true;
}
}
@@ -1545,7 +1609,7 @@ static bool runSCCP(Function &F, const DataLayout &DL,
// Mark all arguments to the function as being overdefined.
for (Argument &AI : F.args())
- Solver.markAnythingOverdefined(&AI);
+ Solver.markOverdefined(&AI);
// Solve for constants.
bool ResolvedUndefs = true;
@@ -1728,7 +1792,7 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
// Assume nothing about the incoming arguments.
for (Argument &AI : F.args())
- Solver.markAnythingOverdefined(&AI);
+ Solver.markOverdefined(&AI);
}
// Loop over global variables. We inform the solver about any internal global
@@ -1817,32 +1881,9 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
if (!I) continue;
bool Folded = ConstantFoldTerminator(I->getParent());
- if (!Folded) {
- // The constant folder may not have been able to fold the terminator
- // if this is a branch or switch on undef. Fold it manually as a
- // branch to the first successor.
-#ifndef NDEBUG
- if (auto *BI = dyn_cast<BranchInst>(I)) {
- assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
- "Branch should be foldable!");
- } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
- assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
- } else {
- llvm_unreachable("Didn't fold away reference to block!");
- }
-#endif
-
- // Make this an uncond branch to the first successor.
- TerminatorInst *TI = I->getParent()->getTerminator();
- BranchInst::Create(TI->getSuccessor(0), TI);
-
- // Remove entries in successor phi nodes to remove edges.
- for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
- TI->getSuccessor(i)->removePredecessor(TI->getParent());
-
- // Remove the old terminator.
- TI->eraseFromParent();
- }
+ assert(Folded &&
+ "Expect TermInst on constantint or blockaddress to be folded");
+ (void) Folded;
}
// Finally, delete the basic block.
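
Editor's note: the indirectbr handling added above marks only the successor that matches a known blockaddress, marks all successors when the address is overdefined, and marks none while it is still unknown. Below is a standalone sketch of that decision; the toy Block type stands in for llvm::BasicBlock.

#include <cstddef>
#include <vector>

struct Block { int Id; };

static std::vector<bool> feasibleSuccessors(const std::vector<Block *> &Succs,
                                            const Block *KnownTarget,
                                            bool Overdefined) {
  std::vector<bool> Feasible(Succs.size(), false);
  if (!KnownTarget) {
    if (Overdefined)                       // overdefined address: anything goes
      Feasible.assign(Succs.size(), true);
    return Feasible;                       // unknown address: nothing yet
  }
  for (std::size_t I = 0; I < Succs.size(); ++I)
    if (Succs[I] == KnownTarget) {         // this is the target
      Feasible[I] = true;
      return Feasible;
    }
  // Target not in the successor list: undefined behavior, so it is ok to
  // leave every successor non-executable.
  return Feasible;
}

int main() {
  Block A{0}, B{1};
  std::vector<Block *> Succs{&A, &B};
  return feasibleSuccessors(Succs, &B, false)[1] ? 0 : 1;
}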
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index bfcb15530ef5..d01e91a7f235 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1825,6 +1825,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// Rank the remaining candidate vector types. This is easy because we know
// they're all integer vectors. We sort by ascending number of elements.
auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
+ (void)DL;
assert(DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) &&
"Cannot have vector types of different sizes!");
assert(RHSTy->getElementType()->isIntegerTy() &&
@@ -2294,7 +2295,8 @@ private:
#endif
return getAdjustedPtr(IRB, DL, &NewAI,
- APInt(DL.getPointerSizeInBits(), Offset), PointerTy,
+ APInt(DL.getPointerTypeSizeInBits(PointerTy), Offset),
+ PointerTy,
#ifndef NDEBUG
Twine(OldName) + "."
#else
@@ -2369,6 +2371,8 @@ private:
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
+ unsigned AS = LI.getPointerAddressSpace();
+
Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
: LI.getType();
const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
@@ -2387,6 +2391,10 @@ private:
LI.isVolatile(), LI.getName());
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+ // Try to preserve nonnull metadata
+ if (TargetTy->isPointerTy())
+ NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
V = NewLI;
// If this is an integer load past the end of the slice (which means the
@@ -2401,7 +2409,7 @@ private:
"endian_shift");
}
} else {
- Type *LTy = TargetTy->getPointerTo();
+ Type *LTy = TargetTy->getPointerTo(AS);
LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(TargetTy),
LI.isVolatile(), LI.getName());
@@ -2429,7 +2437,7 @@ private:
// the computed value, and then replace the placeholder with LI, leaving
// LI only used for this computation.
Value *Placeholder =
- new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
+ new LoadInst(UndefValue::get(LI.getType()->getPointerTo(AS)));
V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
"insert");
LI.replaceAllUsesWith(V);
@@ -2542,7 +2550,8 @@ private:
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
SI.isVolatile());
} else {
- Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo());
+ unsigned AS = SI.getPointerAddressSpace();
+ Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS));
NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
SI.isVolatile());
}
@@ -3857,7 +3866,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
if (Alignment <= DL.getABITypeAlignment(SliceTy))
Alignment = 0;
NewAI = new AllocaInst(
- SliceTy, nullptr, Alignment,
+ SliceTy, AI.getType()->getAddressSpace(), nullptr, Alignment,
AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
++NumNewAllocas;
}
@@ -4184,7 +4193,7 @@ bool SROA::promoteAllocas(Function &F) {
NumPromoted += PromotableAllocas.size();
DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
+ PromoteMemToReg(PromotableAllocas, *DT, AC);
PromotableAllocas.clear();
return true;
}
@@ -4234,9 +4243,8 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
if (!Changed)
return PreservedAnalyses::all();
- // FIXME: Even when promoting allocas we should preserve some abstract set of
- // CFG-specific analyses.
PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
return PA;
}
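
Editor's note: the (void)DL added to the RankVectorTypes lambda above is the usual idiom for a capture that is only referenced inside assert(): with NDEBUG the asserts compile away and the capture would otherwise be flagged as unused. A standalone sketch of the same idiom, with illustrative names:

#include <cassert>

static bool sameSize(unsigned A, unsigned B, unsigned Expected) {
  auto Check = [Expected](unsigned X) {
    (void)Expected;                 // only otherwise used in the assert below
    assert(X == Expected && "unexpected size");
    return X;
  };
  return Check(A) == Check(B);
}

int main() { return sameSize(32, 32, 32) ? 0 : 1; }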
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index afe7483006ae..00e3c95f6f06 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -43,13 +43,14 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeDSELegacyPassPass(Registry);
initializeGuardWideningLegacyPassPass(Registry);
initializeGVNLegacyPassPass(Registry);
- initializeNewGVNPass(Registry);
+ initializeNewGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
initializeGVNHoistLegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
initializeInductiveRangeCheckEliminationPass(Registry);
initializeIndVarSimplifyLegacyPassPass(Registry);
+ initializeInferAddressSpacesPass(Registry);
initializeJumpThreadingPass(Registry);
initializeLegacyLICMPassPass(Registry);
initializeLegacyLoopSinkPassPass(Registry);
@@ -58,6 +59,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopAccessLegacyAnalysisPass(Registry);
initializeLoopInstSimplifyLegacyPassPass(Registry);
initializeLoopInterchangePass(Registry);
+ initializeLoopPredicationLegacyPassPass(Registry);
initializeLoopRotateLegacyPassPass(Registry);
initializeLoopStrengthReducePass(Registry);
initializeLoopRerollPass(Registry);
@@ -79,6 +81,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeIPSCCPLegacyPassPass(Registry);
initializeSROALegacyPassPass(Registry);
initializeCFGSimplifyPassPass(Registry);
+ initializeLateCFGSimplifyPassPass(Registry);
initializeStructurizeCFGPass(Registry);
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
@@ -115,6 +118,10 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCFGSimplificationPass());
}
+void LLVMAddLateCFGSimplificationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLateCFGSimplificationPass());
+}
+
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createDeadStoreEliminationPass());
}
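
Editor's note: LLVMAddLateCFGSimplificationPass gives C API clients access to the new late pass. Below is a hedged usage sketch, assuming the matching declaration lands in llvm-c/Transforms/Scalar.h and that the caller links against LLVM-C; runLateSimplifyCFG is just an illustrative wrapper name.

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/Scalar.h"

// Run only the late CFG simplification over a module via the C API.
static void runLateSimplifyCFG(LLVMModuleRef M) {
  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMAddLateCFGSimplificationPass(PM);   // entry point added in this change
  LLVMRunPassManager(PM, M);
  LLVMDisposePassManager(PM);
}

int main() {
  LLVMModuleRef M = LLVMModuleCreateWithName("demo");
  runLateSimplifyCFG(M);
  LLVMDisposeModule(M);
  return 0;
}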
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 39969e27367f..c0c09a7e43fe 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -520,12 +520,25 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
unsigned NumElems = VT->getNumElements();
unsigned NumIndices = GEPI.getNumIndices();
- Scatterer Base = scatter(&GEPI, GEPI.getOperand(0));
+ // The base pointer might be scalar even if it's a vector GEP. In those cases,
+ // splat the pointer into a vector value, and scatter that vector.
+ Value *Op0 = GEPI.getOperand(0);
+ if (!Op0->getType()->isVectorTy())
+ Op0 = Builder.CreateVectorSplat(NumElems, Op0);
+ Scatterer Base = scatter(&GEPI, Op0);
SmallVector<Scatterer, 8> Ops;
Ops.resize(NumIndices);
- for (unsigned I = 0; I < NumIndices; ++I)
- Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1));
+ for (unsigned I = 0; I < NumIndices; ++I) {
+ Value *Op = GEPI.getOperand(I + 1);
+
+ // The indices might be scalars even if it's a vector GEP. In those cases,
+ // splat the scalar into a vector value, and scatter that vector.
+ if (!Op->getType()->isVectorTy())
+ Op = Builder.CreateVectorSplat(NumElems, Op);
+
+ Ops[I] = scatter(&GEPI, Op);
+ }
ValueVector Res;
Res.resize(NumElems);
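
Editor's note: the Scalarizer change above splats a scalar base pointer or scalar index into a vector before scattering, so every lane of a vector GEP sees one value per element. Below is a standalone sketch of the splat-then-per-lane address math, using fixed 4 lanes and plain arrays instead of IR vectors.

#include <array>
#include <cstddef>

constexpr std::size_t Lanes = 4;

template <typename T> static std::array<T, Lanes> splat(T Scalar) {
  std::array<T, Lanes> V;
  V.fill(Scalar);                         // same value in every lane
  return V;
}

int main() {
  int Data[16] = {};
  std::array<int *, Lanes> Base = splat(&Data[0]);   // scalar base, splatted
  std::array<long, Lanes> Index = {0, 2, 4, 6};      // already per-lane
  std::array<int *, Lanes> Addr;
  for (std::size_t I = 0; I < Lanes; ++I)
    Addr[I] = Base[I] + Index[I];         // per-lane GEP-style address math
  return Addr[1] == &Data[2] ? 0 : 1;
}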
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index f2723bd7af82..8754c714c5b2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -130,7 +130,8 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
AssumptionCache *AC,
- unsigned BonusInstThreshold) {
+ unsigned BonusInstThreshold,
+ bool LateSimplifyCFG) {
bool Changed = false;
bool LocalChange = true;
@@ -145,7 +146,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded.
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders)) {
+ if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders, LateSimplifyCFG)) {
LocalChange = true;
++NumSimpl;
}
@@ -156,10 +157,12 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
}
static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
- AssumptionCache *AC, int BonusInstThreshold) {
+ AssumptionCache *AC, int BonusInstThreshold,
+ bool LateSimplifyCFG) {
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
- EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold,
+ LateSimplifyCFG);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -173,7 +176,8 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
+ EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold,
+ LateSimplifyCFG);
EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
@@ -181,17 +185,19 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
}
SimplifyCFGPass::SimplifyCFGPass()
- : BonusInstThreshold(UserBonusInstThreshold) {}
+ : BonusInstThreshold(UserBonusInstThreshold),
+ LateSimplifyCFG(true) {}
-SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold)
- : BonusInstThreshold(BonusInstThreshold) {}
+SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold, bool LateSimplifyCFG)
+ : BonusInstThreshold(BonusInstThreshold),
+ LateSimplifyCFG(LateSimplifyCFG) {}
PreservedAnalyses SimplifyCFGPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
- if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold))
+ if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold, LateSimplifyCFG))
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<GlobalsAA>();
@@ -199,16 +205,17 @@ PreservedAnalyses SimplifyCFGPass::run(Function &F,
}
namespace {
-struct CFGSimplifyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
+struct BaseCFGSimplifyPass : public FunctionPass {
unsigned BonusInstThreshold;
std::function<bool(const Function &)> PredicateFtor;
+ bool LateSimplifyCFG;
- CFGSimplifyPass(int T = -1,
- std::function<bool(const Function &)> Ftor = nullptr)
- : FunctionPass(ID), PredicateFtor(std::move(Ftor)) {
+ BaseCFGSimplifyPass(int T, bool LateSimplifyCFG,
+ std::function<bool(const Function &)> Ftor,
+ char &ID)
+ : FunctionPass(ID), PredicateFtor(std::move(Ftor)),
+ LateSimplifyCFG(LateSimplifyCFG) {
BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T);
- initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
if (skipFunction(F) || (PredicateFtor && !PredicateFtor(F)))
@@ -218,7 +225,7 @@ struct CFGSimplifyPass : public FunctionPass {
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold);
+ return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold, LateSimplifyCFG);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -227,6 +234,26 @@ struct CFGSimplifyPass : public FunctionPass {
AU.addPreserved<GlobalsAAWrapperPass>();
}
};
+
+struct CFGSimplifyPass : public BaseCFGSimplifyPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ CFGSimplifyPass(int T = -1,
+ std::function<bool(const Function &)> Ftor = nullptr)
+ : BaseCFGSimplifyPass(T, false, Ftor, ID) {
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct LateCFGSimplifyPass : public BaseCFGSimplifyPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ LateCFGSimplifyPass(int T = -1,
+ std::function<bool(const Function &)> Ftor = nullptr)
+ : BaseCFGSimplifyPass(T, true, Ftor, ID) {
+ initializeLateCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
+};
}
char CFGSimplifyPass::ID = 0;
@@ -237,9 +264,24 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
false)
+char LateCFGSimplifyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LateCFGSimplifyPass, "latesimplifycfg",
+ "Simplify the CFG more aggressively", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(LateCFGSimplifyPass, "latesimplifycfg",
+ "Simplify the CFG more aggressively", false, false)
+
// Public interface to the CFGSimplification pass
FunctionPass *
llvm::createCFGSimplificationPass(int Threshold,
- std::function<bool(const Function &)> Ftor) {
+ std::function<bool(const Function &)> Ftor) {
return new CFGSimplifyPass(Threshold, std::move(Ftor));
}
+
+// Public interface to the LateCFGSimplification pass
+FunctionPass *
+llvm::createLateCFGSimplificationPass(int Threshold,
+ std::function<bool(const Function &)> Ftor) {
+ return new LateCFGSimplifyPass(Threshold, std::move(Ftor));
+}
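
Editor's note: iterativelySimplifyCFG keeps the usual fixed-point shape even with the new LateSimplifyCFG flag: sweep all blocks, record whether any local change happened, and repeat until a full sweep is quiet. Below is a standalone sketch of that driver shape, with toy integer "blocks" rather than BasicBlocks.

#include <vector>

// Toy local simplification: strictly decreases a block's "work" counter.
static bool simplifyOne(int &Block) {
  if (Block > 0) { --Block; return true; }
  return false;
}

static bool iterativelySimplify(std::vector<int> &Blocks) {
  bool Changed = false;
  bool LocalChange = true;
  while (LocalChange) {
    LocalChange = false;
    for (int &B : Blocks)                 // one sweep over all blocks
      LocalChange |= simplifyOne(B);
    Changed |= LocalChange;
  }
  return Changed;                         // anything simplified at all?
}

int main() {
  std::vector<int> Blocks{3, 0, 1};
  return iterativelySimplify(Blocks) ? 0 : 1;
}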
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index c3f14a0f4b1e..102e9eaeab77 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -164,13 +164,14 @@ static bool SinkInstruction(Instruction *Inst,
// Instructions can only be sunk if all their uses are in blocks
// dominated by one of the successors.
- // Look at all the postdominators and see if we can sink it in one.
+ // Look at all the dominated blocks and see if we can sink it in one.
DomTreeNode *DTN = DT.getNode(Inst->getParent());
for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
I != E && SuccToSinkTo == nullptr; ++I) {
BasicBlock *Candidate = (*I)->getBlock();
- if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
- IsAcceptableTarget(Inst, Candidate, DT, LI))
+ // A node always immediately dominates its children in the dominator
+ // tree.
+ if (IsAcceptableTarget(Inst, Candidate, DT, LI))
SuccToSinkTo = Candidate;
}
@@ -262,9 +263,8 @@ PreservedAnalyses SinkingPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!iterativelySinkInstructions(F, DT, LI, AA))
return PreservedAnalyses::all();
- auto PA = PreservedAnalyses();
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LoopAnalysis>();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
return PA;
}
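
Editor's note: the Sink change above drops the redundant getIDom() check because every child of a node in the dominator tree is by definition immediately dominated by it, so each child block is a legal sink candidate on its own. Below is a standalone sketch over a toy dominator-tree node; the types and names are illustrative.

#include <vector>

struct DomNode {
  int Block;
  std::vector<DomNode *> Children;   // blocks immediately dominated by Block
};

// Return the first acceptable child block, or -1 if there is nowhere to sink.
static int findSinkTarget(const DomNode &Home, bool (*IsAcceptable)(int)) {
  for (const DomNode *Child : Home.Children)
    if (IsAcceptable(Child->Block))  // no extra idom check needed
      return Child->Block;
  return -1;
}

int main() {
  DomNode B1{1, {}}, B2{2, {}};
  DomNode Home{0, {&B1, &B2}};
  return findSinkTarget(Home, [](int C) { return C == 2; }) == 2 ? 0 : 1;
}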