summaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2016-08-17 19:41:29 +0000
committerDimitry Andric <dim@FreeBSD.org>2016-08-17 19:41:29 +0000
commit6c4bc1bd2772d4db151a13576558e1320c7c3b65 (patch)
tree06f8f3845db41aed4b2b4228d561c3f3b5a09563 /contrib/llvm/lib/Transforms
parent4bb0738ee7438ed572a4b9d8b609271b029de5b8 (diff)
parenta7fe922b98bb45be7dce7c1cfe668ec27eeddc74 (diff)
Notes
Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp35
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp13
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp22
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp62
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp93
17 files changed, 254 insertions, 49 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index fff544085414..787f4342831d 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -332,6 +332,7 @@ struct ArgumentUsesTracker : public CaptureTracker {
namespace llvm {
template <> struct GraphTraits<ArgumentGraphNode *> {
typedef ArgumentGraphNode NodeType;
+ typedef ArgumentGraphNode *NodeRef;
typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType;
static inline NodeType *getEntryNode(NodeType *A) { return A; }
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 310c29275faf..99b12d4db0d0 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -44,6 +44,7 @@
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@@ -779,7 +780,8 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
// Instructions could multiply use V.
while (UI != E && *UI == I)
++UI;
- I->eraseFromParent();
+ if (isInstructionTriviallyDead(I, TLI))
+ I->eraseFromParent();
}
}
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index cf5b76dc365b..df6a48e05d42 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -134,6 +134,10 @@ static cl::opt<int> PreInlineThreshold(
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
+static cl::opt<bool> EnableGVNHoist(
+ "enable-gvn-hoist", cl::init(false), cl::Hidden,
+ cl::desc("Enable the experimental GVN Hoisting pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -232,7 +236,8 @@ void PassManagerBuilder::populateFunctionPassManager(
FPM.add(createCFGSimplificationPass());
FPM.add(createSROAPass());
FPM.add(createEarlyCSEPass());
- FPM.add(createGVNHoistPass());
+ if(EnableGVNHoist)
+ FPM.add(createGVNHoistPass());
FPM.add(createLowerExpectIntrinsicPass());
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index d7eed790e2ab..8f1ff8ac0e66 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -553,8 +553,11 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
}
+ // FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring
+ // decomposeBitTestICmp() might help.
{
- unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType());
+ unsigned BitWidth =
+ DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
APInt MinSignedValue = APInt::getSignBit(BitWidth);
Value *X;
const APInt *Y, *C;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 51c3262b5d14..377ccb9c37f7 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2830,7 +2830,8 @@ bool InstCombiner::run() {
// Add operands to the worklist.
replaceInstUsesWith(*I, C);
++NumConstProp;
- eraseInstFromFunction(*I);
+ if (isInstructionTriviallyDead(I, TLI))
+ eraseInstFromFunction(*I);
MadeIRChange = true;
continue;
}
@@ -2851,7 +2852,8 @@ bool InstCombiner::run() {
// Add operands to the worklist.
replaceInstUsesWith(*I, C);
++NumConstProp;
- eraseInstFromFunction(*I);
+ if (isInstructionTriviallyDead(I, TLI))
+ eraseInstFromFunction(*I);
MadeIRChange = true;
continue;
}
@@ -3007,7 +3009,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
++NumConstProp;
- Inst->eraseFromParent();
+ if (isInstructionTriviallyDead(Inst, TLI))
+ Inst->eraseFromParent();
continue;
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index dcb62d3ed1b5..41041c78db97 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -272,8 +272,9 @@ static bool shouldInstrumentReadWriteFromAddress(Value *Addr) {
return false;
}
- // Check if the global is in a GCOV counter array.
- if (GV->getName().startswith("__llvm_gcov_ctr"))
+ // Check if the global is private gcov data.
+ if (GV->getName().startswith("__llvm_gcov") ||
+ GV->getName().startswith("__llvm_gcda"))
return false;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index 88172d19fe5a..9e982194bac7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -19,6 +19,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constant.h"
@@ -90,11 +91,13 @@ bool ConstantPropagation::runOnFunction(Function &F) {
// Remove the dead instruction.
WorkList.erase(I);
- I->eraseFromParent();
+ if (isInstructionTriviallyDead(I, TLI)) {
+ I->eraseFromParent();
+ ++NumInstKilled;
+ }
// We made a change to the function...
Changed = true;
- ++NumInstKilled;
}
}
return Changed;
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 9d0ef42e0396..0b16e2703dc4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -582,6 +582,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// its simpler value.
if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
+ bool Killed = false;
if (!Inst->use_empty()) {
Inst->replaceAllUsesWith(V);
Changed = true;
@@ -589,11 +590,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (isInstructionTriviallyDead(Inst, &TLI)) {
Inst->eraseFromParent();
Changed = true;
+ Killed = true;
}
- if (Changed) {
+ if (Changed)
++NumSimplify;
+ if (Killed)
continue;
- }
}
// If this is a simple instruction that we can value number, process it.
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 542cf38e43bb..e958563e2d10 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -815,6 +815,14 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
return;
+ // Check that `Cast` actually extends the induction variable (we rely on this
+ // later). This takes care of cases where `Cast` is extending a truncation of
+ // the narrow induction variable, and thus can end up being narrower than the
+ // "narrow" induction variable.
+ uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
+ if (NarrowIVWidth >= Width)
+ return;
+
// Cast is either an sext or zext up to this point.
// We should not widen an indvar if arithmetics on the wider indvar are more
// expensive than those on the narrower indvar. We check only the cost of ADD
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index b9e717cf763e..d1769fc3ebb3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -758,7 +758,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
- I->eraseFromParent();
+ if (isInstructionTriviallyDead(I, TLI))
+ I->eraseFromParent();
Condition = SimpleVal;
}
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 2c0a70e44f57..cdd17fc516a8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -377,9 +377,11 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
&I, I.getModule()->getDataLayout(), TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
- CurAST->deleteValue(&I);
I.replaceAllUsesWith(C);
- I.eraseFromParent();
+ if (isInstructionTriviallyDead(&I, TLI)) {
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
+ }
continue;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 77c77eb7d798..70bd9d3cca95 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4442,6 +4442,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
+ Rewriter.setInsertPoint(&*IP);
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
@@ -4473,7 +4474,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
- Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP)));
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
}
// Expand the ScaledReg portion.
@@ -4491,14 +4492,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Expand ScaleReg as if it was part of the base regs.
if (F.Scale == 1)
Ops.push_back(
- SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)));
+ SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
else {
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
- ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP);
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
}
} else {
// Otherwise just expand the scaled register and an explicit scale,
@@ -4508,11 +4509,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP));
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
if (F.Scale != 1)
ScaledS =
SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
@@ -4524,7 +4525,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (F.BaseGV) {
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@@ -4534,7 +4535,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Flush the operand list to suppress SCEVExpander hoisting of both folded and
// unfolded offsets. LSR assumes they both live next to their uses.
if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@@ -4570,7 +4571,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
const SCEV *FullS = Ops.empty() ?
SE.getConstant(IntTy, 0) :
SE.getAddExpr(Ops);
- Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP);
+ Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
// We're done expanding now, so reset the rewriter.
Rewriter.clearPostInc();
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index c5ca56360fc8..4f1052d81433 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -552,9 +553,39 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// two PHINodes, the iteration over the old PHIs remains valid, and the
// mapping will just map us to the new node (which may not even be a PHI
// node).
+ const DataLayout &DL = NewFunc->getParent()->getDataLayout();
+ SmallSetVector<const Value *, 8> Worklist;
for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
- if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
- recursivelySimplifyInstruction(PN);
+ if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
+ Worklist.insert(PHIToResolve[Idx]);
+
+ // Note that we must test the size on each iteration, the worklist can grow.
+ for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+ const Value *OrigV = Worklist[Idx];
+ auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
+ if (!I)
+ continue;
+
+ // See if this instruction simplifies.
+ Value *SimpleV = SimplifyInstruction(I, DL);
+ if (!SimpleV)
+ continue;
+
+ // Stash away all the uses of the old instruction so we can check them for
+ // recursive simplifications after a RAUW. This is cheaper than checking all
+ // uses of To on the recursive step in most cases.
+ for (const User *U : OrigV->users())
+ Worklist.insert(cast<Instruction>(U));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+
+ // If the original instruction had no side effects, remove it.
+ if (isInstructionTriviallyDead(I))
+ I->eraseFromParent();
+ else
+ VMap[OrigV] = I;
+ }
// Now that the inlined function body has been fully constructed, go through
// and zap unconditional fall-through branches. This happens all the time when
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 1fbb19d2b8ad..e82c07fd7b59 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1294,6 +1294,13 @@ updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode,
return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last);
}
+/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
+/// block. Allocas used in inalloca calls and allocas of dynamic array size
+/// cannot be static.
+static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) {
+ return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
+}
+
/// Update inlined instructions' line numbers to
/// to encode location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
@@ -1328,7 +1335,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
// Don't update static allocas, as they may get moved later.
if (auto *AI = dyn_cast<AllocaInst>(BI))
- if (isa<Constant>(AI->getArraySize()))
+ if (allocaWouldBeStaticInEntry(AI))
continue;
BI->setDebugLoc(TheCallDL);
@@ -1626,7 +1633,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
continue;
}
- if (!isa<Constant>(AI->getArraySize()))
+ if (!allocaWouldBeStaticInEntry(AI))
continue;
// Keep track of the static allocas that we inline into the caller.
@@ -1635,7 +1642,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Scan for the block of allocas that we can move over, and move them
// all at once.
while (isa<AllocaInst>(I) &&
- isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
+ allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
++I;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 9658966779b9..0d5a25b8ebc5 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -64,6 +64,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
DominatorTree &DT, LoopInfo &LI) {
SmallVector<Use *, 16> UsesToRewrite;
SmallVector<BasicBlock *, 8> ExitBlocks;
+ SmallSetVector<PHINode *, 16> PHIsToRemove;
PredIteratorCache PredCache;
bool Changed = false;
@@ -115,7 +116,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
SmallVector<PHINode *, 16> AddedPHIs;
SmallVector<PHINode *, 8> PostProcessPHIs;
- SSAUpdater SSAUpdate;
+ SmallVector<PHINode *, 4> InsertedPHIs;
+ SSAUpdater SSAUpdate(&InsertedPHIs);
SSAUpdate.Initialize(I->getType(), I->getName());
// Insert the LCSSA phi's into all of the exit blocks dominated by the
@@ -184,6 +186,14 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// Otherwise, do full PHI insertion.
SSAUpdate.RewriteUse(*UseToRewrite);
+
+ // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
+ // to post-process them to keep LCSSA form.
+ for (PHINode *InsertedPN : InsertedPHIs) {
+ if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
+ if (!L->contains(OtherLoop))
+ PostProcessPHIs.push_back(InsertedPN);
+ }
}
// Post process PHI instructions that were inserted into another disjoint
@@ -196,13 +206,19 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
Worklist.push_back(PostProcessPN);
}
- // Remove PHI nodes that did not have any uses rewritten.
+ // Keep track of PHI nodes that we want to remove because they did not have
+ // any uses rewritten.
for (PHINode *PN : AddedPHIs)
if (PN->use_empty())
- PN->eraseFromParent();
+ PHIsToRemove.insert(PN);
Changed = true;
}
+ // Remove PHI nodes that did not have any uses rewritten.
+ for (PHINode *PN : PHIsToRemove) {
+ assert (PN->use_empty() && "Trying to remove a phi with uses.");
+ PN->eraseFromParent();
+ }
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index b3a928bf7753..2846e8f235b7 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -327,6 +327,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
else
NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
+ SmallVector<BasicBlock *, 8> OuterLoopBlocks;
+ OuterLoopBlocks.push_back(NewBB);
// Now that we know which blocks are in L and which need to be moved to
// OuterLoop, move any blocks that need it.
for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
@@ -334,12 +336,53 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
if (!BlocksInL.count(BB)) {
// Move this block to the parent, updating the exit blocks sets
L->removeBlockFromLoop(BB);
- if ((*LI)[BB] == L)
+ if ((*LI)[BB] == L) {
LI->changeLoopFor(BB, NewOuter);
+ OuterLoopBlocks.push_back(BB);
+ }
--i;
}
}
+ // Split edges to exit blocks from the inner loop, if they emerged in the
+ // process of separating the outer one.
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
+ for (BasicBlock *ExitBlock : ExitBlockSet) {
+ if (any_of(predecessors(ExitBlock),
+ [L](BasicBlock *BB) { return !L->contains(BB); })) {
+ rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA);
+ }
+ }
+
+ if (PreserveLCSSA) {
+ // Fix LCSSA form for L. Some values, which previously were only used inside
+ // L, can now be used in NewOuter loop. We need to insert phi-nodes for them
+ // in corresponding exit blocks.
+
+ // Go through all instructions in OuterLoopBlocks and check if they are
+ // using operands from the inner loop. In this case we'll need to fix LCSSA
+ // for these instructions.
+ SmallSetVector<Instruction *, 8> WorklistSet;
+ for (BasicBlock *OuterBB: OuterLoopBlocks) {
+ for (Instruction &I : *OuterBB) {
+ for (Value *Op : I.operands()) {
+ Instruction *OpI = dyn_cast<Instruction>(Op);
+ if (!OpI || !L->contains(OpI))
+ continue;
+ WorklistSet.insert(OpI);
+ }
+ }
+ }
+ SmallVector<Instruction *, 8> Worklist(WorklistSet.begin(),
+ WorklistSet.end());
+ formLCSSAForInstructions(Worklist, *DT, *LI);
+ assert(NewOuter->isRecursivelyLCSSAForm(*DT) &&
+ "LCSSA is broken after separating nested loops!");
+ }
+
return NewOuter;
}
@@ -541,17 +584,12 @@ ReprocessLoop:
SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());
for (BasicBlock *ExitBlock : ExitBlockSet) {
- for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
- PI != PE; ++PI)
- // Must be exactly this loop: no subloops, parent loops, or non-loop preds
- // allowed.
- if (!L->contains(*PI)) {
- if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA)) {
- ++NumInserted;
- Changed = true;
- }
- break;
- }
+ if (any_of(predecessors(ExitBlock),
+ [L](BasicBlock *BB) { return !L->contains(BB); })) {
+ rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA);
+ ++NumInserted;
+ Changed = true;
+ }
}
// If the header has more than two predecessors at this point (from the
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8b85e320d3b2..ee5733d20f4f 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -50,6 +50,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -220,6 +221,81 @@ class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class LoopVectorizationRequirements;
+// A traits type that is intended to be used in graph algorithms. The graph it
+// models starts at the loop header, and traverses the BasicBlocks that are in
+// the loop body, but not the loop header. Since the loop header is skipped,
+// the back edges are excluded.
+struct LoopBodyTraits {
+ using NodeRef = std::pair<const Loop *, BasicBlock *>;
+
+ // This wraps a const Loop * into the iterator, so we know which edges to
+ // filter out.
+ class WrappedSuccIterator
+ : public iterator_adaptor_base<
+ WrappedSuccIterator, succ_iterator,
+ typename std::iterator_traits<succ_iterator>::iterator_category,
+ NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
+ using BaseT = iterator_adaptor_base<
+ WrappedSuccIterator, succ_iterator,
+ typename std::iterator_traits<succ_iterator>::iterator_category,
+ NodeRef, std::ptrdiff_t, NodeRef *, NodeRef>;
+
+ const Loop *L;
+
+ public:
+ WrappedSuccIterator(succ_iterator Begin, const Loop *L)
+ : BaseT(Begin), L(L) {}
+
+ NodeRef operator*() const { return {L, *I}; }
+ };
+
+ struct LoopBodyFilter {
+ bool operator()(NodeRef N) const {
+ const Loop *L = N.first;
+ return N.second != L->getHeader() && L->contains(N.second);
+ }
+ };
+
+ using ChildIteratorType =
+ filter_iterator<WrappedSuccIterator, LoopBodyFilter>;
+
+ static NodeRef getEntryNode(const Loop &G) { return {&G, G.getHeader()}; }
+
+ static ChildIteratorType child_begin(NodeRef Node) {
+ return make_filter_range(make_range<WrappedSuccIterator>(
+ {succ_begin(Node.second), Node.first},
+ {succ_end(Node.second), Node.first}),
+ LoopBodyFilter{})
+ .begin();
+ }
+
+ static ChildIteratorType child_end(NodeRef Node) {
+ return make_filter_range(make_range<WrappedSuccIterator>(
+ {succ_begin(Node.second), Node.first},
+ {succ_end(Node.second), Node.first}),
+ LoopBodyFilter{})
+ .end();
+ }
+};
+
+/// Returns true if the given loop body has a cycle, excluding the loop
+/// itself.
+static bool hasCyclesInLoopBody(const Loop &L) {
+ if (!L.empty())
+ return true;
+
+ for (const auto SCC :
+ make_range(scc_iterator<Loop, LoopBodyTraits>::begin(L),
+ scc_iterator<Loop, LoopBodyTraits>::end(L))) {
+ if (SCC.size() > 1) {
+ DEBUG(dbgs() << "LVL: Detected a cycle in the loop body:\n");
+ DEBUG(L.dump());
+ return true;
+ }
+ }
+ return false;
+}
+
/// \brief This modifies LoopAccessReport to initialize message with
/// loop-vectorizer-specific part.
class VectorizationReport : public LoopAccessReport {
@@ -1782,12 +1858,14 @@ private:
Instruction *UnsafeAlgebraInst;
};
-static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
- if (L.empty())
- return V.push_back(&L);
-
+static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
+ if (L.empty()) {
+ if (!hasCyclesInLoopBody(L))
+ V.push_back(&L);
+ return;
+ }
for (Loop *InnerL : L)
- addInnerLoop(*InnerL, V);
+ addAcyclicInnerLoop(*InnerL, V);
}
/// The LoopVectorize Pass.
@@ -4395,6 +4473,9 @@ bool LoopVectorizationLegality::canVectorize() {
return false;
}
+ // FIXME: The code is currently dead, since the loop gets sent to
+ // LoopVectorizationLegality is already an innermost loop.
+ //
// We can only vectorize innermost loops.
if (!TheLoop->empty()) {
emitAnalysis(VectorizationReport() << "loop is not the innermost loop");
@@ -6639,7 +6720,7 @@ bool LoopVectorizePass::runImpl(
SmallVector<Loop *, 8> Worklist;
for (Loop *L : *LI)
- addInnerLoop(*L, Worklist);
+ addAcyclicInnerLoop(*L, Worklist);
LoopsAnalyzed += Worklist.size();