author: Dimitry Andric <dim@FreeBSD.org> 2016-08-17 19:41:29 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2016-08-17 19:41:29 +0000
commit: 6c4bc1bd2772d4db151a13576558e1320c7c3b65 (patch)
tree: 06f8f3845db41aed4b2b4228d561c3f3b5a09563 /contrib/llvm/lib/Transforms
parent: 4bb0738ee7438ed572a4b9d8b609271b029de5b8 (diff)
parent: a7fe922b98bb45be7dce7c1cfe668ec27eeddc74 (diff)
17 files changed, 254 insertions, 49 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index fff544085414..787f4342831d 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -332,6 +332,7 @@ struct ArgumentUsesTracker : public CaptureTracker {
 namespace llvm {
 template <> struct GraphTraits<ArgumentGraphNode *> {
   typedef ArgumentGraphNode NodeType;
+  typedef ArgumentGraphNode *NodeRef;
   typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType;
 
   static inline NodeType *getEntryNode(NodeType *A) { return A; }
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 310c29275faf..99b12d4db0d0 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -44,6 +44,7 @@
 #include "llvm/Transforms/Utils/CtorUtils.h"
 #include "llvm/Transforms/Utils/Evaluator.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -779,7 +780,8 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
         // Instructions could multiply use V.
         while (UI != E && *UI == I)
           ++UI;
-        I->eraseFromParent();
+        if (isInstructionTriviallyDead(I, TLI))
+          I->eraseFromParent();
       }
 }
 
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index cf5b76dc365b..df6a48e05d42 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -134,6 +134,10 @@ static cl::opt<int> PreInlineThreshold(
     cl::desc("Control the amount of inlining in pre-instrumentation inliner "
              "(default = 75)"));
 
+static cl::opt<bool> EnableGVNHoist(
+    "enable-gvn-hoist", cl::init(false), cl::Hidden,
+    cl::desc("Enable the experimental GVN Hoisting pass"));
+
 PassManagerBuilder::PassManagerBuilder() {
     OptLevel = 2;
     SizeLevel = 0;
@@ -232,7 +236,8 @@ void PassManagerBuilder::populateFunctionPassManager(
   FPM.add(createCFGSimplificationPass());
   FPM.add(createSROAPass());
   FPM.add(createEarlyCSEPass());
-  FPM.add(createGVNHoistPass());
+  if(EnableGVNHoist)
+    FPM.add(createGVNHoistPass());
   FPM.add(createLowerExpectIntrinsicPass());
 }
 
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index d7eed790e2ab..8f1ff8ac0e66 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -553,8 +553,11 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
     }
   }
 
+  // FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring
+  // decomposeBitTestICmp() might help.
   {
-    unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType());
+    unsigned BitWidth =
+        DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
     APInt MinSignedValue = APInt::getSignBit(BitWidth);
     Value *X;
     const APInt *Y, *C;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 51c3262b5d14..377ccb9c37f7 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2830,7 +2830,8 @@ bool InstCombiner::run() {
         // Add operands to the worklist.
         replaceInstUsesWith(*I, C);
         ++NumConstProp;
-        eraseInstFromFunction(*I);
+        if (isInstructionTriviallyDead(I, TLI))
+          eraseInstFromFunction(*I);
         MadeIRChange = true;
         continue;
       }
@@ -2851,7 +2852,8 @@ bool InstCombiner::run() {
         // Add operands to the worklist.
         replaceInstUsesWith(*I, C);
         ++NumConstProp;
-        eraseInstFromFunction(*I);
+        if (isInstructionTriviallyDead(I, TLI))
+          eraseInstFromFunction(*I);
         MadeIRChange = true;
         continue;
       }
@@ -3007,7 +3009,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
                        << *Inst << '\n');
           Inst->replaceAllUsesWith(C);
           ++NumConstProp;
-          Inst->eraseFromParent();
+          if (isInstructionTriviallyDead(Inst, TLI))
+            Inst->eraseFromParent();
           continue;
         }
 
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index dcb62d3ed1b5..41041c78db97 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -272,8 +272,9 @@ static bool shouldInstrumentReadWriteFromAddress(Value *Addr) {
         return false;
     }
 
-    // Check if the global is in a GCOV counter array.
-    if (GV->getName().startswith("__llvm_gcov_ctr"))
+    // Check if the global is private gcov data.
+    if (GV->getName().startswith("__llvm_gcov") ||
+        GV->getName().startswith("__llvm_gcda"))
       return false;
   }
 
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index 88172d19fe5a..9e982194bac7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -19,6 +19,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/IR/Constant.h"
@@ -90,11 +91,13 @@ bool ConstantPropagation::runOnFunction(Function &F) {
 
         // Remove the dead instruction.
         WorkList.erase(I);
-        I->eraseFromParent();
+        if (isInstructionTriviallyDead(I, TLI)) {
+          I->eraseFromParent();
+          ++NumInstKilled;
+        }
 
         // We made a change to the function...
         Changed = true;
-        ++NumInstKilled;
       }
   }
   return Changed;
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 9d0ef42e0396..0b16e2703dc4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -582,6 +582,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     // its simpler value.
     if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
       DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V << '\n');
+      bool Killed = false;
       if (!Inst->use_empty()) {
         Inst->replaceAllUsesWith(V);
         Changed = true;
@@ -589,11 +590,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       if (isInstructionTriviallyDead(Inst, &TLI)) {
         Inst->eraseFromParent();
         Changed = true;
+        Killed = true;
       }
-      if (Changed) {
+      if (Changed)
         ++NumSimplify;
+      if (Killed)
         continue;
-      }
     }
 
     // If this is a simple instruction that we can value number, process it.
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 542cf38e43bb..e958563e2d10 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -815,6 +815,14 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
     return;
 
+  // Check that `Cast` actually extends the induction variable (we rely on this
+  // later).  This takes care of cases where `Cast` is extending a truncation of
+  // the narrow induction variable, and thus can end up being narrower than the
+  // "narrow" induction variable.
+  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
+  if (NarrowIVWidth >= Width)
+    return;
+
   // Cast is either an sext or zext up to this point.
   // We should not widen an indvar if arithmetics on the wider indvar are more
   // expensive than those on the narrower indvar. We check only the cost of ADD
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index b9e717cf763e..d1769fc3ebb3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -758,7 +758,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
         ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
     if (SimpleVal) {
       I->replaceAllUsesWith(SimpleVal);
-      I->eraseFromParent();
+      if (isInstructionTriviallyDead(I, TLI))
+        I->eraseFromParent();
       Condition = SimpleVal;
     }
   }
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 2c0a70e44f57..cdd17fc516a8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -377,9 +377,11 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
               &I, I.getModule()->getDataLayout(), TLI)) {
         DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C << '\n');
         CurAST->copyValue(&I, C);
-        CurAST->deleteValue(&I);
         I.replaceAllUsesWith(C);
-        I.eraseFromParent();
+        if (isInstructionTriviallyDead(&I, TLI)) {
+          CurAST->deleteValue(&I);
+          I.eraseFromParent();
+        }
         continue;
       }
 
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 77c77eb7d798..70bd9d3cca95 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4442,6 +4442,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   // Determine an input position which will be dominated by the operands and
   // which will dominate the result.
   IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
+  Rewriter.setInsertPoint(&*IP);
 
   // Inform the Rewriter if we have a post-increment use, so that it can
   // perform an advantageous expansion.
@@ -4473,7 +4474,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
                                  LF.UserInst, LF.OperandValToReplace,
                                  Loops, SE, DT);
 
-    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP)));
+    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
   }
 
   // Expand the ScaledReg portion.
@@ -4491,14 +4492,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
       // Expand ScaleReg as if it was part of the base regs.
       if (F.Scale == 1)
         Ops.push_back(
-            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)));
+            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
       else {
         // An interesting way of "folding" with an icmp is to use a negated
         // scale, which we'll implement by inserting it into the other operand
         // of the icmp.
         assert(F.Scale == -1 &&
                "The only scale supported by ICmpZero uses is -1!");
-        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP);
+        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
       }
     } else {
       // Otherwise just expand the scaled register and an explicit scale,
@@ -4508,11 +4509,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
       // Unless the addressing mode will not be folded.
       if (!Ops.empty() && LU.Kind == LSRUse::Address &&
           isAMCompletelyFolded(TTI, LU, F)) {
-        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
         Ops.clear();
         Ops.push_back(SE.getUnknown(FullV));
       }
-      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP));
+      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
       if (F.Scale != 1)
         ScaledS =
             SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
@@ -4524,7 +4525,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   if (F.BaseGV) {
     // Flush the operand list to suppress SCEVExpander hoisting.
     if (!Ops.empty()) {
-      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
       Ops.clear();
       Ops.push_back(SE.getUnknown(FullV));
     }
@@ -4534,7 +4535,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   // Flush the operand list to suppress SCEVExpander hoisting of both folded and
   // unfolded offsets. LSR assumes they both live next to their uses.
   if (!Ops.empty()) {
-    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
     Ops.clear();
     Ops.push_back(SE.getUnknown(FullV));
   }
@@ -4570,7 +4571,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   const SCEV *FullS = Ops.empty() ?
                       SE.getConstant(IntTy, 0) :
                       SE.getAddExpr(Ops);
-  Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP);
+  Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
 
   // We're done expanding now, so reset the rewriter.
   Rewriter.clearPostInc();
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index c5ca56360fc8..4f1052d81433 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -552,9 +553,39 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
   // two PHINodes, the iteration over the old PHIs remains valid, and the
   // mapping will just map us to the new node (which may not even be a PHI
   // node).
+  const DataLayout &DL = NewFunc->getParent()->getDataLayout();
+  SmallSetVector<const Value *, 8> Worklist;
   for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
-    if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
-      recursivelySimplifyInstruction(PN);
+    if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
+      Worklist.insert(PHIToResolve[Idx]);
+
+  // Note that we must test the size on each iteration, the worklist can grow.
+  for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+    const Value *OrigV = Worklist[Idx];
+    auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
+    if (!I)
+      continue;
+
+    // See if this instruction simplifies.
+    Value *SimpleV = SimplifyInstruction(I, DL);
+    if (!SimpleV)
+      continue;
+
+    // Stash away all the uses of the old instruction so we can check them for
+    // recursive simplifications after a RAUW. This is cheaper than checking all
+    // uses of To on the recursive step in most cases.
+    for (const User *U : OrigV->users())
+      Worklist.insert(cast<Instruction>(U));
+
+    // Replace the instruction with its simplified value.
+    I->replaceAllUsesWith(SimpleV);
+
+    // If the original instruction had no side effects, remove it.
+    if (isInstructionTriviallyDead(I))
+      I->eraseFromParent();
+    else
+      VMap[OrigV] = I;
+  }
 
   // Now that the inlined function body has been fully constructed, go through
   // and zap unconditional fall-through branches. This happens all the time when
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 1fbb19d2b8ad..e82c07fd7b59 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1294,6 +1294,13 @@ updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode,
   return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last);
 }
 
+/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
+/// block. Allocas used in inalloca calls and allocas of dynamic array size
+/// cannot be static.
+static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) {
+  return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
+}
+
 /// Update inlined instructions' line numbers to
 /// to encode location where these instructions are inlined.
 static void fixupLineNumbers(Function *Fn, Function::iterator FI,
@@ -1328,7 +1335,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
 
         // Don't update static allocas, as they may get moved later.
         if (auto *AI = dyn_cast<AllocaInst>(BI))
-          if (isa<Constant>(AI->getArraySize()))
+          if (allocaWouldBeStaticInEntry(AI))
             continue;
 
         BI->setDebugLoc(TheCallDL);
@@ -1626,7 +1633,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
         continue;
       }
 
-      if (!isa<Constant>(AI->getArraySize()))
+      if (!allocaWouldBeStaticInEntry(AI))
         continue;
       
       // Keep track of the static allocas that we inline into the caller.
@@ -1635,7 +1642,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       // Scan for the block of allocas that we can move over, and move them
       // all at once.
       while (isa<AllocaInst>(I) &&
-             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
+             allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
         IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
         ++I;
       }
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 9658966779b9..0d5a25b8ebc5 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -64,6 +64,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
                                     DominatorTree &DT, LoopInfo &LI) {
   SmallVector<Use *, 16> UsesToRewrite;
   SmallVector<BasicBlock *, 8> ExitBlocks;
+  SmallSetVector<PHINode *, 16> PHIsToRemove;
   PredIteratorCache PredCache;
   bool Changed = false;
 
@@ -115,7 +116,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
     SmallVector<PHINode *, 16> AddedPHIs;
     SmallVector<PHINode *, 8> PostProcessPHIs;
 
-    SSAUpdater SSAUpdate;
+    SmallVector<PHINode *, 4> InsertedPHIs;
+    SSAUpdater SSAUpdate(&InsertedPHIs);
     SSAUpdate.Initialize(I->getType(), I->getName());
 
     // Insert the LCSSA phi's into all of the exit blocks dominated by the
@@ -184,6 +186,14 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
 
       // Otherwise, do full PHI insertion.
       SSAUpdate.RewriteUse(*UseToRewrite);
+
+      // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
+      // to post-process them to keep LCSSA form.
+      for (PHINode *InsertedPN : InsertedPHIs) {
+        if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
+          if (!L->contains(OtherLoop))
+            PostProcessPHIs.push_back(InsertedPN);
+      }
     }
 
     // Post process PHI instructions that were inserted into another disjoint
@@ -196,13 +206,19 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
       Worklist.push_back(PostProcessPN);
     }
 
-    // Remove PHI nodes that did not have any uses rewritten.
+    // Keep track of PHI nodes that we want to remove because they did not have
+    // any uses rewritten.
     for (PHINode *PN : AddedPHIs)
       if (PN->use_empty())
-        PN->eraseFromParent();
+        PHIsToRemove.insert(PN);
 
     Changed = true;
   }
+  // Remove PHI nodes that did not have any uses rewritten.
+  for (PHINode *PN : PHIsToRemove) {
+    assert (PN->use_empty() && "Trying to remove a phi with uses.");
+    PN->eraseFromParent();
+  }
   return Changed;
 }
 
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index b3a928bf7753..2846e8f235b7 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -327,6 +327,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
     else
       NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
 
+  SmallVector<BasicBlock *, 8> OuterLoopBlocks;
+  OuterLoopBlocks.push_back(NewBB);
   // Now that we know which blocks are in L and which need to be moved to
   // OuterLoop, move any blocks that need it.
   for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
@@ -334,12 +336,53 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
     if (!BlocksInL.count(BB)) {
       // Move this block to the parent, updating the exit blocks sets
       L->removeBlockFromLoop(BB);
-      if ((*LI)[BB] == L)
+      if ((*LI)[BB] == L) {
         LI->changeLoopFor(BB, NewOuter);
+        OuterLoopBlocks.push_back(BB);
+      }
       --i;
     }
   }
 
+  // Split edges to exit blocks from the inner loop, if they emerged in the
+  // process of separating the outer one.
+  SmallVector<BasicBlock *, 8> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
+                                               ExitBlocks.end());
+  for (BasicBlock *ExitBlock : ExitBlockSet) {
+    if (any_of(predecessors(ExitBlock),
+               [L](BasicBlock *BB) { return !L->contains(BB); })) {
+      rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA);
+    }
+  }
+
+  if (PreserveLCSSA) {
+    // Fix LCSSA form for L. Some values, which previously were only used inside
+    // L, can now be used in NewOuter loop. We need to insert phi-nodes for them
+    // in corresponding exit blocks.
+
+    // Go through all instructions in OuterLoopBlocks and check if they are
+    // using operands from the inner loop. In this case we'll need to fix LCSSA
+    // for these instructions.
+    SmallSetVector<Instruction *, 8> WorklistSet;
+    for (BasicBlock *OuterBB: OuterLoopBlocks) {
+      for (Instruction &I : *OuterBB) {
+        for (Value *Op : I.operands()) {
+          Instruction *OpI = dyn_cast<Instruction>(Op);
+          if (!OpI || !L->contains(OpI))
+            continue;
+          WorklistSet.insert(OpI);
+        }
+      }
+    }
+    SmallVector<Instruction *, 8> Worklist(WorklistSet.begin(),
+                                           WorklistSet.end());
+    formLCSSAForInstructions(Worklist, *DT, *LI);
+    assert(NewOuter->isRecursivelyLCSSAForm(*DT) &&
+           "LCSSA is broken after separating nested loops!");
+  }
+
   return NewOuter;
 }
 
@@ -541,17 +584,12 @@ ReprocessLoop:
   SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
                                                ExitBlocks.end());
   for (BasicBlock *ExitBlock : ExitBlockSet) {
-    for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
-         PI != PE; ++PI)
-      // Must be exactly this loop: no subloops, parent loops, or non-loop preds
-      // allowed.
-      if (!L->contains(*PI)) {
-        if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA)) {
-          ++NumInserted;
-          Changed = true;
-        }
-        break;
-      }
+    if (any_of(predecessors(ExitBlock),
+               [L](BasicBlock *BB) { return !L->contains(BB); })) {
+      rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA);
+      ++NumInserted;
+      Changed = true;
+    }
   }
 
   // If the header has more than two predecessors at this point (from the
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8b85e320d3b2..ee5733d20f4f 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -50,6 +50,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -220,6 +221,81 @@ class LoopVectorizationLegality;
 class LoopVectorizationCostModel;
 class LoopVectorizationRequirements;
 
+// A traits type that is intended to be used in graph algorithms. The graph it
+// models starts at the loop header, and traverses the BasicBlocks that are in
+// the loop body, but not the loop header. Since the loop header is skipped,
+// the back edges are excluded.
+struct LoopBodyTraits {
+  using NodeRef = std::pair<const Loop *, BasicBlock *>;
+
+  // This wraps a const Loop * into the iterator, so we know which edges to
+  // filter out.
+  class WrappedSuccIterator
+      : public iterator_adaptor_base<
+            WrappedSuccIterator, succ_iterator,
+            typename std::iterator_traits<succ_iterator>::iterator_category,
+            NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
+    using BaseT = iterator_adaptor_base<
+        WrappedSuccIterator, succ_iterator,
+        typename std::iterator_traits<succ_iterator>::iterator_category,
+        NodeRef, std::ptrdiff_t, NodeRef *, NodeRef>;
+
+    const Loop *L;
+
+  public:
+    WrappedSuccIterator(succ_iterator Begin, const Loop *L)
+        : BaseT(Begin), L(L) {}
+
+    NodeRef operator*() const { return {L, *I}; }
+  };
+
+  struct LoopBodyFilter {
+    bool operator()(NodeRef N) const {
+      const Loop *L = N.first;
+      return N.second != L->getHeader() && L->contains(N.second);
+    }
+  };
+
+  using ChildIteratorType =
+      filter_iterator<WrappedSuccIterator, LoopBodyFilter>;
+
+  static NodeRef getEntryNode(const Loop &G) { return {&G, G.getHeader()}; }
+
+  static ChildIteratorType child_begin(NodeRef Node) {
+    return make_filter_range(make_range<WrappedSuccIterator>(
+                                 {succ_begin(Node.second), Node.first},
+                                 {succ_end(Node.second), Node.first}),
+                             LoopBodyFilter{})
+        .begin();
+  }
+
+  static ChildIteratorType child_end(NodeRef Node) {
+    return make_filter_range(make_range<WrappedSuccIterator>(
+                                 {succ_begin(Node.second), Node.first},
+                                 {succ_end(Node.second), Node.first}),
+                             LoopBodyFilter{})
+        .end();
+  }
+};
+
+/// Returns true if the given loop body has a cycle, excluding the loop
+/// itself.
+static bool hasCyclesInLoopBody(const Loop &L) {
+  if (!L.empty())
+    return true;
+
+  for (const auto SCC :
+       make_range(scc_iterator<Loop, LoopBodyTraits>::begin(L),
+                  scc_iterator<Loop, LoopBodyTraits>::end(L))) {
+    if (SCC.size() > 1) {
+      DEBUG(dbgs() << "LVL: Detected a cycle in the loop body:\n");
+      DEBUG(L.dump());
+      return true;
+    }
+  }
+  return false;
+}
+
 /// \brief This modifies LoopAccessReport to initialize message with
 /// loop-vectorizer-specific part.
 class VectorizationReport : public LoopAccessReport {
@@ -1782,12 +1858,14 @@ private:
   Instruction *UnsafeAlgebraInst;
 };
 
-static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
-  if (L.empty())
-    return V.push_back(&L);
-
+static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
+  if (L.empty()) {
+    if (!hasCyclesInLoopBody(L))
+      V.push_back(&L);
+    return;
+  }
   for (Loop *InnerL : L)
-    addInnerLoop(*InnerL, V);
+    addAcyclicInnerLoop(*InnerL, V);
 }
 
 /// The LoopVectorize Pass.
@@ -4395,6 +4473,9 @@ bool LoopVectorizationLegality::canVectorize() {
     return false;
   }
 
+  // FIXME: The code is currently dead, since the loop gets sent to
+  // LoopVectorizationLegality is already an innermost loop.
+  //
   // We can only vectorize innermost loops.
   if (!TheLoop->empty()) {
     emitAnalysis(VectorizationReport() << "loop is not the innermost loop");
@@ -6639,7 +6720,7 @@ bool LoopVectorizePass::runImpl(
   SmallVector<Loop *, 8> Worklist;
 
   for (Loop *L : *LI)
-    addInnerLoop(*L, Worklist);
+    addAcyclicInnerLoop(*L, Worklist);
 
   LoopsAnalyzed += Worklist.size();
author	Dimitry Andric <dim@FreeBSD.org>	2016-08-17 19:41:29 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2016-08-17 19:41:29 +0000
commit	6c4bc1bd2772d4db151a13576558e1320c7c3b65 (patch)
tree	06f8f3845db41aed4b2b4228d561c3f3b5a09563 /contrib/llvm/lib/Transforms
parent	4bb0738ee7438ed572a4b9d8b609271b029de5b8 (diff)
parent	a7fe922b98bb45be7dce7c1cfe668ec27eeddc74 (diff)