1 files changed, 164 insertions, 75 deletions
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 1f09979b3382..f1f075257020 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1,9 +1,8 @@
 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -76,6 +75,16 @@ STATISTIC(NumDSE,      "Number of trivial dead stores removed");
 DEBUG_COUNTER(CSECounter, "early-cse",
               "Controls which instructions are removed");
 
+static cl::opt<unsigned> EarlyCSEMssaOptCap(
+    "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
+    cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
+             "for faster compile. Caps the MemorySSA clobbering calls."));
+
+static cl::opt<bool> EarlyCSEDebugHash(
+    "earlycse-debug-hash", cl::init(false), cl::Hidden,
+    cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
+             "function is well-behaved w.r.t. its isEqual predicate"));
+
 //===----------------------------------------------------------------------===//
 // SimpleValue
 //===----------------------------------------------------------------------===//
@@ -126,7 +135,33 @@ template <> struct DenseMapInfo<SimpleValue> {
 
 } // end namespace llvm
 
-unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+/// Match a 'select' including an optional 'not's of the condition.
+static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
+                                           Value *&B,
+                                           SelectPatternFlavor &Flavor) {
+  // Return false if V is not even a select.
+  if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
+    return false;
+
+  // Look through a 'not' of the condition operand by swapping A/B.
+  Value *CondNot;
+  if (match(Cond, m_Not(m_Value(CondNot)))) {
+    Cond = CondNot;
+    std::swap(A, B);
+  }
+
+  // Set flavor if we find a match, or set it to unknown otherwise; in
+  // either case, return true to indicate that this is a select we can
+  // process.
+  if (auto *CmpI = dyn_cast<ICmpInst>(Cond))
+    Flavor = matchDecomposedSelectPattern(CmpI, A, B, A, B).Flavor;
+  else
+    Flavor = SPF_UNKNOWN;
+
+  return true;
+}
+
+static unsigned getHashValueImpl(SimpleValue Val) {
   Instruction *Inst = Val.Inst;
   // Hash in all of the operands as pointers.
   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
@@ -139,32 +174,56 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
   }
 
   if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
+    // Compares can be commuted by swapping the comparands and
+    // updating the predicate.  Choose the form that has the
+    // comparands in sorted order, or in the case of a tie, the
+    // one with the lower predicate.
     Value *LHS = CI->getOperand(0);
     Value *RHS = CI->getOperand(1);
     CmpInst::Predicate Pred = CI->getPredicate();
-    if (Inst->getOperand(0) > Inst->getOperand(1)) {
+    CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
+    if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
       std::swap(LHS, RHS);
-      Pred = CI->getSwappedPredicate();
+      Pred = SwappedPred;
     }
     return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
   }
 
-  // Hash min/max/abs (cmp + select) to allow for commuted operands.
-  // Min/max may also have non-canonical compare predicate (eg, the compare for
-  // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
-  // compare.
-  Value *A, *B;
-  SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor;
-  // TODO: We should also detect FP min/max.
-  if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
-      SPF == SPF_UMIN || SPF == SPF_UMAX) {
-    if (A > B)
+  // Hash general selects to allow matching commuted true/false operands.
+  SelectPatternFlavor SPF;
+  Value *Cond, *A, *B;
+  if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
+    // Hash min/max/abs (cmp + select) to allow for commuted operands.
+    // Min/max may also have non-canonical compare predicate (eg, the compare for
+    // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
+    // compare.
+    // TODO: We should also detect FP min/max.
+    if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
+        SPF == SPF_UMIN || SPF == SPF_UMAX) {
+      if (A > B)
+        std::swap(A, B);
+      return hash_combine(Inst->getOpcode(), SPF, A, B);
+    }
+    if (SPF == SPF_ABS || SPF == SPF_NABS) {
+      // ABS/NABS always puts the input in A and its negation in B.
+      return hash_combine(Inst->getOpcode(), SPF, A, B);
+    }
+
+    // Hash general selects to allow matching commuted true/false operands.
+
+    // If we do not have a compare as the condition, just hash in the condition.
+    CmpInst::Predicate Pred;
+    Value *X, *Y;
+    if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
+      return hash_combine(Inst->getOpcode(), Cond, A, B);
+
+    // Similar to cmp normalization (above) - canonicalize the predicate value:
+    // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
+    if (CmpInst::getInversePredicate(Pred) < Pred) {
+      Pred = CmpInst::getInversePredicate(Pred);
       std::swap(A, B);
-    return hash_combine(Inst->getOpcode(), SPF, A, B);
-  }
-  if (SPF == SPF_ABS || SPF == SPF_NABS) {
-    // ABS/NABS always puts the input in A and its negation in B.
-    return hash_combine(Inst->getOpcode(), SPF, A, B);
+    }
+    return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
   }
 
   if (CastInst *CI = dyn_cast<CastInst>(Inst))
@@ -179,8 +238,7 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
                         IVI->getOperand(1),
                         hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
 
-  assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
-          isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
+  assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
           isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
           isa<ShuffleVectorInst>(Inst)) &&
          "Invalid/unknown instruction");
@@ -191,7 +249,19 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
 }
 
-bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+#ifndef NDEBUG
+  // If -earlycse-debug-hash was specified, return a constant -- this
+  // will force all hashing to collide, so we'll exhaustively search
+  // the table for a match, and the assertion in isEqual will fire if
+  // there's a bug causing equal keys to hash differently.
+  if (EarlyCSEDebugHash)
+    return 0;
+#endif
+  return getHashValueImpl(Val);
+}
+
+static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
 
   if (LHS.isSentinel() || RHS.isSentinel())
@@ -227,26 +297,68 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
 
   // Min/max/abs can occur with commuted operands, non-canonical predicates,
   // and/or non-canonical operands.
-  Value *LHSA, *LHSB;
-  SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor;
-  // TODO: We should also detect FP min/max.
-  if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
-      LSPF == SPF_UMIN || LSPF == SPF_UMAX ||
-      LSPF == SPF_ABS || LSPF == SPF_NABS) {
-    Value *RHSA, *RHSB;
-    SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor;
+  // Selects can be non-trivially equivalent via inverted conditions and swaps.
+  SelectPatternFlavor LSPF, RSPF;
+  Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
+  if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
+      matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
     if (LSPF == RSPF) {
-      // Abs results are placed in a defined order by matchSelectPattern.
-      if (LSPF == SPF_ABS || LSPF == SPF_NABS)
+      // TODO: We should also detect FP min/max.
+      if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
+          LSPF == SPF_UMIN || LSPF == SPF_UMAX)
+        return ((LHSA == RHSA && LHSB == RHSB) ||
+                (LHSA == RHSB && LHSB == RHSA));
+
+      if (LSPF == SPF_ABS || LSPF == SPF_NABS) {
+        // Abs results are placed in a defined order by matchSelectPattern.
         return LHSA == RHSA && LHSB == RHSB;
-      return ((LHSA == RHSA && LHSB == RHSB) ||
-              (LHSA == RHSB && LHSB == RHSA));
+      }
+
+      // select Cond, A, B <--> select not(Cond), B, A
+      if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
+        return true;
+    }
+
+    // If the true/false operands are swapped and the conditions are compares
+    // with inverted predicates, the selects are equal:
+    // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
+    //
+    // This also handles patterns with a double-negation in the sense of not +
+    // inverse, because we looked through a 'not' in the matching function and
+    // swapped A/B:
+    // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
+    //
+    // This intentionally does NOT handle patterns with a double-negation in
+    // the sense of not + not, because doing so could result in values
+    // comparing
+    // as equal that hash differently in the min/max/abs cases like:
+    // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
+    //   ^ hashes as min                  ^ would not hash as min
+    // In the context of the EarlyCSE pass, however, such cases never reach
+    // this code, as we simplify the double-negation before hashing the second
+    // select (and so still succeed at CSEing them).
+    if (LHSA == RHSB && LHSB == RHSA) {
+      CmpInst::Predicate PredL, PredR;
+      Value *X, *Y;
+      if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
+          match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
+          CmpInst::getInversePredicate(PredL) == PredR)
+        return true;
     }
   }
 
   return false;
 }
 
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+  // These comparisons are nontrivial, so assert that equality implies
+  // hash equality (DenseMap demands this as an invariant).
+  bool Result = isEqualImpl(LHS, RHS);
+  assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
+         getHashValueImpl(LHS) == getHashValueImpl(RHS));
+  return Result;
+}
+
 //===----------------------------------------------------------------------===//
 // CallValue
 //===----------------------------------------------------------------------===//
@@ -419,6 +531,7 @@ public:
   bool run();
 
 private:
+  unsigned ClobberCounter = 0;
   // Almost a POD, but needs to call the constructors for the scoped hash
   // tables so that a new scope gets pushed on. These are RAII so that the
   // scope gets popped when the NodeScope is destroyed.
@@ -608,36 +721,11 @@ private:
       MSSA->verifyMemorySSA();
     // Removing a store here can leave MemorySSA in an unoptimized state by
     // creating MemoryPhis that have identical arguments and by creating
-    // MemoryUses whose defining access is not an actual clobber.  We handle the
-    // phi case eagerly here.  The non-optimized MemoryUse case is lazily
-    // updated by MemorySSA getClobberingMemoryAccess.
-    if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
-      // Optimize MemoryPhi nodes that may become redundant by having all the
-      // same input values once MA is removed.
-      SmallSetVector<MemoryPhi *, 4> PhisToCheck;
-      SmallVector<MemoryAccess *, 8> WorkQueue;
-      WorkQueue.push_back(MA);
-      // Process MemoryPhi nodes in FIFO order using a ever-growing vector since
-      // we shouldn't be processing that many phis and this will avoid an
-      // allocation in almost all cases.
-      for (unsigned I = 0; I < WorkQueue.size(); ++I) {
-        MemoryAccess *WI = WorkQueue[I];
-
-        for (auto *U : WI->users())
-          if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
-            PhisToCheck.insert(MP);
-
-        MSSAUpdater->removeMemoryAccess(WI);
-
-        for (MemoryPhi *MP : PhisToCheck) {
-          MemoryAccess *FirstIn = MP->getIncomingValue(0);
-          if (llvm::all_of(MP->incoming_values(),
-                           [=](Use &In) { return In == FirstIn; }))
-            WorkQueue.push_back(MP);
-        }
-        PhisToCheck.clear();
-      }
-    }
+    // MemoryUses whose defining access is not an actual clobber. The phi case
+    // is handled by MemorySSA when passing OptimizePhis = true to
+    // removeMemoryAccess.  The non-optimized MemoryUse case is lazily updated
+    // by MemorySSA's getClobberingMemoryAccess.
+    MSSAUpdater->removeMemoryAccess(Inst, true);
   }
 };
 
@@ -688,8 +776,13 @@ bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
   // EarlierInst and LaterInst and neither can any other write that potentially
   // clobbers LaterInst.
-  MemoryAccess *LaterDef =
-      MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+  MemoryAccess *LaterDef;
+  if (ClobberCounter < EarlyCSEMssaOptCap) {
+    LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+    ClobberCounter++;
+  } else
+    LaterDef = LaterMA->getDefiningAccess();
+
   return MSSA->dominates(LaterDef, EarlierMA);
 }
 
@@ -1117,7 +1210,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
         // At the moment, we don't remove ordered stores, but do remove
         // unordered atomic stores.  There's no special requirement (for
         // unordered atomics) about removing atomic stores only in favor of
-        // other atomic stores since we we're going to execute the non-atomic
+        // other atomic stores since we were going to execute the non-atomic
         // one anyway and the atomic one might never have become visible.
         if (LastStore) {
           ParseMemoryInst LastStoreMemInst(LastStore, TTI);
@@ -1184,8 +1277,7 @@ bool EarlyCSE::run() {
       CurrentGeneration, DT.getRootNode(),
       DT.getRootNode()->begin(), DT.getRootNode()->end()));
 
-  // Save the current generation.
-  unsigned LiveOutGeneration = CurrentGeneration;
+  assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
 
   // Process the stack.
   while (!nodesToProcess.empty()) {
@@ -1217,9 +1309,6 @@ bool EarlyCSE::run() {
     }
   } // while (!nodes...)
 
-  // Reset the current generation.
-  CurrentGeneration = LiveOutGeneration;
-
   return Changed;
 }