1 files changed, 167 insertions, 53 deletions
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index e235e5eb1a06..88dcaf0f8a36 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -21,28 +21,45 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar/Reassociate.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
+#include <cassert>
+#include <utility>
+
 using namespace llvm;
 using namespace reassociate;
 
@@ -54,7 +71,6 @@ STATISTIC(NumFactor , "Number of multiplies factored");
 
 #ifndef NDEBUG
 /// Print out the expression identified in the Ops list.
-///
 static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
   Module *M = I->getModule();
   dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
@@ -128,38 +144,37 @@ XorOpnd::XorOpnd(Value *V) {
 /// Return true if V is an instruction of the specified opcode and if it
 /// only has one use.
 static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
-  if (V->hasOneUse() && isa<Instruction>(V) &&
-      cast<Instruction>(V)->getOpcode() == Opcode &&
-      (!isa<FPMathOperator>(V) ||
-       cast<Instruction>(V)->hasUnsafeAlgebra()))
-    return cast<BinaryOperator>(V);
+  auto *I = dyn_cast<Instruction>(V);
+  if (I && I->hasOneUse() && I->getOpcode() == Opcode)
+    if (!isa<FPMathOperator>(I) || I->isFast())
+      return cast<BinaryOperator>(I);
   return nullptr;
 }
 
 static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
                                         unsigned Opcode2) {
-  if (V->hasOneUse() && isa<Instruction>(V) &&
-      (cast<Instruction>(V)->getOpcode() == Opcode1 ||
-       cast<Instruction>(V)->getOpcode() == Opcode2) &&
-      (!isa<FPMathOperator>(V) ||
-       cast<Instruction>(V)->hasUnsafeAlgebra()))
-    return cast<BinaryOperator>(V);
+  auto *I = dyn_cast<Instruction>(V);
+  if (I && I->hasOneUse() &&
+      (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2))
+    if (!isa<FPMathOperator>(I) || I->isFast())
+      return cast<BinaryOperator>(I);
   return nullptr;
 }
 
 void ReassociatePass::BuildRankMap(Function &F,
                                    ReversePostOrderTraversal<Function*> &RPOT) {
-  unsigned i = 2;
+  unsigned Rank = 2;
 
   // Assign distinct ranks to function arguments.
-  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
-    ValueRankMap[&*I] = ++i;
-    DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
+  for (auto &Arg : F.args()) {
+    ValueRankMap[&Arg] = ++Rank;
+    DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
+                 << "\n");
   }
 
   // Traverse basic blocks in ReversePostOrder
   for (BasicBlock *BB : RPOT) {
-    unsigned BBRank = RankMap[BB] = ++i << 16;
+    unsigned BBRank = RankMap[BB] = ++Rank << 16;
 
     // Walk the basic block, adding precomputed ranks for any instructions that
     // we cannot move.  This ensures that the ranks for these instructions are
@@ -207,13 +222,9 @@ void ReassociatePass::canonicalizeOperands(Instruction *I) {
 
   Value *LHS = I->getOperand(0);
   Value *RHS = I->getOperand(1);
-  unsigned LHSRank = getRank(LHS);
-  unsigned RHSRank = getRank(RHS);
-
-  if (isa<Constant>(RHS))
+  if (LHS == RHS || isa<Constant>(RHS))
     return;
-
-  if (isa<Constant>(LHS) || RHSRank < LHSRank)
+  if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS))
     cast<BinaryOperator>(I)->swapOperands();
 }
 
@@ -357,7 +368,7 @@ static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
   }
 }
 
-typedef std::pair<Value*, APInt> RepeatedValue;
+using RepeatedValue = std::pair<Value*, APInt>;
 
 /// Given an associative binary expression, return the leaf
 /// nodes in Ops along with their weights (how many times the leaf occurs).  The
@@ -432,7 +443,6 @@ typedef std::pair<Value*, APInt> RepeatedValue;
 /// that have all uses inside the expression (i.e. only used by non-leaf nodes
 /// of the expression) if it can turn them into binary operators of the right
 /// type and thus make the expression bigger.
-
 static bool LinearizeExprTree(BinaryOperator *I,
                               SmallVectorImpl<RepeatedValue> &Ops) {
   DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
@@ -470,12 +480,12 @@ static bool LinearizeExprTree(BinaryOperator *I,
 
   // Leaves - Keeps track of the set of putative leaves as well as the number of
   // paths to each leaf seen so far.
-  typedef DenseMap<Value*, APInt> LeafMap;
+  using LeafMap = DenseMap<Value *, APInt>;
   LeafMap Leaves; // Leaf -> Total weight so far.
-  SmallVector<Value*, 8> LeafOrder; // Ensure deterministic leaf output order.
+  SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
 
 #ifndef NDEBUG
-  SmallPtrSet<Value*, 8> Visited; // For sanity checking the iteration scheme.
+  SmallPtrSet<Value *, 8> Visited; // For sanity checking the iteration scheme.
 #endif
   while (!Worklist.empty()) {
     std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val();
@@ -554,7 +564,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
       assert((!isa<Instruction>(Op) ||
               cast<Instruction>(Op)->getOpcode() != Opcode
               || (isa<FPMathOperator>(Op) &&
-                  !cast<Instruction>(Op)->hasUnsafeAlgebra())) &&
+                  !cast<Instruction>(Op)->isFast())) &&
              "Should have been handled above!");
       assert(Op->hasOneUse() && "Has uses outside the expression tree!");
 
@@ -773,7 +783,7 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
         break;
       ExpressionChanged->moveBefore(I);
       ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
-    } while (1);
+    } while (true);
 
   // Throw away any left over nodes from the original expression.
   for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
@@ -789,13 +799,9 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
 /// additional opportunities have been exposed.
 static Value *NegateValue(Value *V, Instruction *BI,
                           SetVector<AssertingVH<Instruction>> &ToRedo) {
-  if (Constant *C = dyn_cast<Constant>(V)) {
-    if (C->getType()->isFPOrFPVectorTy()) {
-      return ConstantExpr::getFNeg(C);
-    }
-    return ConstantExpr::getNeg(C);
-  }
-
+  if (auto *C = dyn_cast<Constant>(V))
+    return C->getType()->isFPOrFPVectorTy() ? ConstantExpr::getFNeg(C) :
+                                              ConstantExpr::getNeg(C);
 
   // We are trying to expose opportunity for reassociation.  One of the things
   // that we want to do to achieve this is to push a negation as deep into an
@@ -913,7 +919,6 @@ BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
   //
   // Calculate the negative value of Operand 1 of the sub instruction,
   // and set it as the RHS of the add instruction we just made.
-  //
   Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
   BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
   Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
@@ -990,7 +995,7 @@ static Value *EmitAddTreeOfValues(Instruction *I,
   Value *V1 = Ops.back();
   Ops.pop_back();
   Value *V2 = EmitAddTreeOfValues(I, Ops);
-  return CreateAdd(V2, V1, "tmp", I, I);
+  return CreateAdd(V2, V1, "reass.add", I, I);
 }
 
 /// If V is an expression tree that is a multiplication sequence,
@@ -1157,7 +1162,6 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
 // If it was successful, true is returned, and the "R" and "C" is returned
 // via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
 // and both "Res" and "ConstOpnd" remain unchanged.
-//
 bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
                                      APInt &ConstOpnd, Value *&Res) {
   // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 
@@ -1183,7 +1187,6 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
     RedoInsts.insert(T);
   return true;
 }
-
                            
 // Helper function of OptimizeXor(). It tries to simplify
 // "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
@@ -1230,7 +1233,6 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
 
     Res = createAndInstr(I, X, C3);
     ConstOpnd ^= C1;
-
   } else if (Opnd1->isOrExpr()) {
     // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
     //
@@ -1349,7 +1351,6 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
 
     // step 3.2: When previous and current operands share the same symbolic
     //  value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" 
-    //    
     if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
       // Remove previous operand
       PrevOpnd->Invalidate();
@@ -1601,7 +1602,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
       RedoInsts.insert(VI);
 
     // Create the multiply.
-    Instruction *V2 = CreateMul(V, MaxOccVal, "tmp", I, I);
+    Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I, I);
 
     // Rerun associate on the multiply in case the inner expression turned into
     // a multiply.  We want to make sure that we keep things in canonical form.
@@ -2012,8 +2013,8 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
   if (I->isCommutative())
     canonicalizeOperands(I);
 
-  // Don't optimize floating point instructions that don't have unsafe algebra.
-  if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra())
+  // Don't optimize floating-point instructions unless they are 'fast'.
+  if (I->getType()->isFPOrFPVectorTy() && !I->isFast())
     return;
 
   // Do not reassociate boolean (i1) expressions.  We want to preserve the
@@ -2140,7 +2141,8 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
     DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
     I->replaceAllUsesWith(V);
     if (Instruction *VI = dyn_cast<Instruction>(V))
-      VI->setDebugLoc(I->getDebugLoc());
+      if (I->getDebugLoc())
+        VI->setDebugLoc(I->getDebugLoc());
     RedoInsts.insert(I);
     ++NumAnnihil;
     return;
@@ -2183,11 +2185,104 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
     return;
   }
 
+  if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
+    // Find the pair with the highest count in the pairmap and move it to the
+    // back of the list so that it can later be CSE'd.
+    // example:
+    //   a*b*c*d*e
+    // if c*e is the most "popular" pair, we can express this as
+    //   (((c*e)*d)*b)*a
+    unsigned Max = 1;
+    unsigned BestRank = 0;
+    std::pair<unsigned, unsigned> BestPair;
+    unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
+    for (unsigned i = 0; i < Ops.size() - 1; ++i)
+      for (unsigned j = i + 1; j < Ops.size(); ++j) {
+        unsigned Score = 0;
+        Value *Op0 = Ops[i].Op;
+        Value *Op1 = Ops[j].Op;
+        if (std::less<Value *>()(Op1, Op0))
+          std::swap(Op0, Op1);
+        auto it = PairMap[Idx].find({Op0, Op1});
+        if (it != PairMap[Idx].end())
+          Score += it->second;
+
+        unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
+        if (Score > Max || (Score == Max && MaxRank < BestRank)) {
+          BestPair = {i, j};
+          Max = Score;
+          BestRank = MaxRank;
+        }
+      }
+    if (Max > 1) {
+      auto Op0 = Ops[BestPair.first];
+      auto Op1 = Ops[BestPair.second];
+      Ops.erase(&Ops[BestPair.second]);
+      Ops.erase(&Ops[BestPair.first]);
+      Ops.push_back(Op0);
+      Ops.push_back(Op1);
+    }
+  }
   // Now that we ordered and optimized the expressions, splat them back into
   // the expression tree, removing any unneeded nodes.
   RewriteExprTree(I, Ops);
 }
 
+void
+ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
+  // Make a "pairmap" of how often each operand pair occurs.
+  for (BasicBlock *BI : RPOT) {
+    for (Instruction &I : *BI) {
+      if (!I.isAssociative())
+        continue;
+
+      // Ignore nodes that aren't at the root of trees.
+      if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
+        continue;
+
+      // Collect all operands in a single reassociable expression.
+      // Since Reassociate has already been run once, we can assume things
+      // are already canonical according to Reassociation's regime.
+      SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
+      SmallVector<Value *, 8> Ops;
+      while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
+        Value *Op = Worklist.pop_back_val();
+        Instruction *OpI = dyn_cast<Instruction>(Op);
+        if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
+          Ops.push_back(Op);
+          continue;
+        }
+        // Be paranoid about self-referencing expressions in unreachable code.
+        if (OpI->getOperand(0) != OpI)
+          Worklist.push_back(OpI->getOperand(0));
+        if (OpI->getOperand(1) != OpI)
+          Worklist.push_back(OpI->getOperand(1));
+      }
+      // Skip extremely long expressions.
+      if (Ops.size() > GlobalReassociateLimit)
+        continue;
+
+      // Add all pairwise combinations of operands to the pair map.
+      unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
+      SmallSet<std::pair<Value *, Value*>, 32> Visited;
+      for (unsigned i = 0; i < Ops.size() - 1; ++i) {
+        for (unsigned j = i + 1; j < Ops.size(); ++j) {
+          // Canonicalize operand orderings.
+          Value *Op0 = Ops[i];
+          Value *Op1 = Ops[j];
+          if (std::less<Value *>()(Op1, Op0))
+            std::swap(Op0, Op1);
+          if (!Visited.insert({Op0, Op1}).second)
+            continue;
+          auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, 1});
+          if (!res.second)
+            ++res.first->second;
+        }
+      }
+    }
+  }
+}
+
 PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
   // Get the functions basic blocks in Reverse Post Order. This order is used by
   // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
@@ -2198,8 +2293,20 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
   // Calculate the rank map for F.
   BuildRankMap(F, RPOT);
 
+  // Build the pair map before running reassociate.
+  // Technically this would be more accurate if we did it after one round
+  // of reassociation, but in practice it doesn't seem to help much on
+  // real-world code, so don't waste the compile time running reassociate
+  // twice.
+  // If a user wants, they could expicitly run reassociate twice in their
+  // pass pipeline for further potential gains.
+  // It might also be possible to update the pair map during runtime, but the
+  // overhead of that may be large if there's many reassociable chains.
+  BuildPairMap(RPOT);
+
   MadeChange = false;
-  // Traverse the same blocks that was analysed by BuildRankMap.
+
+  // Traverse the same blocks that were analysed by BuildRankMap.
   for (BasicBlock *BI : RPOT) {
     assert(RankMap.count(&*BI) && "BB should be ranked.");
     // Optimize every instruction in the basic block.
@@ -2238,9 +2345,11 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
     }
   }
 
-  // We are done with the rank map.
+  // We are done with the rank map and pair map.
   RankMap.clear();
   ValueRankMap.clear();
+  for (auto &Entry : PairMap)
+    Entry.clear();
 
   if (MadeChange) {
     PreservedAnalyses PA;
@@ -2253,10 +2362,13 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
 }
 
 namespace {
+
   class ReassociateLegacyPass : public FunctionPass {
     ReassociatePass Impl;
+
   public:
     static char ID; // Pass identification, replacement for typeid
+
     ReassociateLegacyPass() : FunctionPass(ID) {
       initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
     }
@@ -2275,9 +2387,11 @@ namespace {
       AU.addPreserved<GlobalsAAWrapperPass>();
     }
   };
-}
+
+} // end anonymous namespace
 
 char ReassociateLegacyPass::ID = 0;
+
 INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
                 "Reassociate expressions", false, false)