aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/Reassociate.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/Reassociate.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/Reassociate.cpp160
1 files changed, 128 insertions, 32 deletions
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 21628b61edd6..40c84e249523 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -52,6 +52,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
@@ -70,6 +71,12 @@ STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
+static cl::opt<bool>
+ UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
+ cl::desc("Only reorder expressions within a basic block "
+ "when exposing CSE opportunities"),
+ cl::init(true), cl::Hidden);
+
#ifndef NDEBUG
/// Print out the expression identified in the Ops list.
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
@@ -620,8 +627,7 @@ static bool LinearizeExprTree(Instruction *I,
// The leaves, repeated according to their weights, represent the linearized
// form of the expression.
- for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
- Value *V = LeafOrder[i];
+ for (Value *V : LeafOrder) {
LeafMap::iterator It = Leaves.find(V);
if (It == Leaves.end())
// Node initially thought to be a leaf wasn't.
@@ -683,10 +689,12 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
NotRewritable.insert(Ops[i].Op);
- // ExpressionChanged - Non-null if the rewritten expression differs from the
- // original in some non-trivial way, requiring the clearing of optional flags.
- // Flags are cleared from the operator in ExpressionChanged up to I inclusive.
- BinaryOperator *ExpressionChanged = nullptr;
+ // ExpressionChangedStart - Non-null if the rewritten expression differs from
+ // the original in some non-trivial way, requiring the clearing of optional
+ // flags. Flags are cleared from the operator in ExpressionChangedStart up to
+ // ExpressionChangedEnd inclusive.
+ BinaryOperator *ExpressionChangedStart = nullptr,
+ *ExpressionChangedEnd = nullptr;
for (unsigned i = 0; ; ++i) {
// The last operation (which comes earliest in the IR) is special as both
// operands will come from Ops, rather than just one with the other being
@@ -728,7 +736,9 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
}
LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
- ExpressionChanged = Op;
+ ExpressionChangedStart = Op;
+ if (!ExpressionChangedEnd)
+ ExpressionChangedEnd = Op;
MadeChange = true;
++NumChanged;
@@ -750,7 +760,9 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
- ExpressionChanged = Op;
+ ExpressionChangedStart = Op;
+ if (!ExpressionChangedEnd)
+ ExpressionChangedEnd = Op;
}
LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
MadeChange = true;
@@ -787,7 +799,9 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
Op->setOperand(0, NewOp);
LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
- ExpressionChanged = Op;
+ ExpressionChangedStart = Op;
+ if (!ExpressionChangedEnd)
+ ExpressionChangedEnd = Op;
MadeChange = true;
++NumChanged;
Op = NewOp;
@@ -797,27 +811,36 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
// starting from the operator specified in ExpressionChanged, and compactify
// the operators to just before the expression root to guarantee that the
// expression tree is dominated by all of Ops.
- if (ExpressionChanged)
+ if (ExpressionChangedStart) {
+ bool ClearFlags = true;
do {
// Preserve FastMathFlags.
- if (isa<FPMathOperator>(I)) {
- FastMathFlags Flags = I->getFastMathFlags();
- ExpressionChanged->clearSubclassOptionalData();
- ExpressionChanged->setFastMathFlags(Flags);
- } else
- ExpressionChanged->clearSubclassOptionalData();
-
- if (ExpressionChanged == I)
+ if (ClearFlags) {
+ if (isa<FPMathOperator>(I)) {
+ FastMathFlags Flags = I->getFastMathFlags();
+ ExpressionChangedStart->clearSubclassOptionalData();
+ ExpressionChangedStart->setFastMathFlags(Flags);
+ } else
+ ExpressionChangedStart->clearSubclassOptionalData();
+ }
+
+ if (ExpressionChangedStart == ExpressionChangedEnd)
+ ClearFlags = false;
+ if (ExpressionChangedStart == I)
break;
// Discard any debug info related to the expressions that has changed (we
- // can leave debug infor related to the root, since the result of the
- // expression tree should be the same even after reassociation).
- replaceDbgUsesWithUndef(ExpressionChanged);
-
- ExpressionChanged->moveBefore(I);
- ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
+ // can leave debug info related to the root and any operation that didn't
+ // change, since the result of the expression tree should be the same
+ // even after reassociation).
+ if (ClearFlags)
+ replaceDbgUsesWithUndef(ExpressionChangedStart);
+
+ ExpressionChangedStart->moveBefore(I);
+ ExpressionChangedStart =
+ cast<BinaryOperator>(*ExpressionChangedStart->user_begin());
} while (true);
+ }
// Throw away any left over nodes from the original expression.
for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
@@ -1507,8 +1530,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
// Step 4: Reassemble the Ops
if (Changed) {
Ops.clear();
- for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
- XorOpnd &O = Opnds[i];
+ for (const XorOpnd &O : Opnds) {
if (O.isInvalid())
continue;
ValueEntry VE(getRank(O.getValue()), O.getValue());
@@ -1644,8 +1666,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
// Add one to FactorOccurrences for each unique factor in this op.
SmallPtrSet<Value*, 8> Duplicates;
- for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
- Value *Factor = Factors[i];
+ for (Value *Factor : Factors) {
if (!Duplicates.insert(Factor).second)
continue;
@@ -2048,7 +2069,7 @@ void ReassociatePass::EraseInst(Instruction *I) {
// blocks because it's a waste of time and also because it can
// lead to infinite loop due to LLVM's non-standard definition
// of dominance.
- if (ValueRankMap.find(Op) != ValueRankMap.end())
+ if (ValueRankMap.contains(Op))
RedoInsts.insert(Op);
}
@@ -2410,8 +2431,67 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
unsigned BestRank = 0;
std::pair<unsigned, unsigned> BestPair;
unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
- for (unsigned i = 0; i < Ops.size() - 1; ++i)
- for (unsigned j = i + 1; j < Ops.size(); ++j) {
+ unsigned LimitIdx = 0;
+ // With the CSE-driven heuristic, we are about to slap two values at the
+ // beginning of the expression whereas they could live very late in the CFG.
+ // When using the CSE-local heuristic we avoid creating dependences from
+ // completely unrelated part of the CFG by limiting the expression
+ // reordering on the values that live in the first seen basic block.
+ // The main idea is that we want to avoid forming expressions that would
+ // become loop dependent.
+ if (UseCSELocalOpt) {
+ const BasicBlock *FirstSeenBB = nullptr;
+ int StartIdx = Ops.size() - 1;
+ // Skip the first value of the expression since we need at least two
+ // values to materialize an expression. I.e., even if this value is
+ // anchored in a different basic block, the actual first sub expression
+ // will be anchored on the second value.
+ for (int i = StartIdx - 1; i != -1; --i) {
+ const Value *Val = Ops[i].Op;
+ const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
+ const BasicBlock *SeenBB = nullptr;
+ if (!CurrLeafInstr) {
+ // The value is free of any CFG dependencies.
+ // Do as if it lives in the entry block.
+ //
+ // We do this to make sure all the values falling on this path are
+ // seen through the same anchor point. The rationale is these values
+ // can be combined together to from a sub expression free of any CFG
+ // dependencies so we want them to stay together.
+ // We could be cleverer and postpone the anchor down to the first
+ // anchored value, but that's likely complicated to get right.
+ // E.g., we wouldn't want to do that if that means being stuck in a
+ // loop.
+ //
+ // For instance, we wouldn't want to change:
+ // res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
+ // into
+ // res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
+ // Because all the sub expressions with arg2..N would be stuck between
+ // two loop dependent values.
+ SeenBB = &I->getParent()->getParent()->getEntryBlock();
+ } else {
+ SeenBB = CurrLeafInstr->getParent();
+ }
+
+ if (!FirstSeenBB) {
+ FirstSeenBB = SeenBB;
+ continue;
+ }
+ if (FirstSeenBB != SeenBB) {
+ // ith value is in a different basic block.
+ // Rewind the index once to point to the last value on the same basic
+ // block.
+ LimitIdx = i + 1;
+ LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
+ << LimitIdx << ", " << StartIdx << "]\n");
+ break;
+ }
+ }
+ }
+ for (unsigned i = Ops.size() - 1; i > LimitIdx; --i) {
+ // We must use int type to go below zero when LimitIdx is 0.
+ for (int j = i - 1; j >= (int)LimitIdx; --j) {
unsigned Score = 0;
Value *Op0 = Ops[i].Op;
Value *Op1 = Ops[j].Op;
@@ -2429,12 +2509,26 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
}
unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
+
+ // By construction, the operands are sorted in reverse order of their
+ // topological order.
+ // So we tend to form (sub) expressions with values that are close to
+ // each other.
+ //
+ // Now to expose more CSE opportunities we want to expose the pair of
+ // operands that occur the most (as statically computed in
+ // BuildPairMap.) as the first sub-expression.
+ //
+ // If two pairs occur as many times, we pick the one with the
+ // lowest rank, meaning the one with both operands appearing first in
+ // the topological order.
if (Score > Max || (Score == Max && MaxRank < BestRank)) {
- BestPair = {i, j};
+ BestPair = {j, i};
Max = Score;
BestRank = MaxRank;
}
}
+ }
if (Max > 1) {
auto Op0 = Ops[BestPair.first];
auto Op1 = Ops[BestPair.second];
@@ -2444,6 +2538,8 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
Ops.push_back(Op1);
}
}
+ LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
+ dbgs() << '\n');
// Now that we ordered and optimized the expressions, splat them back into
// the expression tree, removing any unneeded nodes.
RewriteExprTree(I, Ops);