summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/LoopStrengthReduce.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp68
1 files changed, 50 insertions, 18 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index fa83b48210bc..773ffb9df0a2 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -155,6 +155,11 @@ static cl::opt<bool> FilterSameScaledReg(
cl::desc("Narrow LSR search space by filtering non-optimal formulae"
" with the same ScaledReg and Scale"));
+static cl::opt<unsigned> ComplexityLimit(
+ "lsr-complexity-limit", cl::Hidden,
+ cl::init(std::numeric_limits<uint16_t>::max()),
+ cl::desc("LSR search space complexity limit"));
+
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -1487,7 +1492,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
- llvm::sort(Key.begin(), Key.end());
+ llvm::sort(Key);
return Uniquifier.count(Key);
}
@@ -1511,7 +1516,7 @@ bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
- llvm::sort(Key.begin(), Key.end());
+ llvm::sort(Key);
if (!Uniquifier.insert(Key).second)
return false;
@@ -3638,32 +3643,62 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
- if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
+ if (Base.BaseRegs.size() + (Base.Scale == 1) +
+ (Base.UnfoldedOffset != 0) <= 1)
return;
// Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
// processing the formula.
Base.unscale();
- Formula F = Base;
- F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
+ Formula NewBase = Base;
+ NewBase.BaseRegs.clear();
+ Type *CombinedIntegerType = nullptr;
for (const SCEV *BaseReg : Base.BaseRegs) {
if (SE.properlyDominates(BaseReg, L->getHeader()) &&
- !SE.hasComputableLoopEvolution(BaseReg, L))
+ !SE.hasComputableLoopEvolution(BaseReg, L)) {
+ if (!CombinedIntegerType)
+ CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());
Ops.push_back(BaseReg);
+ }
else
- F.BaseRegs.push_back(BaseReg);
+ NewBase.BaseRegs.push_back(BaseReg);
}
- if (Ops.size() > 1) {
- const SCEV *Sum = SE.getAddExpr(Ops);
+
+ // If no register is relevant, we're done.
+ if (Ops.size() == 0)
+ return;
+
+ // Utility function for generating the required variants of the combined
+ // registers.
+ auto GenerateFormula = [&](const SCEV *Sum) {
+ Formula F = NewBase;
+
// TODO: If Sum is zero, it probably means ScalarEvolution missed an
// opportunity to fold something. For now, just ignore such cases
// rather than proceed with zero in a register.
- if (!Sum->isZero()) {
- F.BaseRegs.push_back(Sum);
- F.canonicalize(*L);
- (void)InsertFormula(LU, LUIdx, F);
- }
+ if (Sum->isZero())
+ return;
+
+ F.BaseRegs.push_back(Sum);
+ F.canonicalize(*L);
+ (void)InsertFormula(LU, LUIdx, F);
+ };
+
+ // If we collected at least two registers, generate a formula combining them.
+ if (Ops.size() > 1) {
+ SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops.
+ GenerateFormula(SE.getAddExpr(OpsCopy));
+ }
+
+ // If we have an unfolded offset, generate a formula combining it with the
+ // registers collected.
+ if (NewBase.UnfoldedOffset) {
+ assert(CombinedIntegerType && "Missing a type for the unfolded offset");
+ Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset,
+ true));
+ NewBase.UnfoldedOffset = 0;
+ GenerateFormula(SE.getAddExpr(Ops));
}
}
@@ -4238,7 +4273,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for
// uniquifying.
- llvm::sort(Key.begin(), Key.end());
+ llvm::sort(Key);
std::pair<BestFormulaeTy::const_iterator, bool> P =
BestFormulae.insert(std::make_pair(Key, FIdx));
@@ -4281,9 +4316,6 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
});
}
-// This is a rough guess that seems to work fairly well.
-static const size_t ComplexityLimit = std::numeric_limits<uint16_t>::max();
-
/// Estimate the worst-case number of solutions the solver might have to
/// consider. It almost never considers this many solutions because it prune the
/// search space, but the pruning isn't always sufficient.