diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 68 |
1 files changed, 50 insertions, 18 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index fa83b48210bc..773ffb9df0a2 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -155,6 +155,11 @@ static cl::opt<bool> FilterSameScaledReg( cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale")); +static cl::opt<unsigned> ComplexityLimit( + "lsr-complexity-limit", cl::Hidden, + cl::init(std::numeric_limits<uint16_t>::max()), + cl::desc("LSR search space complexity limit")); + #ifndef NDEBUG // Stress test IV chain generation. static cl::opt<bool> StressIVChain( @@ -1487,7 +1492,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { SmallVector<const SCEV *, 4> Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. - llvm::sort(Key.begin(), Key.end()); + llvm::sort(Key); return Uniquifier.count(Key); } @@ -1511,7 +1516,7 @@ bool LSRUse::InsertFormula(const Formula &F, const Loop &L) { SmallVector<const SCEV *, 4> Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. - llvm::sort(Key.begin(), Key.end()); + llvm::sort(Key); if (!Uniquifier.insert(Key).second) return false; @@ -3638,32 +3643,62 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base) { // This method is only interesting on a plurality of registers. - if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1) + if (Base.BaseRegs.size() + (Base.Scale == 1) + + (Base.UnfoldedOffset != 0) <= 1) return; // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before // processing the formula. Base.unscale(); - Formula F = Base; - F.BaseRegs.clear(); SmallVector<const SCEV *, 4> Ops; + Formula NewBase = Base; + NewBase.BaseRegs.clear(); + Type *CombinedIntegerType = nullptr; for (const SCEV *BaseReg : Base.BaseRegs) { if (SE.properlyDominates(BaseReg, L->getHeader()) && - !SE.hasComputableLoopEvolution(BaseReg, L)) + !SE.hasComputableLoopEvolution(BaseReg, L)) { + if (!CombinedIntegerType) + CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType()); Ops.push_back(BaseReg); + } else - F.BaseRegs.push_back(BaseReg); + NewBase.BaseRegs.push_back(BaseReg); } - if (Ops.size() > 1) { - const SCEV *Sum = SE.getAddExpr(Ops); + + // If no register is relevant, we're done. + if (Ops.size() == 0) + return; + + // Utility function for generating the required variants of the combined + // registers. + auto GenerateFormula = [&](const SCEV *Sum) { + Formula F = NewBase; + // TODO: If Sum is zero, it probably means ScalarEvolution missed an // opportunity to fold something. For now, just ignore such cases // rather than proceed with zero in a register. - if (!Sum->isZero()) { - F.BaseRegs.push_back(Sum); - F.canonicalize(*L); - (void)InsertFormula(LU, LUIdx, F); - } + if (Sum->isZero()) + return; + + F.BaseRegs.push_back(Sum); + F.canonicalize(*L); + (void)InsertFormula(LU, LUIdx, F); + }; + + // If we collected at least two registers, generate a formula combining them. + if (Ops.size() > 1) { + SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops. + GenerateFormula(SE.getAddExpr(OpsCopy)); + } + + // If we have an unfolded offset, generate a formula combining it with the + // registers collected. + if (NewBase.UnfoldedOffset) { + assert(CombinedIntegerType && "Missing a type for the unfolded offset"); + Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset, + true)); + NewBase.UnfoldedOffset = 0; + GenerateFormula(SE.getAddExpr(Ops)); } } @@ -4238,7 +4273,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for // uniquifying. - llvm::sort(Key.begin(), Key.end()); + llvm::sort(Key); std::pair<BestFormulaeTy::const_iterator, bool> P = BestFormulae.insert(std::make_pair(Key, FIdx)); @@ -4281,9 +4316,6 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { }); } -// This is a rough guess that seems to work fairly well. -static const size_t ComplexityLimit = std::numeric_limits<uint16_t>::max(); - /// Estimate the worst-case number of solutions the solver might have to /// consider. It almost never considers this many solutions because it prune the /// search space, but the pruning isn't always sufficient. |