diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
| -rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 68 | 
1 files changed, 50 insertions, 18 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index fa83b48210bc9..773ffb9df0a28 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -155,6 +155,11 @@ static cl::opt<bool> FilterSameScaledReg(      cl::desc("Narrow LSR search space by filtering non-optimal formulae"               " with the same ScaledReg and Scale")); +static cl::opt<unsigned> ComplexityLimit( +  "lsr-complexity-limit", cl::Hidden, +  cl::init(std::numeric_limits<uint16_t>::max()), +  cl::desc("LSR search space complexity limit")); +  #ifndef NDEBUG  // Stress test IV chain generation.  static cl::opt<bool> StressIVChain( @@ -1487,7 +1492,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {    SmallVector<const SCEV *, 4> Key = F.BaseRegs;    if (F.ScaledReg) Key.push_back(F.ScaledReg);    // Unstable sort by host order ok, because this is only used for uniquifying. -  llvm::sort(Key.begin(), Key.end()); +  llvm::sort(Key);    return Uniquifier.count(Key);  } @@ -1511,7 +1516,7 @@ bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {    SmallVector<const SCEV *, 4> Key = F.BaseRegs;    if (F.ScaledReg) Key.push_back(F.ScaledReg);    // Unstable sort by host order ok, because this is only used for uniquifying. -  llvm::sort(Key.begin(), Key.end()); +  llvm::sort(Key);    if (!Uniquifier.insert(Key).second)      return false; @@ -3638,32 +3643,62 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,  void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,                                         Formula Base) {    // This method is only interesting on a plurality of registers. -  if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1) +  if (Base.BaseRegs.size() + (Base.Scale == 1) + +      (Base.UnfoldedOffset != 0) <= 1)      return;    // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before    // processing the formula.    Base.unscale(); -  Formula F = Base; -  F.BaseRegs.clear();    SmallVector<const SCEV *, 4> Ops; +  Formula NewBase = Base; +  NewBase.BaseRegs.clear(); +  Type *CombinedIntegerType = nullptr;    for (const SCEV *BaseReg : Base.BaseRegs) {      if (SE.properlyDominates(BaseReg, L->getHeader()) && -        !SE.hasComputableLoopEvolution(BaseReg, L)) +        !SE.hasComputableLoopEvolution(BaseReg, L)) { +      if (!CombinedIntegerType) +        CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());        Ops.push_back(BaseReg); +    }      else -      F.BaseRegs.push_back(BaseReg); +      NewBase.BaseRegs.push_back(BaseReg);    } -  if (Ops.size() > 1) { -    const SCEV *Sum = SE.getAddExpr(Ops); + +  // If no register is relevant, we're done. +  if (Ops.size() == 0) +    return; + +  // Utility function for generating the required variants of the combined +  // registers. +  auto GenerateFormula = [&](const SCEV *Sum) { +    Formula F = NewBase; +      // TODO: If Sum is zero, it probably means ScalarEvolution missed an      // opportunity to fold something. For now, just ignore such cases      // rather than proceed with zero in a register. -    if (!Sum->isZero()) { -      F.BaseRegs.push_back(Sum); -      F.canonicalize(*L); -      (void)InsertFormula(LU, LUIdx, F); -    } +    if (Sum->isZero()) +      return; + +    F.BaseRegs.push_back(Sum); +    F.canonicalize(*L); +    (void)InsertFormula(LU, LUIdx, F); +  }; + +  // If we collected at least two registers, generate a formula combining them. +  if (Ops.size() > 1) { +    SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops. +    GenerateFormula(SE.getAddExpr(OpsCopy)); +  } + +  // If we have an unfolded offset, generate a formula combining it with the +  // registers collected. +  if (NewBase.UnfoldedOffset) { +    assert(CombinedIntegerType && "Missing a type for the unfolded offset"); +    Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset, +                                 true)); +    NewBase.UnfoldedOffset = 0; +    GenerateFormula(SE.getAddExpr(Ops));    }  } @@ -4238,7 +4273,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {            Key.push_back(F.ScaledReg);          // Unstable sort by host order ok, because this is only used for          // uniquifying. -        llvm::sort(Key.begin(), Key.end()); +        llvm::sort(Key);          std::pair<BestFormulaeTy::const_iterator, bool> P =            BestFormulae.insert(std::make_pair(Key, FIdx)); @@ -4281,9 +4316,6 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {    });  } -// This is a rough guess that seems to work fairly well. -static const size_t ComplexityLimit = std::numeric_limits<uint16_t>::max(); -  /// Estimate the worst-case number of solutions the solver might have to  /// consider. It almost never considers this many solutions because it prune the  /// search space, but the pruning isn't always sufficient.  | 
