Diffstat (limited to 'contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r--  contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 551
1 file changed, 319 insertions, 232 deletions
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index becbae4c5b50..d7896ade3543 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -58,12 +58,12 @@ static cl::opt<unsigned> MemoryCheckMergeThreshold(
 /// Maximum SIMD width.
 const unsigned VectorizerParams::MaxVectorWidth = 64;
 
-/// \brief We collect interesting dependences up to this threshold.
-static cl::opt<unsigned> MaxInterestingDependence(
-    "max-interesting-dependences", cl::Hidden,
-    cl::desc("Maximum number of interesting dependences collected by "
-             "loop-access analysis (default = 100)"),
-    cl::init(100));
+/// \brief We collect dependences up to this threshold.
+static cl::opt<unsigned>
+    MaxDependences("max-dependences", cl::Hidden,
+                   cl::desc("Maximum number of dependences collected by "
+                            "loop-access analysis (default = 100)"),
+                   cl::init(100));
 
 bool VectorizerParams::isInterleaveForced() {
   return ::VectorizationInterleave.getNumOccurrences() > 0;
@@ -87,11 +87,10 @@ Value *llvm::stripIntegerCast(Value *V) {
   return V;
 }
 
-const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                             const ValueToValueMap &PtrToStride,
                                             Value *Ptr, Value *OrigPtr) {
-
-  const SCEV *OrigSCEV = SE->getSCEV(Ptr);
+  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
 
   // If there is an entry in the map return the SCEV of the pointer with the
   // symbolic stride replaced by one.
@@ -108,36 +107,82 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
     ValueToValueMap RewriteMap;
     RewriteMap[StrideVal] = One;
 
-    const SCEV *ByOne =
-        SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
-    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
+    ScalarEvolution *SE = PSE.getSE();
+    const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
+    const auto *CT =
+        static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
+
+    PSE.addPredicate(*SE->getEqualPredicate(U, CT));
+    auto *Expr = PSE.getSCEV(Ptr);
+
+    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr
                  << "\n");
-    return ByOne;
+    return Expr;
  }
 
   // Otherwise, just return the SCEV of the original pointer.
-  return SE->getSCEV(Ptr);
+  return OrigSCEV;
 }
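The hunk above is the core of the change: instead of rewriting a symbolic stride to 1 behind ScalarEvolution's back, the analysis now records the assumption "Stride == 1" as an equality predicate on the PredicatedScalarEvolution object, so a runtime guard can verify it before the optimized loop is entered. A minimal standalone sketch of that record-and-check scheme (plain C++, not the LLVM API; PredicateSet and assumeUnitStride are illustrative names):

    #include <functional>
    #include <vector>

    // Each recorded predicate is a runtime check that must pass before the
    // version of the loop optimized under that assumption may execute.
    struct PredicateSet {
      std::vector<std::function<bool()>> Checks;

      bool allHold() const {
        for (const auto &Check : Checks)
          if (!Check())
            return false;
        return true;
      }
    };

    // Model of the rewrite above: analyze the access as if the stride were 1,
    // and remember the "Stride == 1" assumption for a runtime guard.
    long assumeUnitStride(const long &SymbolicStride, PredicateSet &Preds) {
      Preds.Checks.push_back([&SymbolicStride] { return SymbolicStride == 1; });
      return 1;
    }

A client such as the loop vectorizer would then branch on Preds.allHold() between the specialized and the original scalar loop, which is the versioning pattern these collected predicates feed.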
 
 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                     unsigned DepSetId, unsigned ASId,
-                                    const ValueToValueMap &Strides) {
+                                    const ValueToValueMap &Strides,
+                                    PredicatedScalarEvolution &PSE) {
   // Get the stride replaced scev.
-  const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
   assert(AR && "Invalid addrec expression");
+  ScalarEvolution *SE = PSE.getSE();
   const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+
+  const SCEV *ScStart = AR->getStart();
   const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
-  Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId,
-                        Sc);
+  const SCEV *Step = AR->getStepRecurrence(*SE);
+
+  // For expressions with negative step, the upper bound is ScStart and the
+  // lower bound is ScEnd.
+  if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) {
+    if (CStep->getValue()->isNegative())
+      std::swap(ScStart, ScEnd);
+  } else {
+    // Fallback case: the step is not constant, but we can still
+    // get the upper and lower bounds of the interval by using min/max
+    // expressions.
+    ScStart = SE->getUMinExpr(ScStart, ScEnd);
+    ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
+  }
+
+  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
+}
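The new bound computation in insert() reads naturally once spelled out: for an add-recurrence {Start,+,Step}, the accessed range runs from the start value to the value at the backedge-taken count, and a negative step simply flips which end is the low bound. A standalone arithmetic sketch for the constant-step case (byte offsets instead of SCEVs; names are illustrative):

    #include <algorithm>
    #include <cstdint>
    #include <utility>

    // Bounds of the addresses touched by Start + I * Step for I = 0..BTC,
    // mirroring the ScStart/ScEnd swap for negative constant steps above.
    std::pair<int64_t, int64_t> accessBounds(int64_t Start, int64_t Step,
                                             int64_t BTC) {
      int64_t ScStart = Start;
      int64_t ScEnd = Start + BTC * Step; // AR->evaluateAtIteration(BTC)
      if (Step < 0)
        std::swap(ScStart, ScEnd); // the first access is the upper bound
      return {ScStart, ScEnd};
    }

    // accessBounds(1000, -4, 10) == {960, 1000}: a downward walk still
    // yields a well-ordered [Low, High] interval for the runtime check.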
+
+SmallVector<RuntimePointerChecking::PointerCheck, 4>
+RuntimePointerChecking::generateChecks() const {
+  SmallVector<PointerCheck, 4> Checks;
+
+  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
+      const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
+      const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
+
+      if (needsChecking(CGI, CGJ))
+        Checks.push_back(std::make_pair(&CGI, &CGJ));
+    }
+  }
+  return Checks;
+}
+
+void RuntimePointerChecking::generateChecks(
+    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+  assert(Checks.empty() && "Checks is not empty");
+  groupChecks(DepCands, UseDependencies);
+  Checks = generateChecks();
+}
 
-bool RuntimePointerChecking::needsChecking(
-    const CheckingPtrGroup &M, const CheckingPtrGroup &N,
-    const SmallVectorImpl<int> *PtrPartition) const {
+bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
+                                           const CheckingPtrGroup &N) const {
   for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
     for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
-      if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+      if (needsChecking(M.Members[I], N.Members[J]))
         return true;
   return false;
 }
@@ -204,8 +249,31 @@ void RuntimePointerChecking::groupChecks(
   CheckingGroups.clear();
 
+  // If we need to check two pointers to the same underlying object
+  // with a non-constant difference, we shouldn't perform any pointer
+  // grouping with those pointers. This is because we can easily get
+  // into cases where the resulting check would return false, even when
+  // the accesses are safe.
+  //
+  // The following example shows this:
+  // for (i = 0; i < 1000; ++i)
+  //   a[5000 + i * m] = a[i] + a[i + 9000]
+  //
+  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
+  // (0, 10000) which is always false. However, if m is 1, there is no
+  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
+  // us to perform an accurate check in this case.
+  //
+  // The above case requires that we have an UnknownDependence between
+  // accesses to the same underlying object. This cannot happen unless
+  // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
+  // is also false. In this case we will use the fallback path and create
+  // separate checking groups for all pointers.
+
   // If we don't have the dependency partitions, construct a new
-  // checking pointer group for each pointer.
+  // checking pointer group for each pointer. This is also required
+  // for correctness, because in this case we can have checking between
+  // pointers to the same underlying object.
   if (!UseDependencies) {
     for (unsigned I = 0; I < Pointers.size(); ++I)
       CheckingGroups.push_back(CheckingPtrGroup(I, *this));
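The example in the new comment is worth working through with concrete numbers. The sketch below shows how the merged group forces the no-overlap test to fail for every m >= 1, while the ungrouped per-pointer checks pass when m == 1 (indices in elements; a model of the check, not the LAA code):

    #include <cstdio>

    static bool disjoint(long LowA, long HighA, long LowB, long HighB) {
      return HighA < LowB || HighB < LowA; // runtime no-overlap test
    }

    int main() {
      long m = 1;
      // Grouped read range covers both a[i] and a[i + 9000]: [0, 9999].
      long ReadLow = 0, ReadHigh = 9999;
      // Write range of a[5000 + i * m] for i = 0..999: [5000, 5000 + 999 * m].
      long WriteLow = 5000, WriteHigh = 5000 + 999 * m;
      std::printf("grouped check passes: %d\n",
                  disjoint(WriteLow, WriteHigh, ReadLow, ReadHigh)); // 0: fails
      // Ungrouped: test the write against [0, 999] and [9000, 9999] separately.
      std::printf("vs a[i]: %d, vs a[i+9000]: %d\n",
                  disjoint(WriteLow, WriteHigh, 0, 999),      // 1: passes
                  disjoint(WriteLow, WriteHigh, 9000, 9999)); // 1: passes
      return 0;
    }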
@@ -222,7 +290,7 @@ void RuntimePointerChecking::groupChecks(
   // don't process them twice.
   SmallSet<unsigned, 2> Seen;
 
-  // Go through all equivalence classes, get the the "pointer check groups"
+  // Go through all equivalence classes, get the "pointer check groups"
   // and add them to the overall solution. We use the order in which accesses
   // appear in 'Pointers' to enforce determinism.
   for (unsigned I = 0; I < Pointers.size(); ++I) {
@@ -280,8 +348,14 @@ void RuntimePointerChecking::groupChecks(
   }
 }
 
-bool RuntimePointerChecking::needsChecking(
-    unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
+bool RuntimePointerChecking::arePointersInSamePartition(
+    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
+    unsigned PtrIdx2) {
+  return (PtrToPartition[PtrIdx1] != -1 &&
+          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
+}
+
+bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
   const PointerInfo &PointerI = Pointers[I];
   const PointerInfo &PointerJ = Pointers[J];
 
@@ -297,85 +371,45 @@ bool RuntimePointerChecking::needsChecking(
   if (PointerI.AliasSetId != PointerJ.AliasSetId)
     return false;
 
-  // If PtrPartition is set omit checks between pointers of the same partition.
-  // Partition number -1 means that the pointer is used in multiple partitions.
-  // In this case we can't omit the check.
-  if (PtrPartition && (*PtrPartition)[I] != -1 &&
-      (*PtrPartition)[I] == (*PtrPartition)[J])
-    return false;
-
   return true;
 }
 
-void RuntimePointerChecking::print(
-    raw_ostream &OS, unsigned Depth,
-    const SmallVectorImpl<int> *PtrPartition) const {
-
-  OS.indent(Depth) << "Run-time memory checks:\n";
-
+void RuntimePointerChecking::printChecks(
+    raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
+    unsigned Depth) const {
   unsigned N = 0;
-  for (unsigned I = 0; I < CheckingGroups.size(); ++I)
-    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
-      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
-        OS.indent(Depth) << "Check " << N++ << ":\n";
-        OS.indent(Depth + 2) << "Comparing group " << I << ":\n";
-
-        for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
-          OS.indent(Depth + 2)
-              << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n";
-          if (PtrPartition)
-            OS << " (Partition: "
-               << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
-               << "\n";
-        }
+  for (const auto &Check : Checks) {
+    const auto &First = Check.first->Members, &Second = Check.second->Members;
 
-        OS.indent(Depth + 2) << "Against group " << J << ":\n";
+    OS.indent(Depth) << "Check " << N++ << ":\n";
 
-        for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
-          OS.indent(Depth + 2)
-              << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n";
-          if (PtrPartition)
-            OS << " (Partition: "
-               << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
-               << "\n";
-        }
-      }
+    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
+    for (unsigned K = 0; K < First.size(); ++K)
+      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
 
-  OS.indent(Depth) << "Grouped accesses:\n";
-  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
-    OS.indent(Depth + 2) << "Group " << I << ":\n";
-    OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
-                         << " High: " << *CheckingGroups[I].High << ")\n";
-    for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
-      OS.indent(Depth + 6) << "Member: "
-                           << *Pointers[CheckingGroups[I].Members[J]].Expr
-                           << "\n";
-    }
+    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
+    for (unsigned K = 0; K < Second.size(); ++K)
+      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
   }
 }
 
-unsigned RuntimePointerChecking::getNumberOfChecks(
-    const SmallVectorImpl<int> *PtrPartition) const {
-
-  unsigned NumPartitions = CheckingGroups.size();
-  unsigned CheckCount = 0;
+void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
 
-  for (unsigned I = 0; I < NumPartitions; ++I)
-    for (unsigned J = I + 1; J < NumPartitions; ++J)
-      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
-        CheckCount++;
-  return CheckCount;
-}
+  OS.indent(Depth) << "Run-time memory checks:\n";
+  printChecks(OS, Checks, Depth);
 
-bool RuntimePointerChecking::needsAnyChecking(
-    const SmallVectorImpl<int> *PtrPartition) const {
-  unsigned NumPointers = Pointers.size();
+  OS.indent(Depth) << "Grouped accesses:\n";
+  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+    const auto &CG = CheckingGroups[I];
-  for (unsigned I = 0; I < NumPointers; ++I)
-    for (unsigned J = I + 1; J < NumPointers; ++J)
-      if (needsChecking(I, J, PtrPartition))
-        return true;
-  return false;
+    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
+    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
+                         << ")\n";
+    for (unsigned J = 0; J < CG.Members.size(); ++J) {
+      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
+                           << "\n";
+    }
+  }
 }
 
 namespace {
@@ -390,9 +424,10 @@ public:
   typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
 
   AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
-                 MemoryDepChecker::DepCandidates &DA)
-      : DL(Dl), AST(*AA), LI(LI), DepCands(DA),
-        IsRTCheckAnalysisNeeded(false) {}
+                 MemoryDepChecker::DepCandidates &DA,
+                 PredicatedScalarEvolution &PSE)
+      : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false),
+        PSE(PSE) {}
 
   /// \brief Register a load  and whether it is only read from.
   void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
@@ -435,7 +470,7 @@ public:
   /// We decided that no dependence analysis would be used.  Reset the state.
   void resetDepChecks(MemoryDepChecker &DepChecker) {
     CheckDeps.clear();
-    DepChecker.clearInterestingDependences();
+    DepChecker.clearDependences();
   }
 
   MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
@@ -477,14 +512,18 @@ private:
   /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
   /// while this remains set if we have potentially dependent accesses.
   bool IsRTCheckAnalysisNeeded;
+
+  /// The SCEV predicate containing all the SCEV-related assumptions.
+  PredicatedScalarEvolution &PSE;
 };
 
 } // end anonymous namespace
 
 /// \brief Check whether a pointer can participate in a runtime bounds check.
-static bool hasComputableBounds(ScalarEvolution *SE,
-                                const ValueToValueMap &Strides, Value *Ptr) {
-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
+                                const ValueToValueMap &Strides, Value *Ptr,
+                                Loop *L) {
+  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
   if (!AR)
     return false;
@@ -527,11 +566,11 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
       else
         ++NumReadPtrChecks;
 
-      if (hasComputableBounds(SE, StridesMap, Ptr) &&
+      if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) &&
           // When we run after a failing dependency check we have to make sure
           // we don't have wrapping pointers.
          (!ShouldCheckStride ||
-           isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) {
+           isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) {
         // The id of the dependence set.
         unsigned DepId;
 
@@ -545,7 +584,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
           // Each access has its own dependence set.
          DepId = RunningDepId++;
 
-        RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+        RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
 
         DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
       } else {
@@ -599,9 +638,9 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
   }
 
   if (NeedRTCheck && CanDoRT)
-    RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
 
-  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
+  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                << " pointer comparisons.\n");
 
   RtCheck.Need = NeedRTCheck;
@@ -706,6 +745,11 @@ void AccessAnalysis::processMemAccesses() {
           GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
           DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
           for (Value *UnderlyingObj : TempObjects) {
+            // nullptr never aliases; don't join sets for pointers that have
+            // "null" in their UnderlyingObjects list.
+            if (isa<ConstantPointerNull>(UnderlyingObj))
+              continue;
+
             UnderlyingObjToAccessMap::iterator Prev =
                 ObjToLastAccess.find(UnderlyingObj);
             if (Prev != ObjToLastAccess.end())
@@ -775,20 +819,20 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
 }
 
 /// \brief Check whether the access through \p Ptr has a constant stride.
-int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
-                       const ValueToValueMap &StridesMap) {
-  const Type *Ty = Ptr->getType();
+int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr,
+                       const Loop *Lp, const ValueToValueMap &StridesMap) {
+  Type *Ty = Ptr->getType();
   assert(Ty->isPointerTy() && "Unexpected non-ptr");
 
   // Make sure that the pointer does not point to aggregate types.
-  const PointerType *PtrTy = cast<PointerType>(Ty);
+  auto *PtrTy = cast<PointerType>(Ty);
   if (PtrTy->getElementType()->isAggregateType()) {
     DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
           << *Ptr << "\n");
     return 0;
   }
 
-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
 
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
   if (!AR) {
@@ -811,16 +855,16 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
   // to access the pointer value "0" which is undefined behavior in address
   // space 0, therefore we can also vectorize this case.
   bool IsInBoundsGEP = isInBoundsGep(Ptr);
-  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp);
+  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp);
   bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
   if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
     DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
-          << *Ptr << " SCEV: " << *PtrScev << "\n");
+                 << *Ptr << " SCEV: " << *PtrScev << "\n");
     return 0;
   }
 
   // Check the step is constant.
-  const SCEV *Step = AR->getStepRecurrence(*SE);
+  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
 
   // Calculate the pointer stride and check if it is constant.
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
@@ -832,7 +876,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
   auto &DL = Lp->getHeader()->getModule()->getDataLayout();
   int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
 
-  const APInt &APStepVal = C->getValue()->getValue();
+  const APInt &APStepVal = C->getAPInt();
 
   // Huge step value - give up.
   if (APStepVal.getBitWidth() > 64)
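For reference, the part of isStridedPtr that this hunk does not show derives the stride by dividing the byte step of the recurrence by the element size, rejecting steps that are not a whole multiple; roughly, under that assumption:

    #include <cstdint>

    // Element stride of an access whose byte step is StepBytes over elements
    // of ElemSize bytes; 0 means "not strided" and disables vectorization of
    // the access. E.g. a step of 8 bytes over i32 (4 bytes) gives stride 2.
    int64_t pointerStride(int64_t StepBytes, int64_t ElemSize) {
      if (StepBytes % ElemSize != 0)
        return 0; // step is not a whole number of elements
      return StepBytes / ElemSize;
    }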
@@ -872,15 +916,15 @@ bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
   llvm_unreachable("unexpected DepType!");
 }
 
-bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
+bool MemoryDepChecker::Dependence::isBackward() const {
   switch (Type) {
   case NoDep:
   case Forward:
+  case ForwardButPreventsForwarding:
+  case Unknown:
     return false;
 
   case BackwardVectorizable:
-  case Unknown:
-  case ForwardButPreventsForwarding:
   case Backward:
   case BackwardVectorizableButPreventsForwarding:
     return true;
@@ -889,17 +933,21 @@ bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
 }
 
 bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
+  return isBackward() || Type == Unknown;
+}
+
+bool MemoryDepChecker::Dependence::isForward() const {
   switch (Type) {
-  case NoDep:
   case Forward:
   case ForwardButPreventsForwarding:
-    return false;
+    return true;
 
+  case NoDep:
   case Unknown:
   case BackwardVectorizable:
   case Backward:
   case BackwardVectorizableButPreventsForwarding:
-    return true;
+    return false;
   }
   llvm_unreachable("unexpected DepType!");
 }
@@ -999,11 +1047,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
       BPtr->getType()->getPointerAddressSpace())
     return Dependence::Unknown;
 
-  const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
-  const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
+  const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr);
+  const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr);
 
-  int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides);
-  int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides);
+  int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides);
+  int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides);
 
   const SCEV *Src = AScev;
   const SCEV *Sink = BScev;
@@ -1020,12 +1068,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
     std::swap(StrideAPtr, StrideBPtr);
   }
 
-  const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
+  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
 
   DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
-        << "(Induction step: " << StrideAPtr <<  ")\n");
+               << "(Induction step: " << StrideAPtr << ")\n");
   DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
-        << *InstMap[BIdx] << ": " << *Dist << "\n");
+               << *InstMap[BIdx] << ": " << *Dist << "\n");
 
   // Need accesses with constant stride. We don't want to vectorize
   // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
@@ -1048,7 +1096,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
 
   // Negative distances are not plausible dependencies.
-  const APInt &Val = C->getValue()->getValue();
+  const APInt &Val = C->getAPInt();
   if (Val.isNegative()) {
     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
     if (IsTrueDataDependence &&
@@ -1064,7 +1112,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // Could be improved to assert type sizes are the same (i32 == float, etc).
   if (Val == 0) {
     if (ATy == BTy)
-      return Dependence::NoDep;
+      return Dependence::Forward;
     DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
     return Dependence::Unknown;
   }
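The Val == 0 change above is subtle: a zero distance between same-typed accesses used to be reported as no dependence at all, but a load and a store to the same address in the same iteration do form a (harmless, forward) dependence, and dropping it hid that fact from clients of the analysis. A deliberately simplified model of how the sign of the distance drives the classification (the real code additionally checks store-to-load forwarding distances and type sizes):

    enum class DepType { NoDep, Forward, Backward };

    // Dist = SinkSCEV - SrcSCEV in bytes, where Src appears before Sink in
    // program order. Simplified sketch of the sign-based classification.
    DepType classify(long Dist) {
      if (Dist < 0)
        return DepType::Forward; // sink trails the source; value flows with
                                 // the loop direction
      if (Dist == 0)
        return DepType::Forward; // same location, same iteration (was NoDep)
      return DepType::Backward;  // later iterations touch earlier writes;
                                 // this is what limits the vectorization factor
    }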
@@ -1203,22 +1251,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
                 isDependent(*A.first, A.second, *B.first, B.second, Strides);
             SafeForVectorization &= Dependence::isSafeForVectorization(Type);
 
-            // Gather dependences unless we accumulated MaxInterestingDependence
+            // Gather dependences unless we accumulated MaxDependences
             // dependences.  In that case return as soon as we find the first
             // unsafe dependence.  This puts a limit on this quadratic
             // algorithm.
-            if (RecordInterestingDependences) {
-              if (Dependence::isInterestingDependence(Type))
-                InterestingDependences.push_back(
-                    Dependence(A.second, B.second, Type));
-
-              if (InterestingDependences.size() >= MaxInterestingDependence) {
-                RecordInterestingDependences = false;
-                InterestingDependences.clear();
+            if (RecordDependences) {
+              if (Type != Dependence::NoDep)
+                Dependences.push_back(Dependence(A.second, B.second, Type));
+
+              if (Dependences.size() >= MaxDependences) {
+                RecordDependences = false;
+                Dependences.clear();
                 DEBUG(dbgs() << "Too many dependences, stopped recording\n");
               }
             }
-            if (!RecordInterestingDependences && !SafeForVectorization)
+            if (!RecordDependences && !SafeForVectorization)
               return false;
           }
         ++OI;
@@ -1227,8 +1274,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
     }
   }
 
-  DEBUG(dbgs() << "Total Interesting Dependences: "
-               << InterestingDependences.size() << "\n");
+  DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
   return SafeForVectorization;
 }
 
@@ -1298,10 +1344,10 @@ bool LoopAccessInfo::canAnalyzeLoop() {
   }
 
   // ScalarEvolution needs to be able to find the exit count.
-  const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
-  if (ExitCount == SE->getCouldNotCompute()) {
-    emitAnalysis(LoopAccessReport() <<
-                 "could not determine number of loop iterations");
+  const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+  if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
+    emitAnalysis(LoopAccessReport()
+                 << "could not determine number of loop iterations");
     DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
     return false;
   }
@@ -1370,7 +1416,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
       if (it->mayWriteToMemory()) {
         StoreInst *St = dyn_cast<StoreInst>(it);
         if (!St) {
-          emitAnalysis(LoopAccessReport(it) <<
+          emitAnalysis(LoopAccessReport(&*it) <<
                        "instruction cannot be vectorized");
           CanVecMem = false;
           return;
@@ -1402,7 +1448,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
   MemoryDepChecker::DepCandidates DependentAccesses;
   AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
-                          AA, LI, DependentAccesses);
+                          AA, LI, DependentAccesses, PSE);
 
   // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
   // multiple times on the same object. If the ptr is accessed twice, once
@@ -1453,7 +1499,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
     // read a few words, modify, and write a few words, and some of the
     // words may be written to the same address.
     bool IsReadOnlyPtr = false;
-    if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) {
+    if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) {
       ++NumReads;
       IsReadOnlyPtr = true;
    }
@@ -1483,7 +1529,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   bool CanDoRTIfNeeded =
-      Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides);
+      Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides);
   if (!CanDoRTIfNeeded) {
     emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
     DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
@@ -1510,6 +1556,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
       PtrRtChecking.reset();
       PtrRtChecking.Need = true;
 
+      auto *SE = PSE.getSE();
       CanDoRTIfNeeded =
           Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true);
 
@@ -1552,7 +1599,7 @@ void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
 }
 
 bool LoopAccessInfo::isUniform(Value *V) const {
-  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+  return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop));
 }
 
 // FIXME: this function is currently a duplicate of the one in
@@ -1566,86 +1613,115 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
   return nullptr;
 }
 
-std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
-    Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
-  if (!PtrRtChecking.Need)
-    return std::make_pair(nullptr, nullptr);
+namespace {
+/// \brief IR Values for the lower and upper bounds of a pointer evolution. We
+/// need to use value-handles because SCEV expansion can invalidate previously
+/// expanded values.  Thus expansion of a pointer can invalidate the bounds for
+/// a previous one.
+struct PointerBounds {
+  TrackingVH<Value> Start;
+  TrackingVH<Value> End;
+};
+} // end anonymous namespace
 
-  SmallVector<TrackingVH<Value>, 2> Starts;
-  SmallVector<TrackingVH<Value>, 2> Ends;
+/// \brief Expand code for the lower and upper bound of the pointer group \p CG
+/// in \p TheLoop.  \return the values for the bounds.
+static PointerBounds
+expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
+             Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
+             const RuntimePointerChecking &PtrRtChecking) {
+  Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
+  const SCEV *Sc = SE->getSCEV(Ptr);
+
+  if (SE->isLoopInvariant(Sc, TheLoop)) {
+    DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
+                 << "\n");
+    return {Ptr, Ptr};
+  } else {
+    unsigned AS = Ptr->getType()->getPointerAddressSpace();
+    LLVMContext &Ctx = Loc->getContext();
+
+    // Use this type for pointer arithmetic.
+    Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+    Value *Start = nullptr, *End = nullptr;
+
+    DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+    Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+    End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+    DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+    return {Start, End};
+  }
+}
 
-  LLVMContext &Ctx = Loc->getContext();
-  SCEVExpander Exp(*SE, DL, "induction");
-  Instruction *FirstInst = nullptr;
+/// \brief Turns a collection of checks into a collection of expanded upper and
+/// lower bounds for both pointers in the check.
+static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
+    const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
+    Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
+    const RuntimePointerChecking &PtrRtChecking) {
+  SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
+
+  // Here we're relying on the SCEV Expander's cache to only emit code for the
+  // same bounds once.
+  std::transform(
+      PointerChecks.begin(), PointerChecks.end(),
+      std::back_inserter(ChecksWithBounds),
+      [&](const RuntimePointerChecking::PointerCheck &Check) {
+        PointerBounds
+          First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
+          Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
+        return std::make_pair(First, Second);
+      });
+
+  return ChecksWithBounds;
+}
 
-  for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
-    const RuntimePointerChecking::CheckingPtrGroup &CG =
-        PtrRtChecking.CheckingGroups[i];
-    Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue;
-    const SCEV *Sc = SE->getSCEV(Ptr);
-
-    if (SE->isLoopInvariant(Sc, TheLoop)) {
-      DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
-                   << "\n");
-      Starts.push_back(Ptr);
-      Ends.push_back(Ptr);
-    } else {
-      unsigned AS = Ptr->getType()->getPointerAddressSpace();
-
-      // Use this type for pointer arithmetic.
-      Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
-      Value *Start = nullptr, *End = nullptr;
-
-      DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
-      Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
-      End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
-      DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n");
-      Starts.push_back(Start);
-      Ends.push_back(End);
-    }
-  }
+std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
+    Instruction *Loc,
+    const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
+    const {
+  auto *SE = PSE.getSE();
+  SCEVExpander Exp(*SE, DL, "induction");
+  auto ExpandedChecks =
+      expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking);
 
+  LLVMContext &Ctx = Loc->getContext();
+  Instruction *FirstInst = nullptr;
   IRBuilder<> ChkBuilder(Loc);
   // Our instructions might fold to a constant.
   Value *MemoryRuntimeCheck = nullptr;
 
-  for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
-    for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) {
-      const RuntimePointerChecking::CheckingPtrGroup &CGI =
-          PtrRtChecking.CheckingGroups[i];
-      const RuntimePointerChecking::CheckingPtrGroup &CGJ =
-          PtrRtChecking.CheckingGroups[j];
-
-      if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition))
-        continue;
-      unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
-      unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
-
-      assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
-             (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
-             "Trying to bounds check pointers with different address spaces");
-
-      Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
-      Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
-
-      Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
-      Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
-      Value *End0 =   ChkBuilder.CreateBitCast(Ends[i],   PtrArithTy1, "bc");
-      Value *End1 =   ChkBuilder.CreateBitCast(Ends[j],   PtrArithTy0, "bc");
-
-      Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
-      FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
-      Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
-      FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
-      Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+  for (const auto &Check : ExpandedChecks) {
+    const PointerBounds &A = Check.first, &B = Check.second;
+    // Check if two pointers (A and B) conflict where conflict is computed as:
+    // start(A) <= end(B) && start(B) <= end(A)
+    unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
+    unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
+
+    assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
+           (AS1 == A.End->getType()->getPointerAddressSpace()) &&
+           "Trying to bounds check pointers with different address spaces");
+
+    Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+    Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+    Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
+    Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
+    Value *End0 =   ChkBuilder.CreateBitCast(A.End,   PtrArithTy1, "bc");
PtrArithTy0, "bc"); + +    Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); +    FirstInst = getFirstInst(FirstInst, Cmp0, Loc); +    Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); +    FirstInst = getFirstInst(FirstInst, Cmp1, Loc); +    Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); +    FirstInst = getFirstInst(FirstInst, IsConflict, Loc); +    if (MemoryRuntimeCheck) { +      IsConflict = +          ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");        FirstInst = getFirstInst(FirstInst, IsConflict, Loc); -      if (MemoryRuntimeCheck) { -        IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, -                                         "conflict.rdx"); -        FirstInst = getFirstInst(FirstInst, IsConflict, Loc); -      } -      MemoryRuntimeCheck = IsConflict;      } +    MemoryRuntimeCheck = IsConflict;    }    if (!MemoryRuntimeCheck) @@ -1661,12 +1737,20 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(    return std::make_pair(FirstInst, Check);  } +std::pair<Instruction *, Instruction *> +LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const { +  if (!PtrRtChecking.Need) +    return std::make_pair(nullptr, nullptr); + +  return addRuntimeChecks(Loc, PtrRtChecking.getChecks()); +} +  LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,                                 const DataLayout &DL,                                 const TargetLibraryInfo *TLI, AliasAnalysis *AA,                                 DominatorTree *DT, LoopInfo *LI,                                 const ValueToValueMap &Strides) -    : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), +    : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL),        TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),        MaxSafeDepDistBytes(-1U), CanVecMem(false),        StoreToLoopInvariantAddress(false) { @@ -1685,14 +1769,14 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {    if (Report)      OS.indent(Depth) << "Report: " << Report->str() << "\n"; -  if (auto *InterestingDependences = DepChecker.getInterestingDependences()) { -    OS.indent(Depth) << "Interesting Dependences:\n"; -    for (auto &Dep : *InterestingDependences) { +  if (auto *Dependences = DepChecker.getDependences()) { +    OS.indent(Depth) << "Dependences:\n"; +    for (auto &Dep : *Dependences) {        Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());        OS << "\n";      }    } else -    OS.indent(Depth) << "Too many interesting dependences, not recorded\n"; +    OS.indent(Depth) << "Too many dependences, not recorded\n";    // List the pair of accesses need run-time checks to prove independence.    PtrRtChecking.print(OS, Depth); @@ -1701,6 +1785,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {    OS.indent(Depth) << "Store to invariant address was "                     << (StoreToLoopInvariantAddress ? 
"" : "not ")                     << "found in loop.\n"; + +  OS.indent(Depth) << "SCEV assumptions:\n"; +  PSE.getUnionPredicate().print(OS, Depth);  }  const LoopAccessInfo & @@ -1714,8 +1801,8 @@ LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {    if (!LAI) {      const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); -    LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, -                                            Strides); +    LAI = +        llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, Strides);  #ifndef NDEBUG      LAI->NumSymbolicStrides = Strides.size();  #endif @@ -1737,10 +1824,10 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {  }  bool LoopAccessAnalysis::runOnFunction(Function &F) { -  SE = &getAnalysis<ScalarEvolution>(); +  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();    TLI = TLIP ? &TLIP->getTLI() : nullptr; -  AA = &getAnalysis<AliasAnalysis>(); +  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); @@ -1748,8 +1835,8 @@ bool LoopAccessAnalysis::runOnFunction(Function &F) {  }  void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { -    AU.addRequired<ScalarEvolution>(); -    AU.addRequired<AliasAnalysis>(); +    AU.addRequired<ScalarEvolutionWrapperPass>(); +    AU.addRequired<AAResultsWrapperPass>();      AU.addRequired<DominatorTreeWrapperPass>();      AU.addRequired<LoopInfoWrapperPass>(); @@ -1761,8 +1848,8 @@ static const char laa_name[] = "Loop Access Analysis";  #define LAA_NAME "loop-accesses"  INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)  INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) | 
