diff options
Diffstat (limited to 'lib/Analysis/LoopAccessAnalysis.cpp')
| -rw-r--r-- | lib/Analysis/LoopAccessAnalysis.cpp | 345 | 
1 files changed, 209 insertions, 136 deletions
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index e141d6c58b65..c6175bf9bee9 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -92,7 +92,7 @@ static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(      cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));  unsigned VectorizerParams::RuntimeMemoryCheckThreshold; -/// \brief The maximum iterations used to merge memory checks +/// The maximum iterations used to merge memory checks  static cl::opt<unsigned> MemoryCheckMergeThreshold(      "memory-check-merge-threshold", cl::Hidden,      cl::desc("Maximum number of comparisons done when trying to merge " @@ -102,7 +102,7 @@ static cl::opt<unsigned> MemoryCheckMergeThreshold(  /// Maximum SIMD width.  const unsigned VectorizerParams::MaxVectorWidth = 64; -/// \brief We collect dependences up to this threshold. +/// We collect dependences up to this threshold.  static cl::opt<unsigned>      MaxDependences("max-dependences", cl::Hidden,                     cl::desc("Maximum number of dependences collected by " @@ -124,7 +124,7 @@ static cl::opt<bool> EnableMemAccessVersioning(      "enable-mem-access-versioning", cl::init(true), cl::Hidden,      cl::desc("Enable symbolic stride memory access versioning")); -/// \brief Enable store-to-load forwarding conflict detection. This option can +/// Enable store-to-load forwarding conflict detection. This option can  /// be disabled for correctness testing.  static cl::opt<bool> EnableForwardingConflictDetection(      "store-to-load-forwarding-conflict-detection", cl::Hidden, @@ -165,8 +165,8 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,      PSE.addPredicate(*SE->getEqualPredicate(U, CT));      auto *Expr = PSE.getSCEV(Ptr); -    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr -                 << "\n"); +    LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV +                      << " by: " << *Expr << "\n");      return Expr;    } @@ -490,23 +490,23 @@ void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {  namespace { -/// \brief Analyses memory accesses in a loop. +/// Analyses memory accesses in a loop.  ///  /// Checks whether run time pointer checks are needed and builds sets for data  /// dependence checking.  class AccessAnalysis {  public: -  /// \brief Read or write access location. +  /// Read or write access location.    typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;    typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; -  AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, -                 MemoryDepChecker::DepCandidates &DA, +  AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA, +                 LoopInfo *LI, MemoryDepChecker::DepCandidates &DA,                   PredicatedScalarEvolution &PSE) -      : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), -        PSE(PSE) {} +      : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), +        IsRTCheckAnalysisNeeded(false), PSE(PSE) {} -  /// \brief Register a load  and whether it is only read from. +  /// Register a load  and whether it is only read from.    void addLoad(MemoryLocation &Loc, bool IsReadOnly) {      Value *Ptr = const_cast<Value*>(Loc.Ptr);      AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags); @@ -515,14 +515,14 @@ public:        ReadOnlyPtr.insert(Ptr);    } -  /// \brief Register a store. +  /// Register a store.    void addStore(MemoryLocation &Loc) {      Value *Ptr = const_cast<Value*>(Loc.Ptr);      AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);      Accesses.insert(MemAccessInfo(Ptr, true));    } -  /// \brief Check if we can emit a run-time no-alias check for \p Access. +  /// Check if we can emit a run-time no-alias check for \p Access.    ///    /// Returns true if we can emit a run-time no alias check for \p Access.    /// If we can check this access, this also adds it to a dependence set and @@ -537,7 +537,7 @@ public:                              unsigned ASId, bool ShouldCheckStride,                              bool Assume); -  /// \brief Check whether we can check the pointers at runtime for +  /// Check whether we can check the pointers at runtime for    /// non-intersection.    ///    /// Returns true if we need no check or if we do and we can generate them @@ -546,13 +546,13 @@ public:                         Loop *TheLoop, const ValueToValueMap &Strides,                         bool ShouldCheckWrap = false); -  /// \brief Goes over all memory accesses, checks whether a RT check is needed +  /// Goes over all memory accesses, checks whether a RT check is needed    /// and builds sets of dependent accesses.    void buildDependenceSets() {      processMemAccesses();    } -  /// \brief Initial processing of memory accesses determined that we need to +  /// Initial processing of memory accesses determined that we need to    /// perform dependency checking.    ///    /// Note that this can later be cleared if we retry memcheck analysis without @@ -570,7 +570,7 @@ public:  private:    typedef SetVector<MemAccessInfo> PtrAccessSet; -  /// \brief Go over all memory access and check whether runtime pointer checks +  /// Go over all memory access and check whether runtime pointer checks    /// are needed and build sets of dependency check candidates.    void processMemAccesses(); @@ -579,6 +579,9 @@ private:    const DataLayout &DL; +  /// The loop being checked. +  const Loop *TheLoop; +    /// List of accesses that need a further dependence check.    MemAccessInfoList CheckDeps; @@ -596,7 +599,7 @@ private:    /// dependence check.    MemoryDepChecker::DepCandidates &DepCands; -  /// \brief Initial processing of memory accesses determined that we may need +  /// Initial processing of memory accesses determined that we may need    /// to add memchecks.  Perform the analysis to determine the necessary checks.    ///    /// Note that, this is different from isDependencyCheckNeeded.  When we retry @@ -611,7 +614,7 @@ private:  } // end anonymous namespace -/// \brief Check whether a pointer can participate in a runtime bounds check. +/// Check whether a pointer can participate in a runtime bounds check.  /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr  /// by adding run-time checks (overflow checks) if necessary.  static bool hasComputableBounds(PredicatedScalarEvolution &PSE, @@ -634,7 +637,7 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,    return AR->isAffine();  } -/// \brief Check whether a pointer address cannot wrap. +/// Check whether a pointer address cannot wrap.  static bool isNoWrap(PredicatedScalarEvolution &PSE,                       const ValueToValueMap &Strides, Value *Ptr, Loop *L) {    const SCEV *PtrScev = PSE.getSCEV(Ptr); @@ -684,7 +687,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,    bool IsWrite = Access.getInt();    RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); -  DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); +  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');    return true;   } @@ -729,7 +732,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,        if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,                                  RunningDepId, ASId, ShouldCheckWrap, false)) { -        DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n'); +        LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');          Retries.push_back(Access);          CanDoAliasSetRT = false;        } @@ -791,8 +794,9 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,        unsigned ASi = PtrI->getType()->getPointerAddressSpace();        unsigned ASj = PtrJ->getType()->getPointerAddressSpace();        if (ASi != ASj) { -        DEBUG(dbgs() << "LAA: Runtime check would require comparison between" -                       " different address spaces\n"); +        LLVM_DEBUG( +            dbgs() << "LAA: Runtime check would require comparison between" +                      " different address spaces\n");          return false;        }      } @@ -801,8 +805,8 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,    if (NeedRTCheck && CanDoRT)      RtCheck.generateChecks(DepCands, IsDepCheckNeeded); -  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() -               << " pointer comparisons.\n"); +  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() +                    << " pointer comparisons.\n");    RtCheck.Need = NeedRTCheck; @@ -817,10 +821,10 @@ void AccessAnalysis::processMemAccesses() {    // process read-only pointers. This allows us to skip dependence tests for    // read-only pointers. -  DEBUG(dbgs() << "LAA: Processing memory accesses...\n"); -  DEBUG(dbgs() << "  AST: "; AST.dump()); -  DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n"); -  DEBUG({ +  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n"); +  LLVM_DEBUG(dbgs() << "  AST: "; AST.dump()); +  LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n"); +  LLVM_DEBUG({      for (auto A : Accesses)        dbgs() << "\t" << *A.getPointer() << " (" <<                  (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ? @@ -904,11 +908,15 @@ void AccessAnalysis::processMemAccesses() {            ValueVector TempObjects;            GetUnderlyingObjects(Ptr, TempObjects, DL, LI); -          DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n"); +          LLVM_DEBUG(dbgs() +                     << "Underlying objects for pointer " << *Ptr << "\n");            for (Value *UnderlyingObj : TempObjects) {              // nullptr never alias, don't join sets for pointer that have "null"              // in their UnderlyingObjects list. -            if (isa<ConstantPointerNull>(UnderlyingObj)) +            if (isa<ConstantPointerNull>(UnderlyingObj) && +                !NullPointerIsDefined( +                    TheLoop->getHeader()->getParent(), +                    UnderlyingObj->getType()->getPointerAddressSpace()))                continue;              UnderlyingObjToAccessMap::iterator Prev = @@ -917,7 +925,7 @@ void AccessAnalysis::processMemAccesses() {                DepCands.unionSets(Access, Prev->second);              ObjToLastAccess[UnderlyingObj] = Access; -            DEBUG(dbgs() << "  " << *UnderlyingObj << "\n"); +            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");            }          }        } @@ -931,7 +939,7 @@ static bool isInBoundsGep(Value *Ptr) {    return false;  } -/// \brief Return true if an AddRec pointer \p Ptr is unsigned non-wrapping, +/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,  /// i.e. monotonically increasing/decreasing.  static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,                             PredicatedScalarEvolution &PSE, const Loop *L) { @@ -979,7 +987,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,    return false;  } -/// \brief Check whether the access through \p Ptr has a constant stride. +/// Check whether the access through \p Ptr has a constant stride.  int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,                             const Loop *Lp, const ValueToValueMap &StridesMap,                             bool Assume, bool ShouldCheckWrap) { @@ -989,8 +997,8 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,    // Make sure that the pointer does not point to aggregate types.    auto *PtrTy = cast<PointerType>(Ty);    if (PtrTy->getElementType()->isAggregateType()) { -    DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr -                 << "\n"); +    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" +                      << *Ptr << "\n");      return 0;    } @@ -1001,15 +1009,15 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,      AR = PSE.getAsAddRec(Ptr);    if (!AR) { -    DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr -                 << " SCEV: " << *PtrScev << "\n"); +    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr +                      << " SCEV: " << *PtrScev << "\n");      return 0;    }    // The accesss function must stride over the innermost loop.    if (Lp != AR->getLoop()) { -    DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << -          *Ptr << " SCEV: " << *AR << "\n"); +    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " +                      << *Ptr << " SCEV: " << *AR << "\n");      return 0;    } @@ -1024,18 +1032,20 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,    bool IsNoWrapAddRec = !ShouldCheckWrap ||      PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||      isNoWrapAddRec(Ptr, AR, PSE, Lp); -  bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; -  if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { +  if (!IsNoWrapAddRec && !IsInBoundsGEP && +      NullPointerIsDefined(Lp->getHeader()->getParent(), +                           PtrTy->getAddressSpace())) {      if (Assume) {        PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);        IsNoWrapAddRec = true; -      DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n" -                   << "LAA:   Pointer: " << *Ptr << "\n" -                   << "LAA:   SCEV: " << *AR << "\n" -                   << "LAA:   Added an overflow assumption\n"); +      LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n" +                        << "LAA:   Pointer: " << *Ptr << "\n" +                        << "LAA:   SCEV: " << *AR << "\n" +                        << "LAA:   Added an overflow assumption\n");      } else { -      DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " -                   << *Ptr << " SCEV: " << *AR << "\n"); +      LLVM_DEBUG( +          dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " +                 << *Ptr << " SCEV: " << *AR << "\n");        return 0;      }    } @@ -1046,8 +1056,8 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,    // Calculate the pointer stride and check if it is constant.    const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);    if (!C) { -    DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr << -          " SCEV: " << *AR << "\n"); +    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr +                      << " SCEV: " << *AR << "\n");      return 0;    } @@ -1070,15 +1080,16 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,    // If the SCEV could wrap but we have an inbounds gep with a unit stride we    // know we can't "wrap around the address space". In case of address space    // zero we know that this won't happen without triggering undefined behavior. -  if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) && -      Stride != 1 && Stride != -1) { +  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 && +      (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(), +                                              PtrTy->getAddressSpace()))) {      if (Assume) {        // We can avoid this case by adding a run-time check. -      DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either " -                   << "inbouds or in address space 0 may wrap:\n" -                   << "LAA:   Pointer: " << *Ptr << "\n" -                   << "LAA:   SCEV: " << *AR << "\n" -                   << "LAA:   Added an overflow assumption\n"); +      LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either " +                        << "inbouds or in address space 0 may wrap:\n" +                        << "LAA:   Pointer: " << *Ptr << "\n" +                        << "LAA:   SCEV: " << *AR << "\n" +                        << "LAA:   Added an overflow assumption\n");        PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);      } else        return 0; @@ -1087,14 +1098,65 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,    return Stride;  } -/// Take the pointer operand from the Load/Store instruction. -/// Returns NULL if this is not a valid Load/Store instruction. -static Value *getPointerOperand(Value *I) { -  if (auto *LI = dyn_cast<LoadInst>(I)) -    return LI->getPointerOperand(); -  if (auto *SI = dyn_cast<StoreInst>(I)) -    return SI->getPointerOperand(); -  return nullptr; +bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL, +                           ScalarEvolution &SE, +                           SmallVectorImpl<unsigned> &SortedIndices) { +  assert(llvm::all_of( +             VL, [](const Value *V) { return V->getType()->isPointerTy(); }) && +         "Expected list of pointer operands."); +  SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs; +  OffValPairs.reserve(VL.size()); + +  // Walk over the pointers, and map each of them to an offset relative to +  // first pointer in the array. +  Value *Ptr0 = VL[0]; +  const SCEV *Scev0 = SE.getSCEV(Ptr0); +  Value *Obj0 = GetUnderlyingObject(Ptr0, DL); + +  llvm::SmallSet<int64_t, 4> Offsets; +  for (auto *Ptr : VL) { +    // TODO: Outline this code as a special, more time consuming, version of +    // computeConstantDifference() function. +    if (Ptr->getType()->getPointerAddressSpace() != +        Ptr0->getType()->getPointerAddressSpace()) +      return false; +    // If a pointer refers to a different underlying object, bail - the +    // pointers are by definition incomparable. +    Value *CurrObj = GetUnderlyingObject(Ptr, DL); +    if (CurrObj != Obj0) +      return false; + +    const SCEV *Scev = SE.getSCEV(Ptr); +    const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0)); +    // The pointers may not have a constant offset from each other, or SCEV +    // may just not be smart enough to figure out they do. Regardless, +    // there's nothing we can do. +    if (!Diff) +      return false; + +    // Check if the pointer with the same offset is found. +    int64_t Offset = Diff->getAPInt().getSExtValue(); +    if (!Offsets.insert(Offset).second) +      return false; +    OffValPairs.emplace_back(Offset, Ptr); +  } +  SortedIndices.clear(); +  SortedIndices.resize(VL.size()); +  std::iota(SortedIndices.begin(), SortedIndices.end(), 0); + +  // Sort the memory accesses and keep the order of their uses in UseOrder. +  std::stable_sort(SortedIndices.begin(), SortedIndices.end(), +                   [&OffValPairs](unsigned Left, unsigned Right) { +                     return OffValPairs[Left].first < OffValPairs[Right].first; +                   }); + +  // Check if the order is consecutive already. +  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) { +        return I == SortedIndices[I]; +      })) +    SortedIndices.clear(); + +  return true;  }  /// Take the address space operand from the Load/Store instruction. @@ -1110,8 +1172,8 @@ static unsigned getAddressSpaceOperand(Value *I) {  /// Returns true if the memory operations \p A and \p B are consecutive.  bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,                                 ScalarEvolution &SE, bool CheckType) { -  Value *PtrA = getPointerOperand(A); -  Value *PtrB = getPointerOperand(B); +  Value *PtrA = getLoadStorePointerOperand(A); +  Value *PtrB = getLoadStorePointerOperand(B);    unsigned ASA = getAddressSpaceOperand(A);    unsigned ASB = getAddressSpaceOperand(B); @@ -1127,11 +1189,11 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,    if (CheckType && PtrA->getType() != PtrB->getType())      return false; -  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); +  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);    Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); -  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty)); +  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); -  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); +  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);    PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);    PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); @@ -1242,8 +1304,9 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,    }    if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) { -    DEBUG(dbgs() << "LAA: Distance " << Distance -                 << " that could cause a store-load forwarding conflict\n"); +    LLVM_DEBUG( +        dbgs() << "LAA: Distance " << Distance +               << " that could cause a store-load forwarding conflict\n");      return true;    } @@ -1321,7 +1384,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,    return false;  } -/// \brief Check the dependence for two accesses with the same stride \p Stride. +/// Check the dependence for two accesses with the same stride \p Stride.  /// \p Distance is the positive distance and \p TypeByteSize is type size in  /// bytes.  /// @@ -1395,16 +1458,16 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,    const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src); -  DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink -               << "(Induction step: " << StrideAPtr << ")\n"); -  DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " -               << *InstMap[BIdx] << ": " << *Dist << "\n"); +  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink +                    << "(Induction step: " << StrideAPtr << ")\n"); +  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " +                    << *InstMap[BIdx] << ": " << *Dist << "\n");    // Need accesses with constant stride. We don't want to vectorize    // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in    // the address space.    if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ -    DEBUG(dbgs() << "Pointer access with non-constant stride\n"); +    LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");      return Dependence::Unknown;    } @@ -1421,7 +1484,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,                                   TypeByteSize))        return Dependence::NoDep; -    DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); +    LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");      ShouldRetryWithRuntimeCheck = true;      return Dependence::Unknown;    } @@ -1432,7 +1495,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,    // Attempt to prove strided accesses independent.    if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&        areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) { -    DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); +    LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");      return Dependence::NoDep;    } @@ -1442,11 +1505,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,      if (IsTrueDataDependence && EnableForwardingConflictDetection &&          (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||           ATy != BTy)) { -      DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); +      LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");        return Dependence::ForwardButPreventsForwarding;      } -    DEBUG(dbgs() << "LAA: Dependence is negative\n"); +    LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");      return Dependence::Forward;    } @@ -1455,15 +1518,17 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,    if (Val == 0) {      if (ATy == BTy)        return Dependence::Forward; -    DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n"); +    LLVM_DEBUG( +        dbgs() << "LAA: Zero dependence difference but different types\n");      return Dependence::Unknown;    }    assert(Val.isStrictlyPositive() && "Expect a positive value");    if (ATy != BTy) { -    DEBUG(dbgs() << -          "LAA: ReadWrite-Write positive dependency with different types\n"); +    LLVM_DEBUG( +        dbgs() +        << "LAA: ReadWrite-Write positive dependency with different types\n");      return Dependence::Unknown;    } @@ -1504,15 +1569,15 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,    uint64_t MinDistanceNeeded =        TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;    if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) { -    DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance -                 << '\n'); +    LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance " +                      << Distance << '\n');      return Dependence::Backward;    }    // Unsafe if the minimum distance needed is greater than max safe distance.    if (MinDistanceNeeded > MaxSafeDepDistBytes) { -    DEBUG(dbgs() << "LAA: Failure because it needs at least " -                 << MinDistanceNeeded << " size in bytes"); +    LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least " +                      << MinDistanceNeeded << " size in bytes");      return Dependence::Backward;    } @@ -1541,8 +1606,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,      return Dependence::BackwardVectorizableButPreventsForwarding;    uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride); -  DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() -               << " with max VF = " << MaxVF << '\n'); +  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() +                    << " with max VF = " << MaxVF << '\n');    uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;    MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);    return Dependence::BackwardVectorizable; @@ -1600,7 +1665,8 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,                if (Dependences.size() >= MaxDependences) {                  RecordDependences = false;                  Dependences.clear(); -                DEBUG(dbgs() << "Too many dependences, stopped recording\n"); +                LLVM_DEBUG(dbgs() +                           << "Too many dependences, stopped recording\n");                }              }              if (!RecordDependences && !SafeForVectorization) @@ -1612,7 +1678,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,      }    } -  DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n"); +  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");    return SafeForVectorization;  } @@ -1642,20 +1708,21 @@ void MemoryDepChecker::Dependence::print(  bool LoopAccessInfo::canAnalyzeLoop() {    // We need to have a loop header. -  DEBUG(dbgs() << "LAA: Found a loop in " -               << TheLoop->getHeader()->getParent()->getName() << ": " -               << TheLoop->getHeader()->getName() << '\n'); +  LLVM_DEBUG(dbgs() << "LAA: Found a loop in " +                    << TheLoop->getHeader()->getParent()->getName() << ": " +                    << TheLoop->getHeader()->getName() << '\n');    // We can only analyze innermost loops.    if (!TheLoop->empty()) { -    DEBUG(dbgs() << "LAA: loop is not the innermost loop\n"); +    LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");      recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";      return false;    }    // We must have a single backedge.    if (TheLoop->getNumBackEdges() != 1) { -    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n"); +    LLVM_DEBUG( +        dbgs() << "LAA: loop control flow is not understood by analyzer\n");      recordAnalysis("CFGNotUnderstood")          << "loop control flow is not understood by analyzer";      return false; @@ -1663,7 +1730,8 @@ bool LoopAccessInfo::canAnalyzeLoop() {    // We must have a single exiting block.    if (!TheLoop->getExitingBlock()) { -    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n"); +    LLVM_DEBUG( +        dbgs() << "LAA: loop control flow is not understood by analyzer\n");      recordAnalysis("CFGNotUnderstood")          << "loop control flow is not understood by analyzer";      return false; @@ -1673,7 +1741,8 @@ bool LoopAccessInfo::canAnalyzeLoop() {    // checked at the end of each iteration. With that we can assume that all    // instructions in the loop are executed the same number of times.    if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { -    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n"); +    LLVM_DEBUG( +        dbgs() << "LAA: loop control flow is not understood by analyzer\n");      recordAnalysis("CFGNotUnderstood")          << "loop control flow is not understood by analyzer";      return false; @@ -1684,7 +1753,7 @@ bool LoopAccessInfo::canAnalyzeLoop() {    if (ExitCount == PSE->getSE()->getCouldNotCompute()) {      recordAnalysis("CantComputeNumberOfIterations")          << "could not determine number of loop iterations"; -    DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); +    LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");      return false;    } @@ -1734,7 +1803,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,          if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {            recordAnalysis("NonSimpleLoad", Ld)                << "read with atomic ordering or volatile read"; -          DEBUG(dbgs() << "LAA: Found a non-simple load.\n"); +          LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");            CanVecMem = false;            return;          } @@ -1758,7 +1827,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,          if (!St->isSimple() && !IsAnnotatedParallel) {            recordAnalysis("NonSimpleStore", St)                << "write with atomic ordering or volatile write"; -          DEBUG(dbgs() << "LAA: Found a non-simple store.\n"); +          LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");            CanVecMem = false;            return;          } @@ -1777,14 +1846,14 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,    // Check if we see any stores. If there are no stores, then we don't    // care if the pointers are *restrict*.    if (!Stores.size()) { -    DEBUG(dbgs() << "LAA: Found a read-only loop!\n"); +    LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");      CanVecMem = true;      return;    }    MemoryDepChecker::DepCandidates DependentAccesses;    AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), -                          AA, LI, DependentAccesses, *PSE); +                          TheLoop, AA, LI, DependentAccesses, *PSE);    // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects    // multiple times on the same object. If the ptr is accessed twice, once @@ -1814,9 +1883,9 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,    }    if (IsAnnotatedParallel) { -    DEBUG(dbgs() -          << "LAA: A loop annotated parallel, ignore memory dependency " -          << "checks.\n"); +    LLVM_DEBUG( +        dbgs() << "LAA: A loop annotated parallel, ignore memory dependency " +               << "checks.\n");      CanVecMem = true;      return;    } @@ -1851,7 +1920,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,    // If we write (or read-write) to a single destination and there are no    // other reads in this loop then is it safe to vectorize.    if (NumReadWrites == 1 && NumReads == 0) { -    DEBUG(dbgs() << "LAA: Found a write-only loop!\n"); +    LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");      CanVecMem = true;      return;    } @@ -1866,23 +1935,24 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,                                                    TheLoop, SymbolicStrides);    if (!CanDoRTIfNeeded) {      recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds"; -    DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " -                 << "the array bounds.\n"); +    LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " +                      << "the array bounds.\n");      CanVecMem = false;      return;    } -  DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); +  LLVM_DEBUG( +      dbgs() << "LAA: We can perform a memory runtime check if needed.\n");    CanVecMem = true;    if (Accesses.isDependencyCheckNeeded()) { -    DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); +    LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");      CanVecMem = DepChecker->areDepsSafe(          DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);      MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();      if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) { -      DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); +      LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");        // Clear the dependency checks. We assume they are not needed.        Accesses.resetDepChecks(*DepChecker); @@ -1898,7 +1968,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,        if (!CanDoRTIfNeeded) {          recordAnalysis("CantCheckMemDepsAtRunTime")              << "cannot check memory dependencies at runtime"; -        DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); +        LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");          CanVecMem = false;          return;        } @@ -1908,16 +1978,17 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,    }    if (CanVecMem) -    DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop.  We" -                 << (PtrRtChecking->Need ? "" : " don't") -                 << " need runtime memory checks.\n"); +    LLVM_DEBUG( +        dbgs() << "LAA: No unsafe dependent memory operations in loop.  We" +               << (PtrRtChecking->Need ? "" : " don't") +               << " need runtime memory checks.\n");    else {      recordAnalysis("UnsafeMemDep")          << "unsafe dependent memory operations in loop. Use "             "#pragma loop distribute(enable) to allow loop distribution "             "to attempt to isolate the offending operations into a separate "             "loop"; -    DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); +    LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");    }  } @@ -1974,7 +2045,7 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,  namespace { -/// \brief IR Values for the lower and upper bounds of a pointer evolution.  We +/// IR Values for the lower and upper bounds of a pointer evolution.  We  /// need to use value-handles because SCEV expansion can invalidate previously  /// expanded values.  Thus expansion of a pointer can invalidate the bounds for  /// a previous one. @@ -1985,7 +2056,7 @@ struct PointerBounds {  } // end anonymous namespace -/// \brief Expand code for the lower and upper bound of the pointer group \p CG +/// Expand code for the lower and upper bound of the pointer group \p CG  /// in \p TheLoop.  \return the values for the bounds.  static PointerBounds  expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, @@ -2001,8 +2072,8 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,    Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);    if (SE->isLoopInvariant(Sc, TheLoop)) { -    DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr -                 << "\n"); +    LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" +                      << *Ptr << "\n");      // Ptr could be in the loop body. If so, expand a new one at the correct      // location.      Instruction *Inst = dyn_cast<Instruction>(Ptr); @@ -2015,15 +2086,16 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,      return {NewPtr, NewPtrPlusOne};    } else {      Value *Start = nullptr, *End = nullptr; -    DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); +    LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");      Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);      End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); -    DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); +    LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High +                      << "\n");      return {Start, End};    }  } -/// \brief Turns a collection of checks into a collection of expanded upper and +/// Turns a collection of checks into a collection of expanded upper and  /// lower bounds for both pointers in the check.  static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(      const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks, @@ -2136,9 +2208,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {    if (!Stride)      return; -  DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for " -                  "versioning:"); -  DEBUG(dbgs() << "  Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); +  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for " +                       "versioning:"); +  LLVM_DEBUG(dbgs() << "  Ptr: " << *Ptr << " Stride: " << *Stride << "\n");    // Avoid adding the "Stride == 1" predicate when we know that     // Stride >= Trip-Count. Such a predicate will effectively optimize a single @@ -2174,12 +2246,13 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {    // "Stride >= TripCount" is equivalent to checking:     // Stride - BETakenCount > 0    if (SE->isKnownPositive(StrideMinusBETaken)) { -    DEBUG(dbgs() << "LAA: Stride>=TripCount; No point in versioning as the " -                    "Stride==1 predicate will imply that the loop executes " -                    "at most once.\n"); +    LLVM_DEBUG( +        dbgs() << "LAA: Stride>=TripCount; No point in versioning as the " +                  "Stride==1 predicate will imply that the loop executes " +                  "at most once.\n");      return; -  }   -  DEBUG(dbgs() << "LAA: Found a strided access that we can version."); +  } +  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");    SymbolicStrides[Ptr] = Stride;    StrideSet.insert(Stride);  | 
