| author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-18 20:30:12 +0000 |
| --- | --- | --- |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-06 20:11:55 +0000 |
| commit | 5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch) | |
| tree | 1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp | |
| parent | 3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff) | |
| parent | 312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 285 |
1 file changed, 216 insertions, 69 deletions
```diff
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index fd0e81c51ac8..0894560fd078 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -14,7 +14,6 @@
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/EquivalenceClasses.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/STLExtras.h"
@@ -22,7+21,6 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
@@ -53,8 +51,6 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -65,6 +61,7 @@
 #include <cstdint>
 #include <iterator>
 #include <utility>
+#include <variant>
 #include <vector>
 
 using namespace llvm;
@@ -142,6 +139,13 @@ static cl::opt<bool> SpeculateUnitStride(
     cl::desc("Speculate that non-constant strides are unit in LAA"),
     cl::init(true));
 
+static cl::opt<bool, true> HoistRuntimeChecks(
+    "hoist-runtime-checks", cl::Hidden,
+    cl::desc(
+        "Hoist inner loop runtime memory checks to outer loop if possible"),
+    cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(false));
+bool VectorizerParams::HoistRuntimeChecks;
+
 bool VectorizerParams::isInterleaveForced() {
   return ::VectorizationInterleave.getNumOccurrences() > 0;
 }
@@ -331,6 +335,34 @@ void RuntimePointerChecking::tryToCreateDiffCheck(
     CanUseDiffCheck = false;
     return;
   }
+
+  const Loop *InnerLoop = SrcAR->getLoop();
+  // If the start values for both Src and Sink also vary according to an outer
+  // loop, then it's probably better to avoid creating diff checks because
+  // they may not be hoisted. We should instead let llvm::addRuntimeChecks
+  // do the expanded full range overlap checks, which can be hoisted.
+  if (HoistRuntimeChecks && InnerLoop->getParentLoop() &&
+      isa<SCEVAddRecExpr>(SinkStartInt) && isa<SCEVAddRecExpr>(SrcStartInt)) {
+    auto *SrcStartAR = cast<SCEVAddRecExpr>(SrcStartInt);
+    auto *SinkStartAR = cast<SCEVAddRecExpr>(SinkStartInt);
+    const Loop *StartARLoop = SrcStartAR->getLoop();
+    if (StartARLoop == SinkStartAR->getLoop() &&
+        StartARLoop == InnerLoop->getParentLoop() &&
+        // If the diff check would already be loop invariant (due to the
+        // recurrences being the same), then we prefer to keep the diff checks
+        // because they are cheaper.
+        SrcStartAR->getStepRecurrence(*SE) !=
+            SinkStartAR->getStepRecurrence(*SE)) {
+      LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
+                           "cannot be hoisted out of the outer loop\n");
+      CanUseDiffCheck = false;
+      return;
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
+                    << "SrcStart: " << *SrcStartInt << '\n'
+                    << "SinkStartInt: " << *SinkStartInt << '\n');
   DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
                           Src->NeedsFreeze || Sink->NeedsFreeze);
 }
@@ -634,7 +666,7 @@ public:
   /// Register a load and whether it is only read from.
   void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
     Value *Ptr = const_cast<Value*>(Loc.Ptr);
-    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
+    AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
     Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
     if (IsReadOnly)
       ReadOnlyPtr.insert(Ptr);
@@ -643,7 +675,7 @@ public:
   /// Register a store.
   void addStore(MemoryLocation &Loc, Type *AccessTy) {
     Value *Ptr = const_cast<Value*>(Loc.Ptr);
-    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
+    AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
     Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
   }
 
@@ -691,6 +723,11 @@ public:
 
   MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
 
+  const DenseMap<Value *, SmallVector<const Value *, 16>> &
+  getUnderlyingObjects() {
+    return UnderlyingObjects;
+  }
+
 private:
   typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
 
@@ -736,6 +773,8 @@ private:
 
   /// The SCEV predicate containing all the SCEV-related assumptions.
   PredicatedScalarEvolution &PSE;
+
+  DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
 };
 
 } // end anonymous namespace
@@ -914,6 +953,22 @@ static void findForkedSCEVs(
     ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
     break;
   }
+  case Instruction::PHI: {
+    SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs;
+    // A phi means we've found a forked pointer, but we currently only
+    // support a single phi per pointer so if there's another behind this
+    // then we just bail out and return the generic SCEV.
+    if (I->getNumOperands() == 2) {
+      findForkedSCEVs(SE, L, I->getOperand(0), ChildScevs, Depth);
+      findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
+    }
+    if (ChildScevs.size() == 2) {
+      ScevList.push_back(ChildScevs[0]);
+      ScevList.push_back(ChildScevs[1]);
+    } else
+      ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
+    break;
+  }
   case Instruction::Add:
   case Instruction::Sub: {
     SmallVector<PointerIntPair<const SCEV *, 1, bool>> LScevs;
@@ -1074,7 +1129,6 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
     for (const auto &A : AS) {
       Value *Ptr = A.getValue();
       bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
-
       if (IsWrite)
         ++NumWritePtrChecks;
       else
@@ -1289,10 +1343,12 @@ void AccessAnalysis::processMemAccesses() {
         typedef SmallVector<const Value *, 16> ValueVector;
         ValueVector TempObjects;
 
-        getUnderlyingObjects(Ptr, TempObjects, LI);
+        UnderlyingObjects[Ptr] = {};
+        SmallVector<const Value *, 16> &UOs = UnderlyingObjects[Ptr];
+        ::getUnderlyingObjects(Ptr, UOs, LI);
         LLVM_DEBUG(dbgs()
                    << "Underlying objects for pointer " << *Ptr << "\n");
-        for (const Value *UnderlyingObj : TempObjects) {
+        for (const Value *UnderlyingObj : UOs) {
           // nullptr never alias, don't join sets for pointer that have "null"
           // in their UnderlyingObjects list.
           if (isa<ConstantPointerNull>(UnderlyingObj) &&
@@ -1620,6 +1676,7 @@ MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
   case ForwardButPreventsForwarding:
   case Backward:
   case BackwardVectorizableButPreventsForwarding:
+  case IndirectUnsafe:
     return VectorizationSafetyStatus::Unsafe;
   }
   llvm_unreachable("unexpected DepType!");
@@ -1631,6 +1688,7 @@ bool MemoryDepChecker::Dependence::isBackward() const {
   case Forward:
   case ForwardButPreventsForwarding:
   case Unknown:
+  case IndirectUnsafe:
     return false;
 
   case BackwardVectorizable:
@@ -1656,6 +1714,7 @@ bool MemoryDepChecker::Dependence::isForward() const {
   case BackwardVectorizable:
   case Backward:
   case BackwardVectorizableButPreventsForwarding:
+  case IndirectUnsafe:
     return false;
   }
   llvm_unreachable("unexpected DepType!");
@@ -1678,7 +1737,7 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
   const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
   // Maximum vector factor.
   uint64_t MaxVFWithoutSLForwardIssues = std::min(
-      VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
+      VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes);
 
   // Compute the smallest VF at which the store and load would be misaligned.
   for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
@@ -1698,10 +1757,10 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
     return true;
   }
 
-  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
+  if (MaxVFWithoutSLForwardIssues < MinDepDistBytes &&
       MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
-    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
+    MinDepDistBytes = MaxVFWithoutSLForwardIssues;
   return false;
 }
 
@@ -1813,67 +1872,116 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
   return ScaledDist % Stride;
 }
 
-MemoryDepChecker::Dependence::DepType
-MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
-                              const MemAccessInfo &B, unsigned BIdx,
-                              const DenseMap<Value *, const SCEV *> &Strides) {
-  assert (AIdx < BIdx && "Must pass arguments in program order");
+/// Returns true if any of the underlying objects has a loop varying address,
+/// i.e. may change in \p L.
+static bool
+isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
+                             ScalarEvolution &SE, const Loop *L) {
+  return any_of(UnderlyingObjects, [&SE, L](const Value *UO) {
+    return !SE.isLoopInvariant(SE.getSCEV(const_cast<Value *>(UO)), L);
+  });
+}
 
+// Get the dependence distance, stride, type size in whether i is a write for
+// the dependence between A and B. Returns a DepType, if we can prove there's
+// no dependence or the analysis fails. Outlined to lambda to limit he scope
+// of various temporary variables, like A/BPtr, StrideA/BPtr and others.
+// Returns either the dependence result, if it could already be determined, or a
+// tuple with (Distance, Stride, TypeSize, AIsWrite, BIsWrite).
+static std::variant<MemoryDepChecker::Dependence::DepType,
+                    std::tuple<const SCEV *, uint64_t, uint64_t, bool, bool>>
+getDependenceDistanceStrideAndSize(
+    const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
+    const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
+    const DenseMap<Value *, const SCEV *> &Strides,
+    const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
+    PredicatedScalarEvolution &PSE, const Loop *InnermostLoop) {
+  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+  auto &SE = *PSE.getSE();
   auto [APtr, AIsWrite] = A;
   auto [BPtr, BIsWrite] = B;
-  Type *ATy = getLoadStoreType(InstMap[AIdx]);
-  Type *BTy = getLoadStoreType(InstMap[BIdx]);
 
   // Two reads are independent.
   if (!AIsWrite && !BIsWrite)
-    return Dependence::NoDep;
+    return MemoryDepChecker::Dependence::NoDep;
+
+  Type *ATy = getLoadStoreType(AInst);
+  Type *BTy = getLoadStoreType(BInst);
 
   // We cannot check pointers in different address spaces.
   if (APtr->getType()->getPointerAddressSpace() !=
       BPtr->getType()->getPointerAddressSpace())
-    return Dependence::Unknown;
+    return MemoryDepChecker::Dependence::Unknown;
 
   int64_t StrideAPtr =
-    getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true).value_or(0);
+      getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true).value_or(0);
   int64_t StrideBPtr =
-    getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true).value_or(0);
+      getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true).value_or(0);
 
   const SCEV *Src = PSE.getSCEV(APtr);
   const SCEV *Sink = PSE.getSCEV(BPtr);
 
   // If the induction step is negative we have to invert source and sink of the
-  // dependence.
+  // dependence when measuring the distance between them. We should not swap
+  // AIsWrite with BIsWrite, as their uses expect them in program order.
   if (StrideAPtr < 0) {
-    std::swap(APtr, BPtr);
-    std::swap(ATy, BTy);
     std::swap(Src, Sink);
-    std::swap(AIsWrite, BIsWrite);
-    std::swap(AIdx, BIdx);
-    std::swap(StrideAPtr, StrideBPtr);
+    std::swap(AInst, BInst);
   }
 
-  ScalarEvolution &SE = *PSE.getSE();
   const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
 
   LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
                     << "(Induction step: " << StrideAPtr << ")\n");
-  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
-                    << *InstMap[BIdx] << ": " << *Dist << "\n");
+  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
                    << ": " << *Dist << "\n");
+
+  // Needs accesses where the addresses of the accessed underlying objects do
+  // not change within the loop.
+  if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE,
+                                   InnermostLoop) ||
+      isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE,
+                                   InnermostLoop))
+    return MemoryDepChecker::Dependence::IndirectUnsafe;
 
   // Need accesses with constant stride. We don't want to vectorize
-  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
-  // the address space.
-  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
+  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
+  // in the address space.
+  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
     LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
-    return Dependence::Unknown;
+    return MemoryDepChecker::Dependence::Unknown;
   }
 
-  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
   uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
   bool HasSameSize =
       DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
+  if (!HasSameSize)
+    TypeByteSize = 0;
   uint64_t Stride = std::abs(StrideAPtr);
+  return std::make_tuple(Dist, Stride, TypeByteSize, AIsWrite, BIsWrite);
+}
 
+MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
+    const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
+    unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides,
+    const DenseMap<Value *, SmallVector<const Value *, 16>>
+        &UnderlyingObjects) {
+  assert(AIdx < BIdx && "Must pass arguments in program order");
+
+  // Get the dependence distance, stride, type size and what access writes for
+  // the dependence between A and B.
+  auto Res = getDependenceDistanceStrideAndSize(
+      A, InstMap[AIdx], B, InstMap[BIdx], Strides, UnderlyingObjects, PSE,
+      InnermostLoop);
+  if (std::holds_alternative<Dependence::DepType>(Res))
+    return std::get<Dependence::DepType>(Res);
+
+  const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] =
+      std::get<std::tuple<const SCEV *, uint64_t, uint64_t, bool, bool>>(Res);
+  bool HasSameSize = TypeByteSize > 0;
+
+  ScalarEvolution &SE = *PSE.getSE();
+  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
   if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
       isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
                                Stride, TypeByteSize))
@@ -1899,9 +2007,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // Negative distances are not plausible dependencies.
   if (Val.isNegative()) {
     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
+    // There is no need to update MaxSafeVectorWidthInBits after call to
+    // couldPreventStoreLoadForward, even if it changed MinDepDistBytes,
+    // since a forward dependency will allow vectorization using any width.
     if (IsTrueDataDependence && EnableForwardingConflictDetection &&
-        (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
-         !HasSameSize)) {
+        (!HasSameSize || couldPreventStoreLoadForward(Val.abs().getZExtValue(),
+                                                      TypeByteSize))) {
       LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
       return Dependence::ForwardButPreventsForwarding;
     }
@@ -1969,8 +2080,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
     return Dependence::Backward;
   }
-  // Unsafe if the minimum distance needed is greater than max safe distance.
-  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
+  // Unsafe if the minimum distance needed is greater than smallest dependence
+  // distance distance.
+  if (MinDistanceNeeded > MinDepDistBytes) {
     LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
                       << MinDistanceNeeded << " size in bytes\n");
     return Dependence::Backward;
   }
@@ -1992,15 +2104,25 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // is 2. Then we analyze the accesses on array A, the minimum distance needed
   // is 8, which is less than 2 and forbidden vectorization, But actually
   // both A and B could be vectorized by 2 iterations.
-  MaxSafeDepDistBytes =
-      std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
+  MinDepDistBytes =
+      std::min(static_cast<uint64_t>(Distance), MinDepDistBytes);
 
   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
+  uint64_t MinDepDistBytesOld = MinDepDistBytes;
   if (IsTrueDataDependence && EnableForwardingConflictDetection &&
-      couldPreventStoreLoadForward(Distance, TypeByteSize))
+      couldPreventStoreLoadForward(Distance, TypeByteSize)) {
+    // Sanity check that we didn't update MinDepDistBytes when calling
+    // couldPreventStoreLoadForward
+    assert(MinDepDistBytes == MinDepDistBytesOld &&
+           "An update to MinDepDistBytes requires an update to "
+           "MaxSafeVectorWidthInBits");
+    (void)MinDepDistBytesOld;
     return Dependence::BackwardVectorizableButPreventsForwarding;
+  }
 
-  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
+  // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
+  // since there is a backwards dependency.
+  uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride);
   LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
                     << " with max VF = " << MaxVF << '\n');
   uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
@@ -2008,11 +2130,13 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   return Dependence::BackwardVectorizable;
 }
 
-bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
-                                   MemAccessInfoList &CheckDeps,
-                                   const DenseMap<Value *, const SCEV *> &Strides) {
+bool MemoryDepChecker::areDepsSafe(
+    DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
+    const DenseMap<Value *, const SCEV *> &Strides,
+    const DenseMap<Value *, SmallVector<const Value *, 16>>
+        &UnderlyingObjects) {
 
-  MaxSafeDepDistBytes = -1;
+  MinDepDistBytes = -1;
   SmallPtrSet<MemAccessInfo, 8> Visited;
   for (MemAccessInfo CurAccess : CheckDeps) {
     if (Visited.count(CurAccess))
@@ -2054,7 +2178,8 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
           std::swap(A, B);
 
         Dependence::DepType Type =
-            isDependent(*A.first, A.second, *B.first, B.second, Strides);
+            isDependent(*A.first, A.second, *B.first, B.second, Strides,
+                        UnderlyingObjects);
         mergeInStatus(Dependence::isSafeForVectorization(Type));
 
         // Gather dependences unless we accumulated MaxDependences
@@ -2098,8 +2223,14 @@ MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
 }
 
 const char *MemoryDepChecker::Dependence::DepName[] = {
-    "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
-    "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
+    "NoDep",
+    "Unknown",
+    "IndidrectUnsafe",
+    "Forward",
+    "ForwardButPreventsForwarding",
+    "Backward",
+    "BackwardVectorizable",
+    "BackwardVectorizableButPreventsForwarding"};
 
 void MemoryDepChecker::Dependence::print(
     raw_ostream &OS, unsigned Depth,
@@ -2192,17 +2323,17 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
       if (HasComplexMemInst)
        continue;
 
+      // Many math library functions read the rounding mode. We will only
+      // vectorize a loop if it contains known function calls that don't set
+      // the flag. Therefore, it is safe to ignore this read from memory.
+      auto *Call = dyn_cast<CallInst>(&I);
+      if (Call && getVectorIntrinsicIDForCall(Call, TLI))
+        continue;
+
      // If this is a load, save it. If this instruction can read from memory
      // but is not a load, then we quit. Notice that we don't handle function
      // calls that read or write.
      if (I.mayReadFromMemory()) {
-        // Many math library functions read the rounding mode. We will only
-        // vectorize a loop if it contains known function calls that don't set
-        // the flag. Therefore, it is safe to ignore this read from memory.
-        auto *Call = dyn_cast<CallInst>(&I);
-        if (Call && getVectorIntrinsicIDForCall(Call, TLI))
-          continue;
-
         // If the function has an explicit vectorized counterpart, we can safely
         // assume that it can be vectorized.
         if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
@@ -2400,8 +2531,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
   if (Accesses.isDependencyCheckNeeded()) {
     LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
     CanVecMem = DepChecker->areDepsSafe(
-        DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
-    MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
+        DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides,
+        Accesses.getUnderlyingObjects());
 
     if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
       LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
@@ -2464,12 +2595,24 @@ void LoopAccessInfo::emitUnsafeDependenceRemark() {
   LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
 
   // Emit remark for first unsafe dependence
+  bool HasForcedDistribution = false;
+  std::optional<const MDOperand *> Value =
+      findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable");
+  if (Value) {
+    const MDOperand *Op = *Value;
+    assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
+    HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
+  }
+
+  const std::string Info =
+      HasForcedDistribution
+          ? "unsafe dependent memory operations in loop."
+          : "unsafe dependent memory operations in loop. Use "
+            "#pragma clang loop distribute(enable) to allow loop distribution "
+            "to attempt to isolate the offending operations into a separate "
+            "loop";
   OptimizationRemarkAnalysis &R =
-      recordAnalysis("UnsafeDep", Dep.getDestination(*this))
-      << "unsafe dependent memory operations in loop. Use "
-         "#pragma loop distribute(enable) to allow loop distribution "
-         "to attempt to isolate the offending operations into a separate "
-         "loop";
+      recordAnalysis("UnsafeDep", Dep.getDestination(*this)) << Info;
 
   switch (Dep.Type) {
   case MemoryDepChecker::Dependence::NoDep:
@@ -2487,6 +2630,9 @@ void LoopAccessInfo::emitUnsafeDependenceRemark() {
     R << "\nBackward loop carried data dependence that prevents "
          "store-to-load forwarding.";
     break;
+  case MemoryDepChecker::Dependence::IndirectUnsafe:
+    R << "\nUnsafe indirect dependence.";
+    break;
   case MemoryDepChecker::Dependence::Unknown:
     R << "\nUnknown data dependence.";
     break;
@@ -2766,9 +2912,10 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
   if (CanVecMem) {
     OS.indent(Depth) << "Memory dependences are safe";
-    if (MaxSafeDepDistBytes != -1ULL)
-      OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
-         << " bytes";
+    const MemoryDepChecker &DC = getDepChecker();
+    if (!DC.isSafeForAnyVectorWidth())
+      OS << " with a maximum safe vector width of "
+         << DC.getMaxSafeVectorWidthInBits() << " bits";
     if (PtrRtChecking->Need)
      OS << " with run-time checks";
     OS << "\n";
```
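For context, the new `-hoist-runtime-checks` option (wired to `VectorizerParams::HoistRuntimeChecks` above) targets loop nests where the inner loop's pointer start values vary with an outer induction variable. A minimal illustrative sketch, not part of the commit and with hypothetical names: a per-inner-loop pointer-difference check would have to be re-evaluated on every outer iteration, whereas a single whole-range overlap check over `dst` and `src` can be emitted once ahead of the outer loop, which is why `tryToCreateDiffCheck` now backs off in that situation.

```cpp
// Hypothetical loop nest of the shape the hoisting logic is aimed at.
// The inner loop is the vectorization candidate; the row base addresses
// dst + i*cols and src + i*cols change on every outer iteration, so a
// "diff" check between them is not outer-loop invariant, while a single
// overlap check on [dst, dst+rows*cols) vs [src, src+rows*cols) is.
void scale_rows(float *dst, const float *src, int rows, int cols) {
  for (int i = 0; i < rows; ++i)      // outer loop: start addresses vary here
    for (int j = 0; j < cols; ++j)    // inner loop: vectorized with runtime checks
      dst[i * cols + j] += 2.0f * src[i * cols + j];
}
```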
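The refactored `isDependent` consumes a `std::variant` returned by `getDependenceDistanceStrideAndSize`: either an early dependence verdict or the tuple of values needed to continue the analysis. A standalone sketch of that pattern in plain C++ (illustrative only, with made-up types rather than the LLVM ones):

```cpp
#include <cstdint>
#include <iostream>
#include <tuple>
#include <variant>

enum class DepType { NoDep, Unknown, IndirectUnsafe };
// (Distance, Stride, TypeSize) needed by the caller to keep analyzing.
using DistInfo = std::tuple<int64_t, uint64_t, uint64_t>;

// Returns either a final verdict or the data for further analysis, mirroring
// the "DepType or tuple" result shape used in the patch.
std::variant<DepType, DistInfo> classify(bool aIsWrite, bool bIsWrite,
                                         int64_t dist, uint64_t stride,
                                         uint64_t typeSize) {
  if (!aIsWrite && !bIsWrite)
    return DepType::NoDep;                        // two reads: decided immediately
  return std::make_tuple(dist, stride, typeSize); // caller continues the analysis
}

int main() {
  auto res = classify(/*aIsWrite=*/true, /*bIsWrite=*/false, 16, 1, 4);
  if (std::holds_alternative<DepType>(res)) {
    std::cout << "early verdict\n";
  } else {
    const auto &[dist, stride, typeSize] = std::get<DistInfo>(res);
    std::cout << "distance " << dist << ", stride " << stride
              << ", type size " << typeSize << "\n";
  }
}
```

The design keeps the scope of the temporary values (pointers, strides, sizes) inside the helper while still letting the caller short-circuit on an already-decided dependence type.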