Diffstat (limited to 'contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 297
1 file changed, 173 insertions(+), 124 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 8311b480ab09..9e110567e98e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -286,8 +287,6 @@ void RuntimePointerChecking::tryToCreateDiffCheck(
return;
}
- const DataLayout &DL =
- SinkAR->getLoop()->getHeader()->getModule()->getDataLayout();
SmallVector<Instruction *, 4> SrcInsts =
DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
SmallVector<Instruction *, 4> SinkInsts =
@@ -298,11 +297,10 @@ void RuntimePointerChecking::tryToCreateDiffCheck(
CanUseDiffCheck = false;
return;
}
+ const DataLayout &DL =
+ SinkAR->getLoop()->getHeader()->getModule()->getDataLayout();
unsigned AllocSize =
std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
- IntegerType *IntTy =
- IntegerType::get(Src->PointerValue->getContext(),
- DL.getPointerSizeInBits(CGI.AddressSpace));
// Only matching constant steps matching the AllocSize are supported at the
// moment. This simplifies the difference computation. Can be extended in the
@@ -314,6 +312,10 @@ void RuntimePointerChecking::tryToCreateDiffCheck(
return;
}
+ IntegerType *IntTy =
+ IntegerType::get(Src->PointerValue->getContext(),
+ DL.getPointerSizeInBits(CGI.AddressSpace));
+
// When counting down, the dependence distance needs to be swapped.
if (Step->getValue()->isNegative())
std::swap(SinkAR, SrcAR);
@@ -620,7 +622,10 @@ public:
AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA,
PredicatedScalarEvolution &PSE)
- : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
+ : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) {
+ // We're analyzing dependences across loop iterations.
+ BAA.enableCrossIterationMode();
+ }
/// Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
@@ -702,6 +707,9 @@ private:
/// Set of pointers that are read only.
SmallPtrSet<Value*, 16> ReadOnlyPtr;
+ /// Batched alias analysis results.
+ BatchAAResults BAA;
+
/// An alias set tracker to partition the access set by underlying object and
//intrinsic property (such as TBAA metadata).
AliasSetTracker AST;
@@ -756,7 +764,7 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
- int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
+ int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides).value_or(0);
if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
return true;
@@ -803,10 +811,10 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
// in by the caller. If we have a node that may potentially yield a valid
// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
// ourselves before adding to the list.
-static void
-findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
- SmallVectorImpl<std::pair<const SCEV *, bool>> &ScevList,
- unsigned Depth) {
+static void findForkedSCEVs(
+ ScalarEvolution *SE, const Loop *L, Value *Ptr,
+ SmallVectorImpl<PointerIntPair<const SCEV *, 1, bool>> &ScevList,
+ unsigned Depth) {
// If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
// we've exceeded our limit on recursion, just return whatever we have
// regardless of whether it can be used for a forked pointer or not, along
@@ -814,15 +822,25 @@ findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
const SCEV *Scev = SE->getSCEV(Ptr);
if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
!isa<Instruction>(Ptr) || Depth == 0) {
- ScevList.push_back(
- std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
+ ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
return;
}
Depth--;
- auto UndefPoisonCheck = [](std::pair<const SCEV *, bool> S) -> bool {
- return S.second;
+ auto UndefPoisonCheck = [](PointerIntPair<const SCEV *, 1, bool> S) {
+ return get<1>(S);
+ };
+
+ auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SE->getAddExpr(L, R);
+ case Instruction::Sub:
+ return SE->getMinusSCEV(L, R);
+ default:
+ llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
+ }
};
Instruction *I = cast<Instruction>(Ptr);
@@ -834,12 +852,11 @@ findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
// We only handle base + single offset GEPs here for now.
// Not dealing with preexisting gathers yet, so no vectors.
if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
- ScevList.push_back(
- std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP)));
+ ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP));
break;
}
- SmallVector<std::pair<const SCEV *, bool>, 2> BaseScevs;
- SmallVector<std::pair<const SCEV *, bool>, 2> OffsetScevs;
+ SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> BaseScevs;
+ SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> OffsetScevs;
findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
@@ -855,7 +872,7 @@ findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
OffsetScevs.push_back(OffsetScevs[0]);
else {
- ScevList.push_back(std::make_pair(Scev, NeedsFreeze));
+ ScevList.emplace_back(Scev, NeedsFreeze);
break;
}
@@ -870,17 +887,17 @@ findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
// Scale up the offsets by the size of the type, then add to the bases.
const SCEV *Scaled1 = SE->getMulExpr(
- Size, SE->getTruncateOrSignExtend(OffsetScevs[0].first, IntPtrTy));
+ Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[0]), IntPtrTy));
const SCEV *Scaled2 = SE->getMulExpr(
- Size, SE->getTruncateOrSignExtend(OffsetScevs[1].first, IntPtrTy));
- ScevList.push_back(std::make_pair(
- SE->getAddExpr(BaseScevs[0].first, Scaled1), NeedsFreeze));
- ScevList.push_back(std::make_pair(
- SE->getAddExpr(BaseScevs[1].first, Scaled2), NeedsFreeze));
+ Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[1]), IntPtrTy));
+ ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[0]), Scaled1),
+ NeedsFreeze);
+ ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[1]), Scaled2),
+ NeedsFreeze);
break;
}
case Instruction::Select: {
- SmallVector<std::pair<const SCEV *, bool>, 2> ChildScevs;
+ SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs;
// A select means we've found a forked pointer, but we currently only
// support a single select per pointer so if there's another behind this
// then we just bail out and return the generic SCEV.
@@ -890,34 +907,71 @@ findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
ScevList.push_back(ChildScevs[0]);
ScevList.push_back(ChildScevs[1]);
} else
- ScevList.push_back(
- std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
+ ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub: {
+ SmallVector<PointerIntPair<const SCEV *, 1, bool>> LScevs;
+ SmallVector<PointerIntPair<const SCEV *, 1, bool>> RScevs;
+ findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth);
+ findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth);
+
+ // See if we need to freeze our fork...
+ bool NeedsFreeze =
+ any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);
+
+ // Check that we only have a single fork, on either the left or right side.
+ // Copy the SCEV across for the one without a fork in order to generate
+ // the full SCEV for both sides of the BinOp.
+ if (LScevs.size() == 2 && RScevs.size() == 1)
+ RScevs.push_back(RScevs[0]);
+ else if (RScevs.size() == 2 && LScevs.size() == 1)
+ LScevs.push_back(LScevs[0]);
+ else {
+ ScevList.emplace_back(Scev, NeedsFreeze);
+ break;
+ }
+
+ ScevList.emplace_back(
+ GetBinOpExpr(Opcode, get<0>(LScevs[0]), get<0>(RScevs[0])),
+ NeedsFreeze);
+ ScevList.emplace_back(
+ GetBinOpExpr(Opcode, get<0>(LScevs[1]), get<0>(RScevs[1])),
+ NeedsFreeze);
break;
}
default:
// Just return the current SCEV if we haven't handled the instruction yet.
LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
- ScevList.push_back(
- std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
+ ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
break;
}
}
-static SmallVector<std::pair<const SCEV *, bool>>
+static SmallVector<PointerIntPair<const SCEV *, 1, bool>>
findForkedPointer(PredicatedScalarEvolution &PSE,
const ValueToValueMap &StridesMap, Value *Ptr,
const Loop *L) {
ScalarEvolution *SE = PSE.getSE();
assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
- SmallVector<std::pair<const SCEV *, bool>> Scevs;
+ SmallVector<PointerIntPair<const SCEV *, 1, bool>> Scevs;
findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
- // For now, we will only accept a forked pointer with two possible SCEVs.
- if (Scevs.size() == 2)
+ // For now, we will only accept a forked pointer with two possible SCEVs
+ // that are either SCEVAddRecExprs or loop invariant.
+ if (Scevs.size() == 2 &&
+ (isa<SCEVAddRecExpr>(get<0>(Scevs[0])) ||
+ SE->isLoopInvariant(get<0>(Scevs[0]), L)) &&
+ (isa<SCEVAddRecExpr>(get<0>(Scevs[1])) ||
+ SE->isLoopInvariant(get<0>(Scevs[1]), L))) {
+ LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n");
+ LLVM_DEBUG(dbgs() << "\t(1) " << *get<0>(Scevs[0]) << "\n");
+ LLVM_DEBUG(dbgs() << "\t(2) " << *get<0>(Scevs[1]) << "\n");
return Scevs;
+ }
- return {
- std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)};
+ return {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
}
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
@@ -929,11 +983,11 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
bool Assume) {
Value *Ptr = Access.getPointer();
- SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs =
+ SmallVector<PointerIntPair<const SCEV *, 1, bool>> TranslatedPtrs =
findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
for (auto &P : TranslatedPtrs) {
- const SCEV *PtrExpr = P.first;
+ const SCEV *PtrExpr = get<0>(P);
if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
return false;
@@ -954,13 +1008,11 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
// If there's only one option for Ptr, look it up after bounds and wrap
// checking, because assumptions might have been added to PSE.
if (TranslatedPtrs.size() == 1)
- TranslatedPtrs[0] = std::make_pair(
- replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false);
+ TranslatedPtrs[0] = {replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr),
+ false};
}
- for (auto &P : TranslatedPtrs) {
- const SCEV *PtrExpr = P.first;
-
+ for (auto [PtrExpr, NeedsFreeze] : TranslatedPtrs) {
// The id of the dependence set.
unsigned DepId;
@@ -976,7 +1028,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
bool IsWrite = Access.getInt();
RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
- P.second);
+ NeedsFreeze);
LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
}
@@ -1314,17 +1366,18 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
}
/// Check whether the access through \p Ptr has a constant stride.
-int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
- Value *Ptr, const Loop *Lp,
- const ValueToValueMap &StridesMap, bool Assume,
- bool ShouldCheckWrap) {
+std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
+ Type *AccessTy, Value *Ptr,
+ const Loop *Lp,
+ const ValueToValueMap &StridesMap,
+ bool Assume, bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
if (isa<ScalableVectorType>(AccessTy)) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
<< "\n");
- return 0;
+ return std::nullopt;
}
const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
@@ -1336,14 +1389,14 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
if (!AR) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
<< " SCEV: " << *PtrScev << "\n");
- return 0;
+ return std::nullopt;
}
// The access function must stride over the innermost loop.
if (Lp != AR->getLoop()) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
<< *Ptr << " SCEV: " << *AR << "\n");
- return 0;
+ return std::nullopt;
}
// The address calculation must not wrap. Otherwise, a dependence could be
@@ -1371,7 +1424,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
LLVM_DEBUG(
dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
<< *Ptr << " SCEV: " << *AR << "\n");
- return 0;
+ return std::nullopt;
}
}
@@ -1383,17 +1436,17 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
if (!C) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
<< " SCEV: " << *AR << "\n");
- return 0;
+ return std::nullopt;
}
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
- int64_t Size = AllocSize.getFixedSize();
+ int64_t Size = AllocSize.getFixedValue();
const APInt &APStepVal = C->getAPInt();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
- return 0;
+ return std::nullopt;
int64_t StepVal = APStepVal.getSExtValue();
@@ -1401,7 +1454,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
int64_t Stride = StepVal / Size;
int64_t Rem = StepVal % Size;
if (Rem)
- return 0;
+ return std::nullopt;
// If the SCEV could wrap but we have an inbounds gep with a unit stride we
// know we can't "wrap around the address space". In case of address space
@@ -1418,16 +1471,17 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
<< "LAA: Added an overflow assumption\n");
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
} else
- return 0;
+ return std::nullopt;
}
return Stride;
}
-Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
- Value *PtrB, const DataLayout &DL,
- ScalarEvolution &SE, bool StrictCheck,
- bool CheckType) {
+std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
+ Type *ElemTyB, Value *PtrB,
+ const DataLayout &DL,
+ ScalarEvolution &SE, bool StrictCheck,
+ bool CheckType) {
assert(PtrA && PtrB && "Expected non-nullptr pointers.");
assert(cast<PointerType>(PtrA->getType())
->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
@@ -1440,14 +1494,14 @@ Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
// Make sure that the element types are the same if required.
if (CheckType && ElemTyA != ElemTyB)
- return None;
+ return std::nullopt;
unsigned ASA = PtrA->getType()->getPointerAddressSpace();
unsigned ASB = PtrB->getType()->getPointerAddressSpace();
// Check that the address spaces match.
if (ASA != ASB)
- return None;
+ return std::nullopt;
unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
@@ -1462,7 +1516,7 @@ Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
// Check that the address spaces match and that the pointers are valid.
if (ASA != ASB)
- return None;
+ return std::nullopt;
IdxWidth = DL.getIndexSizeInBits(ASA);
OffsetA = OffsetA.sextOrTrunc(IdxWidth);
@@ -1477,7 +1531,7 @@ Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
const auto *Diff =
dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
if (!Diff)
- return None;
+ return std::nullopt;
Val = Diff->getAPInt().getSExtValue();
}
int Size = DL.getTypeStoreSize(ElemTyA);
@@ -1487,7 +1541,7 @@ Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
// the bitcasts removal in the provided pointers.
if (!StrictCheck || Dist * Size == Val)
return Dist;
- return None;
+ return std::nullopt;
}
bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
@@ -1507,8 +1561,8 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
int Cnt = 1;
bool IsConsecutive = true;
for (auto *Ptr : VL.drop_front()) {
- Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
- /*StrictCheck=*/true);
+ std::optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
+ /*StrictCheck=*/true);
if (!Diff)
return false;
@@ -1543,8 +1597,9 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return false;
Type *ElemTyA = getLoadStoreType(A);
Type *ElemTyB = getLoadStoreType(B);
- Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
- /*StrictCheck=*/true, CheckType);
+ std::optional<int> Diff =
+ getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
+ /*StrictCheck=*/true, CheckType);
return Diff && *Diff == 1;
}
@@ -1669,7 +1724,7 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
Status = S;
}
-/// Given a non-constant (unknown) dependence-distance \p Dist between two
+/// Given a dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
@@ -1697,7 +1752,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
// This is equivalent to the Strong SIV Test (Practical Dependence Testing,
// Section 4.2.1); Note, that for vectorization it is sufficient to prove
// that the dependence distance is >= VF; This is checked elsewhere.
- // But in some cases we can prune unknown dependence distances early, and
+ // But in some cases we can prune dependence distances early, and
// even before selecting the VF, and without a runtime test, by comparing
// the distance against the loop iteration count. Since the vectorized code
// will be executed only if LoopCount >= VF, proving distance >= LoopCount
@@ -1778,10 +1833,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const ValueToValueMap &Strides) {
assert (AIdx < BIdx && "Must pass arguments in program order");
- Value *APtr = A.getPointer();
- Value *BPtr = B.getPointer();
- bool AIsWrite = A.getInt();
- bool BIsWrite = B.getInt();
+ auto [APtr, AIsWrite] = A;
+ auto [BPtr, BIsWrite] = B;
Type *ATy = getLoadStoreType(InstMap[AIdx]);
Type *BTy = getLoadStoreType(InstMap[BIdx]);
@@ -1795,9 +1848,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::Unknown;
int64_t StrideAPtr =
- getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true);
+ getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true).value_or(0);
int64_t StrideBPtr =
- getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true);
+ getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true).value_or(0);
const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -1813,7 +1866,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
std::swap(StrideAPtr, StrideBPtr);
}
- const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
+ ScalarEvolution &SE = *PSE.getSE();
+ const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
<< "(Induction step: " << StrideAPtr << ")\n");
@@ -1833,14 +1887,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
bool HasSameSize =
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
uint64_t Stride = std::abs(StrideAPtr);
+
+ if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
+ isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
+ Stride, TypeByteSize))
+ return Dependence::NoDep;
+
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
- if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
- isSafeDependenceDistance(DL, *(PSE.getSE()),
- *(PSE.getBackedgeTakenCount()), *Dist, Stride,
- TypeByteSize))
- return Dependence::NoDep;
-
LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
FoundNonConstantDistanceDependence = true;
return Dependence::Unknown;
@@ -1932,7 +1986,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Unsafe if the minimum distance needed is greater than max safe distance.
if (MinDistanceNeeded > MaxSafeDepDistBytes) {
LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
- << MinDistanceNeeded << " size in bytes");
+ << MinDistanceNeeded << " size in bytes\n");
return Dependence::Backward;
}
@@ -2128,8 +2182,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
EnableMemAccessVersioning &&
!TheLoop->getHeader()->getParent()->hasOptSize();
- // For each block.
- for (BasicBlock *BB : TheLoop->blocks()) {
+ // Traverse blocks in fixed RPOT order, regardless of their storage in the
+ // loop info, as it may be arbitrary.
+ LoopBlocksRPO RPOT(TheLoop);
+ RPOT.perform(LI);
+ for (BasicBlock *BB : RPOT) {
// Scan the BB and collect legal loads and stores. Also detect any
// convergent instructions.
for (Instruction &I : *BB) {
@@ -2295,7 +2352,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
bool IsReadOnlyPtr = false;
Type *AccessTy = getLoadStoreType(LD);
if (Seen.insert({Ptr, AccessTy}).second ||
- !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
+ !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides).value_or(0)) {
++NumReads;
IsReadOnlyPtr = true;
}
@@ -2410,11 +2467,10 @@ void LoopAccessInfo::emitUnsafeDependenceRemark() {
auto Deps = getDepChecker().getDependences();
if (!Deps)
return;
- auto Found = std::find_if(
- Deps->begin(), Deps->end(), [](const MemoryDepChecker::Dependence &D) {
- return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
- MemoryDepChecker::VectorizationSafetyStatus::Safe;
- });
+ auto Found = llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {
+ return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
+ MemoryDepChecker::VectorizationSafetyStatus::Safe;
+ });
if (Found == Deps->end())
return;
MemoryDepChecker::Dependence Dep = *Found;
@@ -2614,38 +2670,28 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
PSE->print(OS, Depth);
}
-LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
- initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
-}
-
-const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
- auto &LAI = LoopAccessInfoMap[L];
+const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
+ auto I = LoopAccessInfoMap.insert({&L, nullptr});
- if (!LAI)
- LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
+ if (I.second)
+ I.first->second =
+ std::make_unique<LoopAccessInfo>(&L, &SE, TLI, &AA, &DT, &LI);
- return *LAI;
+ return *I.first->second;
}
-void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
- LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
-
- for (Loop *TopLevelLoop : *LI)
- for (Loop *L : depth_first(TopLevelLoop)) {
- OS.indent(2) << L->getHeader()->getName() << ":\n";
- auto &LAI = LAA.getInfo(L);
- LAI.print(OS, 4);
- }
+LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
+ initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
}
bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-
+ auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+ auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LAIs = std::make_unique<LoopAccessInfoManager>(SE, AA, DT, LI, TLI);
return false;
}
@@ -2658,6 +2704,14 @@ void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
+LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return LoopAccessInfoManager(
+ AM.getResult<ScalarEvolutionAnalysis>(F), AM.getResult<AAManager>(F),
+ AM.getResult<DominatorTreeAnalysis>(F), AM.getResult<LoopAnalysis>(F),
+ &AM.getResult<TargetLibraryAnalysis>(F));
+}
+
char LoopAccessLegacyAnalysis::ID = 0;
static const char laa_name[] = "Loop Access Analysis";
#define LAA_NAME "loop-accesses"
@@ -2671,11 +2725,6 @@ INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
AnalysisKey LoopAccessAnalysis::Key;
-LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR) {
- return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
-}
-
namespace llvm {
Pass *createLAAPass() {