summaryrefslogtreecommitdiff
path: root/lib/Analysis/LoopAccessAnalysis.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r--lib/Analysis/LoopAccessAnalysis.cpp129
1 files changed, 93 insertions, 36 deletions
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index bf80072130974..4ba12583ff839 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -135,21 +135,6 @@ bool VectorizerParams::isInterleaveForced() {
return ::VectorizationInterleave.getNumOccurrences() > 0;
}
-void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
- const Loop *TheLoop, const char *PassName,
- OptimizationRemarkEmitter &ORE) {
- DebugLoc DL = TheLoop->getStartLoc();
- const Value *V = TheLoop->getHeader();
- if (const Instruction *I = Message.getInstr()) {
- // If there is no debug location attached to the instruction, revert back to
- // using the loop's.
- if (I->getDebugLoc())
- DL = I->getDebugLoc();
- V = I->getParent();
- }
- ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str());
-}
-
Value *llvm::stripIntegerCast(Value *V) {
if (auto *CI = dyn_cast<CastInst>(V))
if (CI->getOperand(0)->getType()->isIntegerTy())
@@ -172,11 +157,6 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
// Strip casts.
StrideVal = stripIntegerCast(StrideVal);
- // Replace symbolic stride by one.
- Value *One = ConstantInt::get(StrideVal->getType(), 1);
- ValueToValueMap RewriteMap;
- RewriteMap[StrideVal] = One;
-
ScalarEvolution *SE = PSE.getSE();
const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
const auto *CT =
@@ -518,7 +498,7 @@ class AccessAnalysis {
public:
/// \brief Read or write access location.
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
- typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+ typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA,
@@ -570,7 +550,7 @@ public:
DepChecker.clearDependences();
}
- MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
+ MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
private:
typedef SetVector<MemAccessInfo> PtrAccessSet;
@@ -584,8 +564,8 @@ private:
const DataLayout &DL;
- /// Set of accesses that need a further dependence check.
- MemAccessInfoSet CheckDeps;
+ /// List of accesses that need a further dependence check.
+ MemAccessInfoList CheckDeps;
/// Set of pointers that are read only.
SmallPtrSet<Value*, 16> ReadOnlyPtr;
@@ -842,7 +822,7 @@ void AccessAnalysis::processMemAccesses() {
// there is no other write to the ptr - this is an optimization to
// catch "a[i] = a[i] + " without having to do a dependence check).
if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
- CheckDeps.insert(Access);
+ CheckDeps.push_back(Access);
IsRTCheckAnalysisNeeded = true;
}
@@ -1205,6 +1185,73 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
return false;
}
+/// Given a non-constant (unknown) dependence-distance \p Dist between two
+/// memory accesses, that have the same stride whose absolute value is given
+/// in \p Stride, and that have the same type size \p TypeByteSize,
+/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
+/// possible to prove statically that the dependence distance is larger
+/// than the range that the accesses will travel through the execution of
+/// the loop. If so, return true; false otherwise. This is useful for
+/// example in loops such as the following (PR31098):
+/// for (i = 0; i < D; ++i) {
+/// = out[i];
+/// out[i+D] =
+/// }
+static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
+ const SCEV &BackedgeTakenCount,
+ const SCEV &Dist, uint64_t Stride,
+ uint64_t TypeByteSize) {
+
+ // If we can prove that
+ // (**) |Dist| > BackedgeTakenCount * Step
+ // where Step is the absolute stride of the memory accesses in bytes,
+ // then there is no dependence.
+ //
+ // Ratioanle:
+ // We basically want to check if the absolute distance (|Dist/Step|)
+ // is >= the loop iteration count (or > BackedgeTakenCount).
+ // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
+ // Section 4.2.1); Note, that for vectorization it is sufficient to prove
+ // that the dependence distance is >= VF; This is checked elsewhere.
+ // But in some cases we can prune unknown dependence distances early, and
+ // even before selecting the VF, and without a runtime test, by comparing
+ // the distance against the loop iteration count. Since the vectorized code
+ // will be executed only if LoopCount >= VF, proving distance >= LoopCount
+ // also guarantees that distance >= VF.
+ //
+ const uint64_t ByteStride = Stride * TypeByteSize;
+ const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
+ const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
+
+ const SCEV *CastedDist = &Dist;
+ const SCEV *CastedProduct = Product;
+ uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
+ uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
+
+ // The dependence distance can be positive/negative, so we sign extend Dist;
+ // The multiplication of the absolute stride in bytes and the
+ // backdgeTakenCount is non-negative, so we zero extend Product.
+ if (DistTypeSize > ProductTypeSize)
+ CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
+ else
+ CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
+
+ // Is Dist - (BackedgeTakenCount * Step) > 0 ?
+ // (If so, then we have proven (**) because |Dist| >= Dist)
+ const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
+ if (SE.isKnownPositive(Minus))
+ return true;
+
+ // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
+ // (If so, then we have proven (**) because |Dist| >= -1*Dist)
+ const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
+ Minus = SE.getMinusSCEV(NegDist, CastedProduct);
+ if (SE.isKnownPositive(Minus))
+ return true;
+
+ return false;
+}
+
/// \brief Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
@@ -1292,21 +1339,26 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::Unknown;
}
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
+ auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+ uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
+ uint64_t Stride = std::abs(StrideAPtr);
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
+ if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
+ isSafeDependenceDistance(DL, *(PSE.getSE()),
+ *(PSE.getBackedgeTakenCount()), *Dist, Stride,
+ TypeByteSize))
+ return Dependence::NoDep;
+
DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
ShouldRetryWithRuntimeCheck = true;
return Dependence::Unknown;
}
- Type *ATy = APtr->getType()->getPointerElementType();
- Type *BTy = BPtr->getType()->getPointerElementType();
- auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
- uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
-
const APInt &Val = C->getAPInt();
int64_t Distance = Val.getSExtValue();
- uint64_t Stride = std::abs(StrideAPtr);
// Attempt to prove strided accesses independent.
if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
@@ -1427,12 +1479,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
}
bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
- MemAccessInfoSet &CheckDeps,
+ MemAccessInfoList &CheckDeps,
const ValueToValueMap &Strides) {
MaxSafeDepDistBytes = -1;
- while (!CheckDeps.empty()) {
- MemAccessInfo CurAccess = *CheckDeps.begin();
+ SmallPtrSet<MemAccessInfo, 8> Visited;
+ for (MemAccessInfo CurAccess : CheckDeps) {
+ if (Visited.count(CurAccess))
+ continue;
// Get the relevant memory access set.
EquivalenceClasses<MemAccessInfo>::iterator I =
@@ -1446,7 +1500,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
// Check every access pair.
while (AI != AE) {
- CheckDeps.erase(*AI);
+ Visited.insert(*AI);
EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
while (OI != AE) {
// Check every accessing instruction pair in program order.
@@ -1885,7 +1939,10 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
Value *NewPtr = (Inst && TheLoop->contains(Inst))
? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
: Ptr;
- return {NewPtr, NewPtr};
+ // We must return a half-open range, which means incrementing Sc.
+ const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
+ Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
+ return {NewPtr, NewPtrPlusOne};
} else {
Value *Start = nullptr, *End = nullptr;
DEBUG(dbgs() << "LAA: Adding RT check for range:\n");