diff options
Diffstat (limited to 'lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r-- | lib/Analysis/LoopAccessAnalysis.cpp | 129 |
1 files changed, 93 insertions, 36 deletions
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index bf80072130974..4ba12583ff839 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -135,21 +135,6 @@ bool VectorizerParams::isInterleaveForced() { return ::VectorizationInterleave.getNumOccurrences() > 0; } -void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message, - const Loop *TheLoop, const char *PassName, - OptimizationRemarkEmitter &ORE) { - DebugLoc DL = TheLoop->getStartLoc(); - const Value *V = TheLoop->getHeader(); - if (const Instruction *I = Message.getInstr()) { - // If there is no debug location attached to the instruction, revert back to - // using the loop's. - if (I->getDebugLoc()) - DL = I->getDebugLoc(); - V = I->getParent(); - } - ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str()); -} - Value *llvm::stripIntegerCast(Value *V) { if (auto *CI = dyn_cast<CastInst>(V)) if (CI->getOperand(0)->getType()->isIntegerTy()) @@ -172,11 +157,6 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, // Strip casts. StrideVal = stripIntegerCast(StrideVal); - // Replace symbolic stride by one. - Value *One = ConstantInt::get(StrideVal->getType(), 1); - ValueToValueMap RewriteMap; - RewriteMap[StrideVal] = One; - ScalarEvolution *SE = PSE.getSE(); const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal)); const auto *CT = @@ -518,7 +498,7 @@ class AccessAnalysis { public: /// \brief Read or write access location. typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; - typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; + typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, @@ -570,7 +550,7 @@ public: DepChecker.clearDependences(); } - MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } + MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; } private: typedef SetVector<MemAccessInfo> PtrAccessSet; @@ -584,8 +564,8 @@ private: const DataLayout &DL; - /// Set of accesses that need a further dependence check. - MemAccessInfoSet CheckDeps; + /// List of accesses that need a further dependence check. + MemAccessInfoList CheckDeps; /// Set of pointers that are read only. SmallPtrSet<Value*, 16> ReadOnlyPtr; @@ -842,7 +822,7 @@ void AccessAnalysis::processMemAccesses() { // there is no other write to the ptr - this is an optimization to // catch "a[i] = a[i] + " without having to do a dependence check). if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) { - CheckDeps.insert(Access); + CheckDeps.push_back(Access); IsRTCheckAnalysisNeeded = true; } @@ -1205,6 +1185,73 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, return false; } +/// Given a non-constant (unknown) dependence-distance \p Dist between two +/// memory accesses, that have the same stride whose absolute value is given +/// in \p Stride, and that have the same type size \p TypeByteSize, +/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is +/// possible to prove statically that the dependence distance is larger +/// than the range that the accesses will travel through the execution of +/// the loop. If so, return true; false otherwise. This is useful for +/// example in loops such as the following (PR31098): +/// for (i = 0; i < D; ++i) { +/// = out[i]; +/// out[i+D] = +/// } +static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, + const SCEV &BackedgeTakenCount, + const SCEV &Dist, uint64_t Stride, + uint64_t TypeByteSize) { + + // If we can prove that + // (**) |Dist| > BackedgeTakenCount * Step + // where Step is the absolute stride of the memory accesses in bytes, + // then there is no dependence. + // + // Ratioanle: + // We basically want to check if the absolute distance (|Dist/Step|) + // is >= the loop iteration count (or > BackedgeTakenCount). + // This is equivalent to the Strong SIV Test (Practical Dependence Testing, + // Section 4.2.1); Note, that for vectorization it is sufficient to prove + // that the dependence distance is >= VF; This is checked elsewhere. + // But in some cases we can prune unknown dependence distances early, and + // even before selecting the VF, and without a runtime test, by comparing + // the distance against the loop iteration count. Since the vectorized code + // will be executed only if LoopCount >= VF, proving distance >= LoopCount + // also guarantees that distance >= VF. + // + const uint64_t ByteStride = Stride * TypeByteSize; + const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride); + const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step); + + const SCEV *CastedDist = &Dist; + const SCEV *CastedProduct = Product; + uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType()); + uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType()); + + // The dependence distance can be positive/negative, so we sign extend Dist; + // The multiplication of the absolute stride in bytes and the + // backdgeTakenCount is non-negative, so we zero extend Product. + if (DistTypeSize > ProductTypeSize) + CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType()); + else + CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType()); + + // Is Dist - (BackedgeTakenCount * Step) > 0 ? + // (If so, then we have proven (**) because |Dist| >= Dist) + const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct); + if (SE.isKnownPositive(Minus)) + return true; + + // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ? + // (If so, then we have proven (**) because |Dist| >= -1*Dist) + const SCEV *NegDist = SE.getNegativeSCEV(CastedDist); + Minus = SE.getMinusSCEV(NegDist, CastedProduct); + if (SE.isKnownPositive(Minus)) + return true; + + return false; +} + /// \brief Check the dependence for two accesses with the same stride \p Stride. /// \p Distance is the positive distance and \p TypeByteSize is type size in /// bytes. @@ -1292,21 +1339,26 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, return Dependence::Unknown; } + Type *ATy = APtr->getType()->getPointerElementType(); + Type *BTy = BPtr->getType()->getPointerElementType(); + auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); + uint64_t TypeByteSize = DL.getTypeAllocSize(ATy); + uint64_t Stride = std::abs(StrideAPtr); const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist); if (!C) { + if (TypeByteSize == DL.getTypeAllocSize(BTy) && + isSafeDependenceDistance(DL, *(PSE.getSE()), + *(PSE.getBackedgeTakenCount()), *Dist, Stride, + TypeByteSize)) + return Dependence::NoDep; + DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); ShouldRetryWithRuntimeCheck = true; return Dependence::Unknown; } - Type *ATy = APtr->getType()->getPointerElementType(); - Type *BTy = BPtr->getType()->getPointerElementType(); - auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); - uint64_t TypeByteSize = DL.getTypeAllocSize(ATy); - const APInt &Val = C->getAPInt(); int64_t Distance = Val.getSExtValue(); - uint64_t Stride = std::abs(StrideAPtr); // Attempt to prove strided accesses independent. if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy && @@ -1427,12 +1479,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, } bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, - MemAccessInfoSet &CheckDeps, + MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides) { MaxSafeDepDistBytes = -1; - while (!CheckDeps.empty()) { - MemAccessInfo CurAccess = *CheckDeps.begin(); + SmallPtrSet<MemAccessInfo, 8> Visited; + for (MemAccessInfo CurAccess : CheckDeps) { + if (Visited.count(CurAccess)) + continue; // Get the relevant memory access set. EquivalenceClasses<MemAccessInfo>::iterator I = @@ -1446,7 +1500,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, // Check every access pair. while (AI != AE) { - CheckDeps.erase(*AI); + Visited.insert(*AI); EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI); while (OI != AE) { // Check every accessing instruction pair in program order. @@ -1885,7 +1939,10 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, Value *NewPtr = (Inst && TheLoop->contains(Inst)) ? Exp.expandCodeFor(Sc, PtrArithTy, Loc) : Ptr; - return {NewPtr, NewPtr}; + // We must return a half-open range, which means incrementing Sc. + const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy)); + Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc); + return {NewPtr, NewPtrPlusOne}; } else { Value *Start = nullptr, *End = nullptr; DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); |