diff options
Diffstat (limited to 'contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp')
| -rw-r--r-- | contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 129 | 
1 files changed, 93 insertions, 36 deletions
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp index bf8007213097..4ba12583ff83 100644 --- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -135,21 +135,6 @@ bool VectorizerParams::isInterleaveForced() {    return ::VectorizationInterleave.getNumOccurrences() > 0;  } -void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message, -                                    const Loop *TheLoop, const char *PassName, -                                    OptimizationRemarkEmitter &ORE) { -  DebugLoc DL = TheLoop->getStartLoc(); -  const Value *V = TheLoop->getHeader(); -  if (const Instruction *I = Message.getInstr()) { -    // If there is no debug location attached to the instruction, revert back to -    // using the loop's. -    if (I->getDebugLoc()) -      DL = I->getDebugLoc(); -    V = I->getParent(); -  } -  ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str()); -} -  Value *llvm::stripIntegerCast(Value *V) {    if (auto *CI = dyn_cast<CastInst>(V))      if (CI->getOperand(0)->getType()->isIntegerTy()) @@ -172,11 +157,6 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,      // Strip casts.      StrideVal = stripIntegerCast(StrideVal); -    // Replace symbolic stride by one. -    Value *One = ConstantInt::get(StrideVal->getType(), 1); -    ValueToValueMap RewriteMap; -    RewriteMap[StrideVal] = One; -      ScalarEvolution *SE = PSE.getSE();      const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));      const auto *CT = @@ -518,7 +498,7 @@ class AccessAnalysis {  public:    /// \brief Read or write access location.    typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; -  typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; +  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;    AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,                   MemoryDepChecker::DepCandidates &DA, @@ -570,7 +550,7 @@ public:      DepChecker.clearDependences();    } -  MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } +  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }  private:    typedef SetVector<MemAccessInfo> PtrAccessSet; @@ -584,8 +564,8 @@ private:    const DataLayout &DL; -  /// Set of accesses that need a further dependence check. -  MemAccessInfoSet CheckDeps; +  /// List of accesses that need a further dependence check. +  MemAccessInfoList CheckDeps;    /// Set of pointers that are read only.    SmallPtrSet<Value*, 16> ReadOnlyPtr; @@ -842,7 +822,7 @@ void AccessAnalysis::processMemAccesses() {            // there is no other write to the ptr - this is an optimization to            // catch "a[i] = a[i] + " without having to do a dependence check).            if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) { -            CheckDeps.insert(Access); +            CheckDeps.push_back(Access);              IsRTCheckAnalysisNeeded = true;            } @@ -1205,6 +1185,73 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,    return false;  } +/// Given a non-constant (unknown) dependence-distance \p Dist between two  +/// memory accesses, that have the same stride whose absolute value is given +/// in \p Stride, and that have the same type size \p TypeByteSize, +/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is +/// possible to prove statically that the dependence distance is larger +/// than the range that the accesses will travel through the execution of +/// the loop. If so, return true; false otherwise. This is useful for +/// example in loops such as the following (PR31098): +///     for (i = 0; i < D; ++i) { +///                = out[i]; +///       out[i+D] = +///     } +static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, +                                     const SCEV &BackedgeTakenCount, +                                     const SCEV &Dist, uint64_t Stride, +                                     uint64_t TypeByteSize) { + +  // If we can prove that +  //      (**) |Dist| > BackedgeTakenCount * Step +  // where Step is the absolute stride of the memory accesses in bytes,  +  // then there is no dependence. +  // +  // Ratioanle:  +  // We basically want to check if the absolute distance (|Dist/Step|)  +  // is >= the loop iteration count (or > BackedgeTakenCount).  +  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,  +  // Section 4.2.1); Note, that for vectorization it is sufficient to prove  +  // that the dependence distance is >= VF; This is checked elsewhere. +  // But in some cases we can prune unknown dependence distances early, and  +  // even before selecting the VF, and without a runtime test, by comparing  +  // the distance against the loop iteration count. Since the vectorized code  +  // will be executed only if LoopCount >= VF, proving distance >= LoopCount  +  // also guarantees that distance >= VF. +  // +  const uint64_t ByteStride = Stride * TypeByteSize; +  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride); +  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step); + +  const SCEV *CastedDist = &Dist; +  const SCEV *CastedProduct = Product; +  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType()); +  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType()); + +  // The dependence distance can be positive/negative, so we sign extend Dist;  +  // The multiplication of the absolute stride in bytes and the  +  // backdgeTakenCount is non-negative, so we zero extend Product. +  if (DistTypeSize > ProductTypeSize) +    CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType()); +  else +    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType()); + +  // Is  Dist - (BackedgeTakenCount * Step) > 0 ? +  // (If so, then we have proven (**) because |Dist| >= Dist) +  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct); +  if (SE.isKnownPositive(Minus)) +    return true; + +  // Second try: Is  -Dist - (BackedgeTakenCount * Step) > 0 ? +  // (If so, then we have proven (**) because |Dist| >= -1*Dist) +  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist); +  Minus = SE.getMinusSCEV(NegDist, CastedProduct); +  if (SE.isKnownPositive(Minus)) +    return true; + +  return false; +} +  /// \brief Check the dependence for two accesses with the same stride \p Stride.  /// \p Distance is the positive distance and \p TypeByteSize is type size in  /// bytes. @@ -1292,21 +1339,26 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,      return Dependence::Unknown;    } +  Type *ATy = APtr->getType()->getPointerElementType(); +  Type *BTy = BPtr->getType()->getPointerElementType(); +  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); +  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy); +  uint64_t Stride = std::abs(StrideAPtr);    const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);    if (!C) { +    if (TypeByteSize == DL.getTypeAllocSize(BTy) && +        isSafeDependenceDistance(DL, *(PSE.getSE()), +                                 *(PSE.getBackedgeTakenCount()), *Dist, Stride, +                                 TypeByteSize)) +      return Dependence::NoDep; +      DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");      ShouldRetryWithRuntimeCheck = true;      return Dependence::Unknown;    } -  Type *ATy = APtr->getType()->getPointerElementType(); -  Type *BTy = BPtr->getType()->getPointerElementType(); -  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); -  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy); -    const APInt &Val = C->getAPInt();    int64_t Distance = Val.getSExtValue(); -  uint64_t Stride = std::abs(StrideAPtr);    // Attempt to prove strided accesses independent.    if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy && @@ -1427,12 +1479,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,  }  bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, -                                   MemAccessInfoSet &CheckDeps, +                                   MemAccessInfoList &CheckDeps,                                     const ValueToValueMap &Strides) {    MaxSafeDepDistBytes = -1; -  while (!CheckDeps.empty()) { -    MemAccessInfo CurAccess = *CheckDeps.begin(); +  SmallPtrSet<MemAccessInfo, 8> Visited; +  for (MemAccessInfo CurAccess : CheckDeps) { +    if (Visited.count(CurAccess)) +      continue;      // Get the relevant memory access set.      EquivalenceClasses<MemAccessInfo>::iterator I = @@ -1446,7 +1500,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,      // Check every access pair.      while (AI != AE) { -      CheckDeps.erase(*AI); +      Visited.insert(*AI);        EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);        while (OI != AE) {          // Check every accessing instruction pair in program order. @@ -1885,7 +1939,10 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,      Value *NewPtr = (Inst && TheLoop->contains(Inst))                          ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)                          : Ptr; -    return {NewPtr, NewPtr}; +    // We must return a half-open range, which means incrementing Sc. +    const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy)); +    Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc); +    return {NewPtr, NewPtrPlusOne};    } else {      Value *Start = nullptr, *End = nullptr;      DEBUG(dbgs() << "LAA: Adding RT check for range:\n");  | 
