Diffstat (limited to 'lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r-- | lib/Analysis/LoopAccessAnalysis.cpp | 94
1 file changed, 69 insertions, 25 deletions
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 7f3480f512ab..36bd9a8b7ea7 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -843,7 +842,7 @@ void AccessAnalysis::processMemAccesses() {
     bool SetHasWrite = false;
 
     // Map of pointers to last access encountered.
-    typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+    typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
     UnderlyingObjToAccessMap ObjToLastAccess;
 
     // Set of access to check after all writes have been processed.
@@ -904,13 +903,13 @@ void AccessAnalysis::processMemAccesses() {
 
         // Create sets of pointers connected by a shared alias set and
        // underlying object.
-        typedef SmallVector<Value *, 16> ValueVector;
+        typedef SmallVector<const Value *, 16> ValueVector;
         ValueVector TempObjects;
 
         GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
         LLVM_DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
-        for (Value *UnderlyingObj : TempObjects) {
+        for (const Value *UnderlyingObj : TempObjects) {
           // nullptr never alias, don't join sets for pointer that have "null"
           // in their UnderlyingObjects list.
           if (isa<ConstantPointerNull>(UnderlyingObj) &&
@@ -1014,7 +1013,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
     return 0;
   }
 
-  // The accesss function must stride over the innermost loop.
+  // The access function must stride over the innermost loop.
   if (Lp != AR->getLoop()) {
     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
                       << *Ptr << " SCEV: " << *AR << "\n");
@@ -1086,7 +1085,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
     if (Assume) {
       // We can avoid this case by adding a run-time check.
       LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
-                        << "inbouds or in address space 0 may wrap:\n"
+                        << "inbounds or in address space 0 may wrap:\n"
                         << "LAA: Pointer: " << *Ptr << "\n"
                         << "LAA: SCEV: " << *AR << "\n"
                         << "LAA: Added an overflow assumption\n");
@@ -1145,10 +1144,9 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
   std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
 
   // Sort the memory accesses and keep the order of their uses in UseOrder.
-  std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
-                   [&OffValPairs](unsigned Left, unsigned Right) {
-                     return OffValPairs[Left].first < OffValPairs[Right].first;
-                   });
+  llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
+    return OffValPairs[Left].first < OffValPairs[Right].first;
+  });
 
   // Check if the order is consecutive already.
   if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
@@ -1346,7 +1344,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
   // where Step is the absolute stride of the memory accesses in bytes,
   // then there is no dependence.
   //
-  // Ratioanle:
+  // Rationale:
   // We basically want to check if the absolute distance (|Dist/Step|)
   // is >= the loop iteration count (or > BackedgeTakenCount).
   // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
@@ -1369,7 +1367,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
 
   // The dependence distance can be positive/negative, so we sign extend Dist;
   // The multiplication of the absolute stride in bytes and the
-  // backdgeTakenCount is non-negative, so we zero extend Product.
+  // backedgeTakenCount is non-negative, so we zero extend Product.
   if (DistTypeSize > ProductTypeSize)
     CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
   else
@@ -1780,6 +1778,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
   unsigned NumReads = 0;
   unsigned NumReadWrites = 0;
 
+  bool HasComplexMemInst = false;
+
+  // A runtime check is only legal to insert if there are no convergent calls.
+  HasConvergentOp = false;
+
   PtrRtChecking->Pointers.clear();
   PtrRtChecking->Need = false;
 
@@ -1787,8 +1790,25 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
 
   // For each block.
   for (BasicBlock *BB : TheLoop->blocks()) {
-    // Scan the BB and collect legal loads and stores.
+    // Scan the BB and collect legal loads and stores. Also detect any
+    // convergent instructions.
     for (Instruction &I : *BB) {
+      if (auto *Call = dyn_cast<CallBase>(&I)) {
+        if (Call->isConvergent())
+          HasConvergentOp = true;
+      }
+
+      // With both a non-vectorizable memory instruction and a convergent
+      // operation, found in this loop, no reason to continue the search.
+      if (HasComplexMemInst && HasConvergentOp) {
+        CanVecMem = false;
+        return;
+      }
+
+      // Avoid hitting recordAnalysis multiple times.
+      if (HasComplexMemInst)
+        continue;
+
       // If this is a load, save it. If this instruction can read from memory
       // but is not a load, then we quit. Notice that we don't handle function
       // calls that read or write.
@@ -1807,12 +1827,18 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
           continue;
 
         auto *Ld = dyn_cast<LoadInst>(&I);
-        if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+        if (!Ld) {
+          recordAnalysis("CantVectorizeInstruction", Ld)
+              << "instruction cannot be vectorized";
+          HasComplexMemInst = true;
+          continue;
+        }
+
+        if (!Ld->isSimple() && !IsAnnotatedParallel) {
           recordAnalysis("NonSimpleLoad", Ld)
               << "read with atomic ordering or volatile read";
           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
-          CanVecMem = false;
-          return;
+          HasComplexMemInst = true;
+          continue;
         }
         NumLoads++;
         Loads.push_back(Ld);
@@ -1828,15 +1854,15 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
         if (!St) {
           recordAnalysis("CantVectorizeInstruction", St)
               << "instruction cannot be vectorized";
-          CanVecMem = false;
-          return;
+          HasComplexMemInst = true;
+          continue;
         }
         if (!St->isSimple() && !IsAnnotatedParallel) {
           recordAnalysis("NonSimpleStore", St)
               << "write with atomic ordering or volatile write";
           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
-          CanVecMem = false;
-          return;
+          HasComplexMemInst = true;
+          continue;
         }
         NumStores++;
         Stores.push_back(St);
@@ -1847,6 +1873,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
     } // Next instr.
   } // Next block.
 
+  if (HasComplexMemInst) {
+    CanVecMem = false;
+    return;
+  }
+
   // Now we have two lists that hold the loads and the stores.
   // Next, we find the pointers that they use.
 
@@ -1964,7 +1995,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
   }
 
   LLVM_DEBUG(
-      dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
+      dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
   CanVecMem = true;
 
   if (Accesses.isDependencyCheckNeeded()) {
@@ -1999,6 +2030,15 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
     }
   }
 
+  if (HasConvergentOp) {
+    recordAnalysis("CantInsertRuntimeCheckWithConvergent")
+        << "cannot add control dependency to convergent operation";
+    LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
+                         "would be needed with a convergent operation\n");
+    CanVecMem = false;
+    return;
+  }
+
   if (CanVecMem)
     LLVM_DEBUG(
         dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
@@ -2252,7 +2292,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
 
   // Match the types so we can compare the stride and the BETakenCount.
   // The Stride can be positive/negative, so we sign extend Stride;
-  // The backdgeTakenCount is non-negative, so we zero extend BETakenCount.
+  // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
   const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
   uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
   uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
@@ -2287,6 +2327,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
       PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
      DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
       NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
+      HasConvergentOp(false),
       HasDependenceInvolvingLoopInvariantAddress(false) {
   if (canAnalyzeLoop())
     analyzeLoop(AA, LI, TLI, DT);
@@ -2303,6 +2344,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
     OS << "\n";
   }
 
+  if (HasConvergentOp)
+    OS.indent(Depth) << "Has convergent operation in loop\n";
+
  if (Report)
     OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
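The behavioral change in analyzeLoop is easiest to see in isolation: the scan no longer returns at the first non-vectorizable memory instruction, but records it and keeps looking for convergent calls (e.g. barrier-like operations), and only bails out early once both conditions are known. Below is a minimal, self-contained sketch of that control flow; Inst, ScanResult, and scanLoopBody are illustrative stand-ins invented for this sketch, not LLVM types or APIs.

// Simplified sketch (assumed toy types, not the LLVM API) of the deferred
// bail-out logic the patch introduces in LoopAccessInfo::analyzeLoop.
#include <vector>

struct Inst {
  bool IsConvergentCall = false;   // stand-in for CallBase::isConvergent()
  bool IsComplexMemAccess = false; // stand-in for a non-simple load/store etc.
};

struct ScanResult {
  bool CanVecMem = true;
  bool HasConvergentOp = false;
};

ScanResult scanLoopBody(const std::vector<Inst> &Body) {
  ScanResult R;
  bool HasComplexMemInst = false;

  for (const Inst &I : Body) {
    if (I.IsConvergentCall)
      R.HasConvergentOp = true;

    // Once both a non-vectorizable memory instruction and a convergent
    // operation have been seen, there is no reason to keep searching.
    if (HasComplexMemInst && R.HasConvergentOp) {
      R.CanVecMem = false;
      return R;
    }

    // Remember the problem, but keep scanning for convergent calls.
    if (I.IsComplexMemAccess)
      HasComplexMemInst = true;
  }

  if (HasComplexMemInst)
    R.CanVecMem = false;
  return R;
}

The reason the flag matters is visible later in the patch: when HasConvergentOp is set and a runtime pointer check would be required, the analysis now reports CantInsertRuntimeCheckWithConvergent and refuses to vectorize, because inserting the check would add a new control dependency to the convergent operation.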