diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 66 |
1 files changed, 38 insertions, 28 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index 1877ac1dfd08..b615a0a0a9c0 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -98,20 +98,21 @@ struct StoreToLoadForwardingCandidate { Value *LoadPtr = Load->getPointerOperand(); Value *StorePtr = Store->getPointerOperand(); Type *LoadType = getLoadStoreType(Load); + auto &DL = Load->getParent()->getModule()->getDataLayout(); assert(LoadPtr->getType()->getPointerAddressSpace() == StorePtr->getType()->getPointerAddressSpace() && - LoadType == getLoadStoreType(Store) && + DL.getTypeSizeInBits(LoadType) == + DL.getTypeSizeInBits(getLoadStoreType(Store)) && "Should be a known dependence"); // Currently we only support accesses with unit stride. FIXME: we should be // able to handle non unit stirde as well as long as the stride is equal to // the dependence distance. - if (getPtrStride(PSE, LoadType, LoadPtr, L) != 1 || - getPtrStride(PSE, LoadType, StorePtr, L) != 1) + if (getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0) != 1 || + getPtrStride(PSE, LoadType, StorePtr, L).value_or(0) != 1) return false; - auto &DL = Load->getParent()->getModule()->getDataLayout(); unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType)); auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr)); @@ -211,9 +212,10 @@ public: if (!Load) continue; - // Only progagate the value if they are of the same type. - if (Store->getPointerOperandType() != Load->getPointerOperandType() || - getLoadStoreType(Store) != getLoadStoreType(Load)) + // Only propagate if the stored values are bit/pointer castable. + if (!CastInst::isBitOrNoopPointerCastable( + getLoadStoreType(Store), getLoadStoreType(Load), + Store->getParent()->getModule()->getDataLayout())) continue; Candidates.emplace_front(Load, Store); @@ -438,7 +440,21 @@ public: PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded", &L->getHeader()->front()); PHI->addIncoming(Initial, PH); - PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch()); + + Type *LoadType = Initial->getType(); + Type *StoreType = Cand.Store->getValueOperand()->getType(); + auto &DL = Cand.Load->getParent()->getModule()->getDataLayout(); + (void)DL; + + assert(DL.getTypeSizeInBits(LoadType) == DL.getTypeSizeInBits(StoreType) && + "The type sizes should match!"); + + Value *StoreValue = Cand.Store->getValueOperand(); + if (LoadType != StoreType) + StoreValue = CastInst::CreateBitOrPointerCast( + StoreValue, LoadType, "store_forward_cast", Cand.Store); + + PHI->addIncoming(StoreValue, L->getLoopLatch()); Cand.Load->replaceAllUsesWith(PHI); } @@ -605,11 +621,12 @@ private: } // end anonymous namespace -static bool -eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - ScalarEvolution *SE, AssumptionCache *AC, - function_ref<const LoopAccessInfo &(Loop &)> GetLAI) { +static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, + DominatorTree &DT, + BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, + ScalarEvolution *SE, AssumptionCache *AC, + LoopAccessInfoManager &LAIs) { // Build up a worklist of inner-loops to transform to avoid iterator // invalidation. // FIXME: This logic comes from other passes that actually change the loop @@ -633,8 +650,10 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, if (!L->isRotatedForm() || !L->getExitingBlock()) continue; // The actual work is performed by LoadEliminationForLoop. - LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI); + LoadEliminationForLoop LEL(L, &LI, LAIs.getInfo(*L), &DT, BFI, PSI); Changed |= LEL.processLoop(); + if (Changed) + LAIs.clear(); } return Changed; } @@ -656,7 +675,7 @@ public: return false; auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>(); + auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs(); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); auto *BFI = (PSI && PSI->hasProfileSummary()) ? @@ -665,9 +684,8 @@ public: auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); // Process each loop nest in the function. - return eliminateLoadsAcrossLoops( - F, LI, DT, BFI, PSI, SE, /*AC*/ nullptr, - [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); }); + return eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, SE, /*AC*/ nullptr, + LAIs); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -712,23 +730,15 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F, if (LI.empty()) return PreservedAnalyses::all(); auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); - auto &TTI = AM.getResult<TargetIRAnalysis>(F); auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); - auto &AA = AM.getResult<AAManager>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); auto *BFI = (PSI && PSI->hasProfileSummary()) ? &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; + LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F); - auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); - bool Changed = eliminateLoadsAcrossLoops( - F, LI, DT, BFI, PSI, &SE, &AC, [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, - TLI, TTI, nullptr, nullptr, nullptr}; - return LAM.getResult<LoopAccessAnalysis>(L, AR); - }); + bool Changed = eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, &SE, &AC, LAIs); if (!Changed) return PreservedAnalyses::all(); |