diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopIdiomRecognize.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 94 |
1 files changed, 70 insertions, 24 deletions
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index fbffa1920a84..e561494f19cf 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1,9 +1,8 @@ //===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -37,6 +36,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -51,12 +51,12 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -87,8 +87,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include <algorithm> #include <cassert> @@ -120,6 +120,7 @@ class LoopIdiomRecognize { TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; const DataLayout *DL; + OptimizationRemarkEmitter &ORE; bool ApplyCodeSizeHeuristics; public: @@ -127,8 +128,9 @@ public: LoopInfo *LI, ScalarEvolution *SE, TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const DataLayout *DL) - : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {} + const DataLayout *DL, + OptimizationRemarkEmitter &ORE) + : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {} bool runOnLoop(Loop *L); @@ -221,7 +223,12 @@ public: *L->getHeader()->getParent()); const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout(); - LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL); + // For the old PM, we can't use OptimizationRemarkEmitter as an analysis + // pass. Function analyses need to be preserved across loop transformations + // but ORE cannot be preserved (see comment before the pass definition). + OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); + + LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE); return LIR.runOnLoop(L); } @@ -243,7 +250,19 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, LPMUpdater &) { const auto *DL = &L.getHeader()->getModule()->getDataLayout(); - LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL); + const auto &FAM = + AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); + Function *F = L.getHeader()->getParent(); + + auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); + // FIXME: This should probably be optional rather than required. + if (!ORE) + report_fatal_error( + "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached " + "at a higher level"); + + LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL, + *ORE); if (!LIR.runOnLoop(&L)) return PreservedAnalyses::all(); @@ -285,7 +304,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) { // Determine if code size heuristics need to be applied. ApplyCodeSizeHeuristics = - L->getHeader()->getParent()->optForSize() && UseLIRCodeSizeHeurs; + L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs; HasMemset = TLI->has(LibFunc_memset); HasMemsetPattern = TLI->has(LibFunc_memset_pattern16); @@ -313,9 +332,10 @@ bool LoopIdiomRecognize::runOnCountableLoop() { SmallVector<BasicBlock *, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); - LLVM_DEBUG(dbgs() << "loop-idiom Scanning: F[" + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F[" << CurLoop->getHeader()->getParent()->getName() - << "] Loop %" << CurLoop->getHeader()->getName() << "\n"); + << "] Countable Loop %" << CurLoop->getHeader()->getName() + << "\n"); bool MadeChange = false; @@ -430,7 +450,7 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { // turned into a memset of i8 -1, assuming that all the consecutive bytes // are stored. A store of i32 0x01020304 can never be turned into a memset, // but it can be turned into memset_pattern if the target supports it. - Value *SplatValue = isBytewiseValue(StoredVal); + Value *SplatValue = isBytewiseValue(StoredVal, *DL); Constant *PatternValue = nullptr; // Note: memset and memset_pattern on unordered-atomic is yet not supported @@ -607,7 +627,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL, Constant *FirstPatternValue = nullptr; if (For == ForMemset::Yes) - FirstSplatValue = isBytewiseValue(FirstStoredVal); + FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL); else FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL); @@ -640,7 +660,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL, Constant *SecondPatternValue = nullptr; if (For == ForMemset::Yes) - SecondSplatValue = isBytewiseValue(SecondStoredVal); + SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL); else SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL); @@ -860,7 +880,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( Value *StoredVal, Instruction *TheStore, SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev, const SCEV *BECount, bool NegStride, bool IsLoopMemset) { - Value *SplatValue = isBytewiseValue(StoredVal); + Value *SplatValue = isBytewiseValue(StoredVal, *DL); Constant *PatternValue = nullptr; if (!SplatValue) @@ -931,9 +951,8 @@ bool LoopIdiomRecognize::processLoopStridedStore( Module *M = TheStore->getModule(); StringRef FuncName = "memset_pattern16"; - Value *MSP = - M->getOrInsertFunction(FuncName, Builder.getVoidTy(), - Int8PtrTy, Int8PtrTy, IntPtr); + FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(), + Int8PtrTy, Int8PtrTy, IntPtr); inferLibFuncAttributes(M, FuncName, *TLI); // Otherwise we should form a memset_pattern16. PatternValue is known to be @@ -952,6 +971,14 @@ bool LoopIdiomRecognize::processLoopStridedStore( << "\n"); NewCall->setDebugLoc(TheStore->getDebugLoc()); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore", + NewCall->getDebugLoc(), Preheader) + << "Transformed loop-strided store into a call to " + << ore::NV("NewFunction", NewCall->getCalledFunction()) + << "() function"; + }); + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. for (auto *I : Stores) @@ -1084,6 +1111,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad", + NewCall->getDebugLoc(), Preheader) + << "Formed a call to " + << ore::NV("NewFunction", NewCall->getCalledFunction()) + << "() function"; + }); + // Okay, the memcpy has been formed. Zap the original store and anything that // feeds into it. deleteDeadInstruction(SI); @@ -1109,6 +1144,11 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset, } bool LoopIdiomRecognize::runOnNoncountableLoop() { + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F[" + << CurLoop->getHeader()->getParent()->getName() + << "] Noncountable Loop %" + << CurLoop->getHeader()->getName() << "\n"); + return recognizePopcount() || recognizeAndInsertFFS(); } @@ -1462,9 +1502,15 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() { const Value *Args[] = {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext()) : ConstantInt::getFalse(InitX->getContext())}; - if (CurLoop->getHeader()->size() != IdiomCanonicalSize && + + // @llvm.dbg doesn't count as they have no semantic effect. + auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug(); + uint32_t HeaderSize = + std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end()); + + if (HeaderSize != IdiomCanonicalSize && TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) > - TargetTransformInfo::TCC_Basic) + TargetTransformInfo::TCC_Basic) return false; transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX, @@ -1529,7 +1575,7 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, Type *Tys[] = {Val->getType()}; Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent(); - Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys); + Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys); CallInst *CI = IRBuilder.CreateCall(Func, Ops); CI->setDebugLoc(DL); @@ -1543,7 +1589,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, Type *Tys[] = {Val->getType()}; Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent(); - Value *Func = Intrinsic::getDeclaration(M, IID, Tys); + Function *Func = Intrinsic::getDeclaration(M, IID, Tys); CallInst *CI = IRBuilder.CreateCall(Func, Ops); CI->setDebugLoc(DL); |