diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopIdiomRecognize.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 133 |
1 files changed, 77 insertions, 56 deletions
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 21551f0a0825..d8692198f7a3 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -37,7 +37,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -57,6 +56,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -87,8 +87,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include <algorithm> #include <cassert> @@ -188,8 +188,9 @@ private: PHINode *CntPhi, Value *Var); bool recognizeAndInsertCTLZ(); void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst, - PHINode *CntPhi, Value *Var, const DebugLoc DL, - bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); + PHINode *CntPhi, Value *Var, Instruction *DefX, + const DebugLoc &DL, bool ZeroCheck, + bool IsCntPhiUsedOutsideLoop); /// @} }; @@ -310,9 +311,9 @@ bool LoopIdiomRecognize::runOnCountableLoop() { SmallVector<BasicBlock *, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); - DEBUG(dbgs() << "loop-idiom Scanning: F[" - << CurLoop->getHeader()->getParent()->getName() << "] Loop %" - << CurLoop->getHeader()->getName() << "\n"); + LLVM_DEBUG(dbgs() << "loop-idiom Scanning: F[" + << CurLoop->getHeader()->getParent()->getName() + << "] Loop %" << CurLoop->getHeader()->getName() << "\n"); bool MadeChange = false; @@ -756,8 +757,8 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, MSIs.insert(MSI); bool NegStride = SizeInBytes == -Stride; return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, - MSI->getAlignment(), SplatValue, MSI, MSIs, Ev, - BECount, NegStride, /*IsLoopMemset=*/true); + MSI->getDestAlignment(), SplatValue, MSI, MSIs, + Ev, BECount, NegStride, /*IsLoopMemset=*/true); } /// mayLoopAccessLocation - Return true if the specified loop might access the @@ -936,8 +937,9 @@ bool LoopIdiomRecognize::processLoopStridedStore( NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); } - DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" - << " from store to: " << *Ev << " at: " << *TheStore << "\n"); + LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" + << " from store to: " << *Ev << " at: " << *TheStore + << "\n"); NewCall->setDebugLoc(TheStore->getDebugLoc()); // Okay, the memset has been formed. Zap the original store and anything that @@ -1037,16 +1039,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); - unsigned Align = std::min(SI->getAlignment(), LI->getAlignment()); CallInst *NewCall = nullptr; // Check whether to generate an unordered atomic memcpy: - // If the load or store are atomic, then they must neccessarily be unordered + // If the load or store are atomic, then they must necessarily be unordered // by previous checks. if (!SI->isAtomic() && !LI->isAtomic()) - NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align); + NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(), + LoadBasePtr, LI->getAlignment(), NumBytes); else { // We cannot allow unaligned ops for unordered load/store, so reject // anything where the alignment isn't at least the element size. + unsigned Align = std::min(SI->getAlignment(), LI->getAlignment()); if (Align < StoreSize) return false; @@ -1066,9 +1069,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, } NewCall->setDebugLoc(SI->getDebugLoc()); - DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" - << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" - << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); + LLVM_DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" + << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" + << " from store ptr=" << *StoreEv << " at: " << *SI + << "\n"); // Okay, the memcpy has been formed. Zap the original store and anything that // feeds into it. @@ -1084,9 +1088,9 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset, bool IsLoopMemset) { if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) { if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) { - DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName() - << " : LIR " << (IsMemset ? "Memset" : "Memcpy") - << " avoided: multi-block top-level loop\n"); + LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName() + << " : LIR " << (IsMemset ? "Memset" : "Memcpy") + << " avoided: multi-block top-level loop\n"); return true; } } @@ -1195,14 +1199,13 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, VarX1 = DefX2->getOperand(0); SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1)); } - if (!SubOneOp) + if (!SubOneOp || SubOneOp->getOperand(0) != VarX1) return false; - Instruction *SubInst = cast<Instruction>(SubOneOp); - ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1)); + ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1)); if (!Dec || - !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || - (SubInst->getOpcode() == Instruction::Add && + !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) || + (SubOneOp->getOpcode() == Instruction::Add && Dec->isMinusOne()))) { return false; } @@ -1314,7 +1317,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, return false; // step 2: detect instructions corresponding to "x.next = x >> 1" - if (!DefX || DefX->getOpcode() != Instruction::AShr) + if (!DefX || (DefX->getOpcode() != Instruction::AShr && + DefX->getOpcode() != Instruction::LShr)) return false; ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1)); if (!Shft || !Shft->isOne()) @@ -1372,13 +1376,13 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() { bool IsCntPhiUsedOutsideLoop = false; for (User *U : CntPhi->users()) - if (!CurLoop->contains(dyn_cast<Instruction>(U))) { + if (!CurLoop->contains(cast<Instruction>(U))) { IsCntPhiUsedOutsideLoop = true; break; } bool IsCntInstUsedOutsideLoop = false; for (User *U : CntInst->users()) - if (!CurLoop->contains(dyn_cast<Instruction>(U))) { + if (!CurLoop->contains(cast<Instruction>(U))) { IsCntInstUsedOutsideLoop = true; break; } @@ -1395,16 +1399,27 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() { // parent function RunOnLoop. BasicBlock *PH = CurLoop->getLoopPreheader(); Value *InitX = PhiX->getIncomingValueForBlock(PH); - // If we check X != 0 before entering the loop we don't need a zero - // check in CTLZ intrinsic, but only if Cnt Phi is not used outside of the - // loop (if it is used we count CTLZ(X >> 1)). - if (!IsCntPhiUsedOutsideLoop) - if (BasicBlock *PreCondBB = PH->getSinglePredecessor()) - if (BranchInst *PreCondBr = - dyn_cast<BranchInst>(PreCondBB->getTerminator())) { - if (matchCondition(PreCondBr, PH) == InitX) - ZeroCheck = true; - } + + // Make sure the initial value can't be negative otherwise the ashr in the + // loop might never reach zero which would make the loop infinite. + if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL)) + return false; + + // If we are using the count instruction outside the loop, make sure we + // have a zero check as a precondition. Without the check the loop would run + // one iteration for before any check of the input value. This means 0 and 1 + // would have identical behavior in the original loop and thus + if (!IsCntPhiUsedOutsideLoop) { + auto *PreCondBB = PH->getSinglePredecessor(); + if (!PreCondBB) + return false; + auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator()); + if (!PreCondBI) + return false; + if (matchCondition(PreCondBI, PH) != InitX) + return false; + ZeroCheck = true; + } // Check if CTLZ intrinsic is profitable. Assume it is always profitable // if we delete the loop (the loop has only 6 instructions): @@ -1415,17 +1430,16 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() { // %inc = add nsw %i.0, 1 // br i1 %tobool - IRBuilder<> Builder(PH->getTerminator()); - SmallVector<const Value *, 2> Ops = - {InitX, ZeroCheck ? Builder.getTrue() : Builder.getFalse()}; - ArrayRef<const Value *> Args(Ops); + const Value *Args[] = + {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext()) + : ConstantInt::getFalse(InitX->getContext())}; if (CurLoop->getHeader()->size() != 6 && TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) > TargetTransformInfo::TCC_Basic) return false; - const DebugLoc DL = DefX->getDebugLoc(); - transformLoopToCountable(PH, CntInst, CntPhi, InitX, DL, ZeroCheck, + transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX, + DefX->getDebugLoc(), ZeroCheck, IsCntPhiUsedOutsideLoop); return true; } @@ -1461,7 +1475,7 @@ bool LoopIdiomRecognize::recognizePopcount() { if (!EntryBI || EntryBI->isConditional()) return false; - // It should have a precondition block where the generated popcount instrinsic + // It should have a precondition block where the generated popcount intrinsic // function can be inserted. auto *PreCondBB = PH->getSinglePredecessor(); if (!PreCondBB) @@ -1539,8 +1553,9 @@ static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val, /// If CntInst and DefX are not used in LOOP_BODY they will be removed. void LoopIdiomRecognize::transformLoopToCountable( BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX, - const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) { - BranchInst *PreheaderBr = dyn_cast<BranchInst>(Preheader->getTerminator()); + Instruction *DefX, const DebugLoc &DL, bool ZeroCheck, + bool IsCntPhiUsedOutsideLoop) { + BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator()); // Step 1: Insert the CTLZ instruction at the end of the preheader block // Count = BitWidth - CTLZ(InitX); @@ -1550,10 +1565,16 @@ void LoopIdiomRecognize::transformLoopToCountable( Builder.SetCurrentDebugLocation(DL); Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext; - if (IsCntPhiUsedOutsideLoop) - InitXNext = Builder.CreateAShr(InitX, - ConstantInt::get(InitX->getType(), 1)); - else + if (IsCntPhiUsedOutsideLoop) { + if (DefX->getOpcode() == Instruction::AShr) + InitXNext = + Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1)); + else if (DefX->getOpcode() == Instruction::LShr) + InitXNext = + Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1)); + else + llvm_unreachable("Unexpected opcode!"); + } else InitXNext = InitX; CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck); Count = Builder.CreateSub( @@ -1588,7 +1609,7 @@ void LoopIdiomRecognize::transformLoopToCountable( // ... // Br: loop if (Dec != 0) BasicBlock *Body = *(CurLoop->block_begin()); - auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator()); + auto *LbBr = cast<BranchInst>(Body->getTerminator()); ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition()); Type *Ty = Count->getType(); @@ -1625,8 +1646,8 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, Value *Var) { BasicBlock *PreHead = CurLoop->getLoopPreheader(); - auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator()); - const DebugLoc DL = CntInst->getDebugLoc(); + auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator()); + const DebugLoc &DL = CntInst->getDebugLoc(); // Assuming before transformation, the loop is following: // if (x) // the precondition @@ -1675,7 +1696,7 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, } // Step 3: Note that the population count is exactly the trip count of the - // loop in question, which enable us to to convert the loop from noncountable + // loop in question, which enable us to convert the loop from noncountable // loop into a countable one. The benefit is twofold: // // - If the loop only counts population, the entire loop becomes dead after @@ -1696,7 +1717,7 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, // do { cnt++; x &= x-1; t--) } while (t > 0); BasicBlock *Body = *(CurLoop->block_begin()); { - auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator()); + auto *LbBr = cast<BranchInst>(Body->getTerminator()); ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition()); Type *Ty = TripCnt->getType(); |