summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/LoopIdiomRecognize.cpp')
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp133
1 files changed, 77 insertions, 56 deletions
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 21551f0a0825..d8692198f7a3 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -37,7 +37,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -57,6 +56,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -87,8 +87,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <cassert>
@@ -188,8 +188,9 @@ private:
PHINode *CntPhi, Value *Var);
bool recognizeAndInsertCTLZ();
void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst,
- PHINode *CntPhi, Value *Var, const DebugLoc DL,
- bool ZeroCheck, bool IsCntPhiUsedOutsideLoop);
+ PHINode *CntPhi, Value *Var, Instruction *DefX,
+ const DebugLoc &DL, bool ZeroCheck,
+ bool IsCntPhiUsedOutsideLoop);
/// @}
};
@@ -310,9 +311,9 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
SmallVector<BasicBlock *, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
- DEBUG(dbgs() << "loop-idiom Scanning: F["
- << CurLoop->getHeader()->getParent()->getName() << "] Loop %"
- << CurLoop->getHeader()->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "loop-idiom Scanning: F["
+ << CurLoop->getHeader()->getParent()->getName()
+ << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
bool MadeChange = false;
@@ -756,8 +757,8 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
MSIs.insert(MSI);
bool NegStride = SizeInBytes == -Stride;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
- BECount, NegStride, /*IsLoopMemset=*/true);
+ MSI->getDestAlignment(), SplatValue, MSI, MSIs,
+ Ev, BECount, NegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -936,8 +937,9 @@ bool LoopIdiomRecognize::processLoopStridedStore(
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
}
- DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
- << " from store to: " << *Ev << " at: " << *TheStore << "\n");
+ LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
+ << " from store to: " << *Ev << " at: " << *TheStore
+ << "\n");
NewCall->setDebugLoc(TheStore->getDebugLoc());
// Okay, the memset has been formed. Zap the original store and anything that
@@ -1037,16 +1039,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
- unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
- // If the load or store are atomic, then they must neccessarily be unordered
+ // If the load or store are atomic, then they must necessarily be unordered
// by previous checks.
if (!SI->isAtomic() && !LI->isAtomic())
- NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
+ NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
+ LoadBasePtr, LI->getAlignment(), NumBytes);
else {
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
+ unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
if (Align < StoreSize)
return false;
@@ -1066,9 +1069,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
}
NewCall->setDebugLoc(SI->getDebugLoc());
- DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
- << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
- << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+ LLVM_DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
+ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+ << " from store ptr=" << *StoreEv << " at: " << *SI
+ << "\n");
// Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
@@ -1084,9 +1088,9 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
bool IsLoopMemset) {
if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) {
- DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
- << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
- << " avoided: multi-block top-level loop\n");
+ LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
+ << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
+ << " avoided: multi-block top-level loop\n");
return true;
}
}
@@ -1195,14 +1199,13 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
VarX1 = DefX2->getOperand(0);
SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
}
- if (!SubOneOp)
+ if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
return false;
- Instruction *SubInst = cast<Instruction>(SubOneOp);
- ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
+ ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
if (!Dec ||
- !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
- (SubInst->getOpcode() == Instruction::Add &&
+ !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
+ (SubOneOp->getOpcode() == Instruction::Add &&
Dec->isMinusOne()))) {
return false;
}
@@ -1314,7 +1317,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
return false;
// step 2: detect instructions corresponding to "x.next = x >> 1"
- if (!DefX || DefX->getOpcode() != Instruction::AShr)
+ if (!DefX || (DefX->getOpcode() != Instruction::AShr &&
+ DefX->getOpcode() != Instruction::LShr))
return false;
ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
if (!Shft || !Shft->isOne())
@@ -1372,13 +1376,13 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
bool IsCntPhiUsedOutsideLoop = false;
for (User *U : CntPhi->users())
- if (!CurLoop->contains(dyn_cast<Instruction>(U))) {
+ if (!CurLoop->contains(cast<Instruction>(U))) {
IsCntPhiUsedOutsideLoop = true;
break;
}
bool IsCntInstUsedOutsideLoop = false;
for (User *U : CntInst->users())
- if (!CurLoop->contains(dyn_cast<Instruction>(U))) {
+ if (!CurLoop->contains(cast<Instruction>(U))) {
IsCntInstUsedOutsideLoop = true;
break;
}
@@ -1395,16 +1399,27 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
// parent function RunOnLoop.
BasicBlock *PH = CurLoop->getLoopPreheader();
Value *InitX = PhiX->getIncomingValueForBlock(PH);
- // If we check X != 0 before entering the loop we don't need a zero
- // check in CTLZ intrinsic, but only if Cnt Phi is not used outside of the
- // loop (if it is used we count CTLZ(X >> 1)).
- if (!IsCntPhiUsedOutsideLoop)
- if (BasicBlock *PreCondBB = PH->getSinglePredecessor())
- if (BranchInst *PreCondBr =
- dyn_cast<BranchInst>(PreCondBB->getTerminator())) {
- if (matchCondition(PreCondBr, PH) == InitX)
- ZeroCheck = true;
- }
+
+ // Make sure the initial value can't be negative otherwise the ashr in the
+ // loop might never reach zero which would make the loop infinite.
+ if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL))
+ return false;
+
+ // If we are using the count instruction outside the loop, make sure we
+ // have a zero check as a precondition. Without the check the loop would run
+ // one iteration for before any check of the input value. This means 0 and 1
+ // would have identical behavior in the original loop and thus
+ if (!IsCntPhiUsedOutsideLoop) {
+ auto *PreCondBB = PH->getSinglePredecessor();
+ if (!PreCondBB)
+ return false;
+ auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ if (!PreCondBI)
+ return false;
+ if (matchCondition(PreCondBI, PH) != InitX)
+ return false;
+ ZeroCheck = true;
+ }
// Check if CTLZ intrinsic is profitable. Assume it is always profitable
// if we delete the loop (the loop has only 6 instructions):
@@ -1415,17 +1430,16 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
// %inc = add nsw %i.0, 1
// br i1 %tobool
- IRBuilder<> Builder(PH->getTerminator());
- SmallVector<const Value *, 2> Ops =
- {InitX, ZeroCheck ? Builder.getTrue() : Builder.getFalse()};
- ArrayRef<const Value *> Args(Ops);
+ const Value *Args[] =
+ {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
+ : ConstantInt::getFalse(InitX->getContext())};
if (CurLoop->getHeader()->size() != 6 &&
TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) >
TargetTransformInfo::TCC_Basic)
return false;
- const DebugLoc DL = DefX->getDebugLoc();
- transformLoopToCountable(PH, CntInst, CntPhi, InitX, DL, ZeroCheck,
+ transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX,
+ DefX->getDebugLoc(), ZeroCheck,
IsCntPhiUsedOutsideLoop);
return true;
}
@@ -1461,7 +1475,7 @@ bool LoopIdiomRecognize::recognizePopcount() {
if (!EntryBI || EntryBI->isConditional())
return false;
- // It should have a precondition block where the generated popcount instrinsic
+ // It should have a precondition block where the generated popcount intrinsic
// function can be inserted.
auto *PreCondBB = PH->getSinglePredecessor();
if (!PreCondBB)
@@ -1539,8 +1553,9 @@ static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
void LoopIdiomRecognize::transformLoopToCountable(
BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX,
- const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
- BranchInst *PreheaderBr = dyn_cast<BranchInst>(Preheader->getTerminator());
+ Instruction *DefX, const DebugLoc &DL, bool ZeroCheck,
+ bool IsCntPhiUsedOutsideLoop) {
+ BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
// Step 1: Insert the CTLZ instruction at the end of the preheader block
// Count = BitWidth - CTLZ(InitX);
@@ -1550,10 +1565,16 @@ void LoopIdiomRecognize::transformLoopToCountable(
Builder.SetCurrentDebugLocation(DL);
Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext;
- if (IsCntPhiUsedOutsideLoop)
- InitXNext = Builder.CreateAShr(InitX,
- ConstantInt::get(InitX->getType(), 1));
- else
+ if (IsCntPhiUsedOutsideLoop) {
+ if (DefX->getOpcode() == Instruction::AShr)
+ InitXNext =
+ Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1));
+ else if (DefX->getOpcode() == Instruction::LShr)
+ InitXNext =
+ Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
+ else
+ llvm_unreachable("Unexpected opcode!");
+ } else
InitXNext = InitX;
CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
Count = Builder.CreateSub(
@@ -1588,7 +1609,7 @@ void LoopIdiomRecognize::transformLoopToCountable(
// ...
// Br: loop if (Dec != 0)
BasicBlock *Body = *(CurLoop->block_begin());
- auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator());
+ auto *LbBr = cast<BranchInst>(Body->getTerminator());
ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
Type *Ty = Count->getType();
@@ -1625,8 +1646,8 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
Instruction *CntInst,
PHINode *CntPhi, Value *Var) {
BasicBlock *PreHead = CurLoop->getLoopPreheader();
- auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
- const DebugLoc DL = CntInst->getDebugLoc();
+ auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
+ const DebugLoc &DL = CntInst->getDebugLoc();
// Assuming before transformation, the loop is following:
// if (x) // the precondition
@@ -1675,7 +1696,7 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
}
// Step 3: Note that the population count is exactly the trip count of the
- // loop in question, which enable us to to convert the loop from noncountable
+ // loop in question, which enable us to convert the loop from noncountable
// loop into a countable one. The benefit is twofold:
//
// - If the loop only counts population, the entire loop becomes dead after
@@ -1696,7 +1717,7 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
// do { cnt++; x &= x-1; t--) } while (t > 0);
BasicBlock *Body = *(CurLoop->block_begin());
{
- auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator());
+ auto *LbBr = cast<BranchInst>(Body->getTerminator());
ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
Type *Ty = TripCnt->getType();