summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/LoopIdiomRecognize.cpp')
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp190
1 files changed, 110 insertions, 80 deletions
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 653948717fb9..fbffa1920a84 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -26,7 +26,7 @@
// Future floating point idioms to recognize in -ffast-math mode:
// fpowi
// Future integer operation idioms to recognize:
-// ctpop, ctlz, cttz
+// ctpop
//
// Beware that isel's default lowering for ctpop is highly inefficient for
// i64 and larger types when i64 is legal and the value has few bits set. It
@@ -163,8 +163,9 @@ private:
void collectStores(BasicBlock *BB);
LegalStoreKind isLegalStore(StoreInst *SI);
+ enum class ForMemset { No, Yes };
bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
- bool ForMemset);
+ ForMemset For);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
@@ -186,9 +187,10 @@ private:
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
- bool recognizeAndInsertCTLZ();
- void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst,
- PHINode *CntPhi, Value *Var, Instruction *DefX,
+ bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
+ void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
+ Instruction *CntInst, PHINode *CntPhi,
+ Value *Var, Instruction *DefX,
const DebugLoc &DL, bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop);
@@ -319,9 +321,9 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
// The following transforms hoist stores/memsets into the loop pre-header.
// Give up if the loop has instructions may throw.
- LoopSafetyInfo SafetyInfo;
- computeLoopSafetyInfo(&SafetyInfo, CurLoop);
- if (SafetyInfo.MayThrow)
+ SimpleLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(CurLoop);
+ if (SafetyInfo.anyBlockMayThrow())
return MadeChange;
// Scan all the blocks in the loop that are not in subloops.
@@ -347,6 +349,9 @@ static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
+ // FIXME: This could check for UndefValue because it can be merged into any
+ // other valid pattern.
+
// If the value isn't a constant, we can't promote it to being in a constant
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
@@ -543,10 +548,10 @@ bool LoopIdiomRecognize::runOnLoopBlock(
// optimized into a memset (memset_pattern). The latter most commonly happens
// with structs and handunrolled loops.
for (auto &SL : StoreRefsForMemset)
- MadeChange |= processLoopStores(SL.second, BECount, true);
+ MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
for (auto &SL : StoreRefsForMemsetPattern)
- MadeChange |= processLoopStores(SL.second, BECount, false);
+ MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
// Optimize the store into a memcpy, if it feeds an similarly strided load.
for (auto &SI : StoreRefsForMemcpy)
@@ -572,10 +577,9 @@ bool LoopIdiomRecognize::runOnLoopBlock(
return MadeChange;
}
-/// processLoopStores - See if this store(s) can be promoted to a memset.
+/// See if this store(s) can be promoted to a memset.
bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
- const SCEV *BECount,
- bool ForMemset) {
+ const SCEV *BECount, ForMemset For) {
// Try to find consecutive stores that can be transformed into memsets.
SetVector<StoreInst *> Heads, Tails;
SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
@@ -602,7 +606,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
Value *FirstSplatValue = nullptr;
Constant *FirstPatternValue = nullptr;
- if (ForMemset)
+ if (For == ForMemset::Yes)
FirstSplatValue = isBytewiseValue(FirstStoredVal);
else
FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
@@ -635,7 +639,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
Value *SecondSplatValue = nullptr;
Constant *SecondPatternValue = nullptr;
- if (ForMemset)
+ if (For == ForMemset::Yes)
SecondSplatValue = isBytewiseValue(SecondStoredVal);
else
SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
@@ -644,10 +648,14 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
"Expected either splat value or pattern value.");
if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
- if (ForMemset) {
+ if (For == ForMemset::Yes) {
+ if (isa<UndefValue>(FirstSplatValue))
+ FirstSplatValue = SecondSplatValue;
if (FirstSplatValue != SecondSplatValue)
continue;
} else {
+ if (isa<UndefValue>(FirstPatternValue))
+ FirstPatternValue = SecondPatternValue;
if (FirstPatternValue != SecondPatternValue)
continue;
}
@@ -772,12 +780,13 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
- uint64_t AccessSize = MemoryLocation::UnknownSize;
+ LocationSize AccessSize = LocationSize::unknown();
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
+ AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
+ StoreSize);
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
@@ -921,10 +930,11 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Type *Int8PtrTy = DestInt8PtrTy;
Module *M = TheStore->getModule();
+ StringRef FuncName = "memset_pattern16";
Value *MSP =
- M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(),
+ M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
Int8PtrTy, Int8PtrTy, IntPtr);
- inferLibFuncAttributes(*M->getFunction("memset_pattern16"), *TLI);
+ inferLibFuncAttributes(M, FuncName, *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// an constant array of 16-bytes. Plop the value into a mergable global.
@@ -1099,15 +1109,17 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
}
bool LoopIdiomRecognize::runOnNoncountableLoop() {
- return recognizePopcount() || recognizeAndInsertCTLZ();
+ return recognizePopcount() || recognizeAndInsertFFS();
}
/// Check if the given conditional branch is based on the comparison between
-/// a variable and zero, and if the variable is non-zero, the control yields to
-/// the loop entry. If the branch matches the behavior, the variable involved
-/// in the comparison is returned. This function will be called to see if the
-/// precondition and postcondition of the loop are in desirable form.
-static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
+/// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
+/// true), the control yields to the loop entry. If the branch matches the
+/// behavior, the variable involved in the comparison is returned. This function
+/// will be called to see if the precondition and postcondition of the loop are
+/// in desirable form.
+static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
+ bool JmpOnZero = false) {
if (!BI || !BI->isConditional())
return nullptr;
@@ -1119,9 +1131,14 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
if (!CmpZero || !CmpZero->isZero())
return nullptr;
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ if (JmpOnZero)
+ std::swap(TrueSucc, FalseSucc);
+
ICmpInst::Predicate Pred = Cond->getPredicate();
- if ((Pred == ICmpInst::ICMP_NE && BI->getSuccessor(0) == LoopEntry) ||
- (Pred == ICmpInst::ICMP_EQ && BI->getSuccessor(1) == LoopEntry))
+ if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
+ (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
return Cond->getOperand(0);
return nullptr;
@@ -1297,14 +1314,14 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
///
/// loop-exit:
/// \endcode
-static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
- Instruction *&CntInst, PHINode *&CntPhi,
- Instruction *&DefX) {
+static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
+ Intrinsic::ID &IntrinID, Value *&InitX,
+ Instruction *&CntInst, PHINode *&CntPhi,
+ Instruction *&DefX) {
BasicBlock *LoopEntry;
Value *VarX = nullptr;
DefX = nullptr;
- PhiX = nullptr;
CntInst = nullptr;
CntPhi = nullptr;
LoopEntry = *(CurLoop->block_begin());
@@ -1316,20 +1333,28 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
else
return false;
- // step 2: detect instructions corresponding to "x.next = x >> 1"
- if (!DefX || (DefX->getOpcode() != Instruction::AShr &&
- DefX->getOpcode() != Instruction::LShr))
+ // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
+ if (!DefX || !DefX->isShift())
return false;
+ IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
+ Intrinsic::ctlz;
ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
if (!Shft || !Shft->isOne())
return false;
VarX = DefX->getOperand(0);
// step 3: Check the recurrence of variable X
- PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
+ PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
if (!PhiX)
return false;
+ InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
+
+ // Make sure the initial value can't be negative otherwise the ashr in the
+ // loop might never reach zero which would make the loop infinite.
+ if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
+ return false;
+
// step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
// TODO: We can skip the step. If loop trip count is known (CTLZ),
// then all uses of "cnt.next" could be optimized to the trip count
@@ -1361,17 +1386,25 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
return true;
}
-/// Recognize CTLZ idiom in a non-countable loop and convert the loop
-/// to countable (with CTLZ trip count).
-/// If CTLZ inserted as a new trip count returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
+/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
+/// trip count returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertFFS() {
// Give up if the loop has multiple blocks or multiple backedges.
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
return false;
- Instruction *CntInst, *DefX;
- PHINode *CntPhi, *PhiX;
- if (!detectCTLZIdiom(CurLoop, PhiX, CntInst, CntPhi, DefX))
+ Intrinsic::ID IntrinID;
+ Value *InitX;
+ Instruction *DefX = nullptr;
+ PHINode *CntPhi = nullptr;
+ Instruction *CntInst = nullptr;
+ // Help decide if transformation is profitable. For ShiftUntilZero idiom,
+ // this is always 6.
+ size_t IdiomCanonicalSize = 6;
+
+ if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
+ CntInst, CntPhi, DefX))
return false;
bool IsCntPhiUsedOutsideLoop = false;
@@ -1398,12 +1431,6 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
// It is safe to assume Preheader exist as it was checked in
// parent function RunOnLoop.
BasicBlock *PH = CurLoop->getLoopPreheader();
- Value *InitX = PhiX->getIncomingValueForBlock(PH);
-
- // Make sure the initial value can't be negative otherwise the ashr in the
- // loop might never reach zero which would make the loop infinite.
- if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL))
- return false;
// If we are using the count instruction outside the loop, make sure we
// have a zero check as a precondition. Without the check the loop would run
@@ -1421,8 +1448,10 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
ZeroCheck = true;
}
- // Check if CTLZ intrinsic is profitable. Assume it is always profitable
- // if we delete the loop (the loop has only 6 instructions):
+ // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
+ // profitable if we delete the loop.
+
+ // the loop has only 6 instructions:
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
// %shr = ashr %n.addr.0, 1
@@ -1433,12 +1462,12 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
const Value *Args[] =
{InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
: ConstantInt::getFalse(InitX->getContext())};
- if (CurLoop->getHeader()->size() != 6 &&
- TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) >
- TargetTransformInfo::TCC_Basic)
+ if (CurLoop->getHeader()->size() != IdiomCanonicalSize &&
+ TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
+ TargetTransformInfo::TCC_Basic)
return false;
- transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX,
+ transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
DefX->getDebugLoc(), ZeroCheck,
IsCntPhiUsedOutsideLoop);
return true;
@@ -1507,20 +1536,21 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
return CI;
}
-static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
- const DebugLoc &DL, bool ZeroCheck) {
+static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
+ const DebugLoc &DL, bool ZeroCheck,
+ Intrinsic::ID IID) {
Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
Type *Tys[] = {Val->getType()};
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
- Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys);
+ Value *Func = Intrinsic::getDeclaration(M, IID, Tys);
CallInst *CI = IRBuilder.CreateCall(Func, Ops);
CI->setDebugLoc(DL);
return CI;
}
-/// Transform the following loop:
+/// Transform the following loop (Using CTLZ, CTTZ is similar):
/// loop:
/// CntPhi = PHI [Cnt0, CntInst]
/// PhiX = PHI [InitX, DefX]
@@ -1552,19 +1582,19 @@ static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
/// If LOOP_BODY is empty the loop will be deleted.
/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
void LoopIdiomRecognize::transformLoopToCountable(
- BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX,
- Instruction *DefX, const DebugLoc &DL, bool ZeroCheck,
- bool IsCntPhiUsedOutsideLoop) {
+ Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
+ PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
+ bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
- // Step 1: Insert the CTLZ instruction at the end of the preheader block
- // Count = BitWidth - CTLZ(InitX);
- // If there are uses of CntPhi create:
- // CountPrev = BitWidth - CTLZ(InitX >> 1);
+ // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
IRBuilder<> Builder(PreheaderBr);
Builder.SetCurrentDebugLocation(DL);
- Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext;
+ Value *FFS, *Count, *CountPrev, *NewCount, *InitXNext;
+ // Count = BitWidth - CTLZ(InitX);
+ // If there are uses of CntPhi create:
+ // CountPrev = BitWidth - CTLZ(InitX >> 1);
if (IsCntPhiUsedOutsideLoop) {
if (DefX->getOpcode() == Instruction::AShr)
InitXNext =
@@ -1572,29 +1602,30 @@ void LoopIdiomRecognize::transformLoopToCountable(
else if (DefX->getOpcode() == Instruction::LShr)
InitXNext =
Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
+ else if (DefX->getOpcode() == Instruction::Shl) // cttz
+ InitXNext =
+ Builder.CreateShl(InitX, ConstantInt::get(InitX->getType(), 1));
else
llvm_unreachable("Unexpected opcode!");
} else
InitXNext = InitX;
- CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
+ FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
Count = Builder.CreateSub(
- ConstantInt::get(CTLZ->getType(),
- CTLZ->getType()->getIntegerBitWidth()),
- CTLZ);
+ ConstantInt::get(FFS->getType(),
+ FFS->getType()->getIntegerBitWidth()),
+ FFS);
if (IsCntPhiUsedOutsideLoop) {
CountPrev = Count;
Count = Builder.CreateAdd(
CountPrev,
ConstantInt::get(CountPrev->getType(), 1));
}
- if (IsCntPhiUsedOutsideLoop)
- NewCount = Builder.CreateZExtOrTrunc(CountPrev,
- cast<IntegerType>(CntInst->getType()));
- else
- NewCount = Builder.CreateZExtOrTrunc(Count,
- cast<IntegerType>(CntInst->getType()));
- // If the CTLZ counter's initial value is not zero, insert Add Inst.
+ NewCount = Builder.CreateZExtOrTrunc(
+ IsCntPhiUsedOutsideLoop ? CountPrev : Count,
+ cast<IntegerType>(CntInst->getType()));
+
+ // If the counter's initial value is not zero, insert Add Inst.
Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
if (!InitConst || !InitConst->isZero())
@@ -1630,8 +1661,7 @@ void LoopIdiomRecognize::transformLoopToCountable(
LbCond->setOperand(1, ConstantInt::get(Ty, 0));
// Step 3: All the references to the original counter outside
- // the loop are replaced with the NewCount -- the value returned from
- // __builtin_ctlz(x).
+ // the loop are replaced with the NewCount
if (IsCntPhiUsedOutsideLoop)
CntPhi->replaceUsesOutsideBlock(NewCount, Body);
else