author     Dimitry Andric <dim@FreeBSD.org>  2019-10-23 17:51:42 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2019-10-23 17:51:42 +0000
commit     1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree       2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
parent     e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
Diffstat (limited to 'lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp')
-rw-r--r--  lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp  78
1 file changed, 72 insertions, 6 deletions
diff --git a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 06222d7e7e44..a24de3ca213f 100644
--- a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -121,14 +121,13 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) {
BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
Instruction *TermI = GuardBB->getTerminator();
- BasicBlock *TrueBB, *FalseBB;
ICmpInst::Predicate Pred;
- if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()), TrueBB,
- FalseBB)))
+ BasicBlock *PhiBB = Phi.getParent();
+ if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()),
+ m_SpecificBB(PhiBB), m_SpecificBB(RotBB))))
return false;
- BasicBlock *PhiBB = Phi.getParent();
- if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB)
+ if (Pred != CmpInst::ICMP_EQ)
return false;
// We matched a variation of this IR pattern:
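For reference, this is the shape of source code that typically lowers to the guarded-rotate IR matched above (a sketch for illustration; not taken from the patch):

  // Classic guarded rotate-left: the branch exists only because
  // "x >> (32 - 0)" would shift by the full bit width, which is
  // undefined behavior, so the source guards the n == 0 case.
  unsigned rotl32(unsigned x, unsigned n) {
    if (n == 0)
      return x;
    return (x << n) | (x >> (32u - n));
  }

Once matched, the phi is replaced with a single funnel-shift intrinsic (llvm.fshl/llvm.fshr); the guard branch is then dead and left for later passes to clean up.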
@@ -251,6 +250,72 @@ static bool foldAnyOrAllBitsSet(Instruction &I) {
return true;
}
+// Try to recognize the function below as a popcount intrinsic.
+// This is the "best" algorithm from
+// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+// Also used in TargetLowering::expandCTPOP().
+//
+// int popcount(unsigned int i) {
+// i = i - ((i >> 1) & 0x55555555);
+// i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
+// i = ((i + (i >> 4)) & 0x0F0F0F0F);
+// return (i * 0x01010101) >> 24;
+// }
+static bool tryToRecognizePopCount(Instruction &I) {
+ if (I.getOpcode() != Instruction::LShr)
+ return false;
+
+ Type *Ty = I.getType();
+ if (!Ty->isIntOrIntVectorTy())
+ return false;
+
+ unsigned Len = Ty->getScalarSizeInBits();
+ // FIXME: fix Len == 8 and other irregular type lengths.
+ if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
+ return false;
+
+ APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
+ APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
+ APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
+ APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
+ APInt MaskShift = APInt(Len, Len - 8);
+
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Value *MulOp0;
+ // Matching "(i * 0x01010101...) >> 24".
+ if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
+ match(Op1, m_SpecificInt(MaskShift))) {
+ Value *ShiftOp0;
+ // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
+ if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
+ m_Deferred(ShiftOp0)),
+ m_SpecificInt(Mask0F)))) {
+ Value *AndOp0;
+ // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
+ if (match(ShiftOp0,
+ m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)),
+ m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)),
+ m_SpecificInt(Mask33))))) {
+ Value *Root, *SubOp1;
+ // Matching "i - ((i >> 1) & 0x55555555...)".
+ if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
+ match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
+ m_SpecificInt(Mask55)))) {
+ LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
+ IRBuilder<> Builder(&I);
+ Function *Func = Intrinsic::getDeclaration(
+ I.getModule(), Intrinsic::ctpop, I.getType());
+ I.replaceAllUsesWith(Builder.CreateCall(Func, {Root}));
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
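The final multiply in the recognized idiom is a byte-summing trick: after the 0x0F0F0F0F mask, every byte holds its own bit count (0-8); multiplying by 0x01010101 accumulates the sum of all bytes into the most significant byte, and the shift by Len - 8 (the MaskShift above; 24 for i32) extracts it. A standalone sanity check of the 32-bit case, illustrative only and not part of the patch:

  #include <cstdio>

  // The 32-bit instance of the algorithm the matcher recognizes.
  static unsigned popcount32(unsigned i) {
    i = i - ((i >> 1) & 0x55555555);                // 2-bit fields hold counts 0-2
    i = (i & 0x33333333) + ((i >> 2) & 0x33333333); // 4-bit fields hold counts 0-4
    i = (i + (i >> 4)) & 0x0F0F0F0F;                // each byte holds its own count
    return (i * 0x01010101) >> 24;                  // sum all bytes into the top byte
  }

  int main() {
    printf("%u\n", popcount32(0xF0F01234u)); // prints 13
  }

With this patch, IR equivalent to popcount32 collapses to a single @llvm.ctpop.i32 call when the pass runs (e.g. via opt -aggressive-instcombine).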
@@ -269,6 +334,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
MadeChange |= foldAnyOrAllBitsSet(I);
MadeChange |= foldGuardedRotateToFunnelShift(I);
+ MadeChange |= tryToRecognizePopCount(I);
}
}
@@ -303,7 +369,7 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
}
bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return runImpl(F, TLI, DT);
}