Diffstat (limited to 'llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp')
-rw-r--r--  llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp  92
1 file changed, 81 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 7243e39c9029..1fd8b88dd776 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -22,8 +22,8 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -36,6 +36,10 @@
using namespace llvm;
using namespace PatternMatch;
+namespace llvm {
+class DataLayout;
+}
+
#define DEBUG_TYPE "aggressive-instcombine"
STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
@@ -200,14 +204,13 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
/// of 'and' ops, then we also need to capture the fact that we saw an
/// "and X, 1", so that's an extra return value for that case.
struct MaskOps {
- Value *Root;
+ Value *Root = nullptr;
APInt Mask;
bool MatchAndChain;
- bool FoundAnd1;
+ bool FoundAnd1 = false;
MaskOps(unsigned BitWidth, bool MatchAnds)
- : Root(nullptr), Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds),
- FoundAnd1(false) {}
+ : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {}
};
/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
@@ -363,10 +366,72 @@ static bool tryToRecognizePopCount(Instruction &I) {
return false;
}
+/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), provided C1 and
+/// C2 saturate the value of the fp conversion. The transform is not reversible
+/// as the fptosi.sat is more defined than the input - all inputs produce a
+/// valid value for the fptosi.sat, whereas inputs that were out of range of
+/// the integer conversion produce poison in the original. The reversed pattern
+/// may use fmax and fmin instead. As we cannot directly reverse the transform,
+/// and it is not always profitable, we make it conditional on the cost being
+/// reported as lower by TTI.
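+/// For example (illustrative IR, assuming an f32 source clamped to the signed
+/// i8 range, i.e. C1 = -128 and C2 = 127):
+///   %c = fptosi float %x to i32
+///   %l = call i32 @llvm.smax.i32(i32 %c, i32 -128)
+///   %r = call i32 @llvm.smin.i32(i32 %l, i32 127)
+/// becomes:
+///   %s = call i8 @llvm.fptosi.sat.i8.f32(float %x)
+///   %r = sext i8 %s to i32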
+static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
+ // Look for smin(smax(fptosi(x), C1), C2), converting to fptosi_sat.
+ Value *In;
+ const APInt *MinC, *MaxC;
+ if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
+ m_APInt(MinC))),
+ m_APInt(MaxC))) &&
+ !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
+ m_APInt(MaxC))),
+ m_APInt(MinC))))
+ return false;
+
+ // Check that the constants form a signed saturating clamp, i.e. MinC + 1 is
+ // a power of 2 and MaxC == -(MinC + 1).
+ if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
+ return false;
+
+ Type *IntTy = I.getType();
+ Type *FpTy = In->getType();
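+ // The saturation width follows from the clamp constants; e.g. MinC = 127
+ // gives exactLogBase2(128) + 1 = 8, so the saturating conversion is to i8.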
+ Type *SatTy =
+ IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
+ if (auto *VecTy = dyn_cast<VectorType>(IntTy))
+ SatTy = VectorType::get(SatTy, VecTy->getElementCount());
+
+ // Get the cost of the intrinsic, and check that against the cost of
+ // fptosi+smin+smax
+ InstructionCost SatCost = TTI.getIntrinsicInstrCost(
+ IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
+ TTI::TCK_RecipThroughput);
+ SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
+ TTI::CastContextHint::None,
+ TTI::TCK_RecipThroughput);
+
+ InstructionCost MinMaxCost = TTI.getCastInstrCost(
+ Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
+ TTI::TCK_RecipThroughput);
+ MinMaxCost += TTI.getIntrinsicInstrCost(
+ IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
+ TTI::TCK_RecipThroughput);
+ MinMaxCost += TTI.getIntrinsicInstrCost(
+ IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
+ TTI::TCK_RecipThroughput);
+
+ if (SatCost >= MinMaxCost)
+ return false;
+
+ IRBuilder<> Builder(&I);
+ Function *Fn = Intrinsic::getDeclaration(I.getModule(), Intrinsic::fptosi_sat,
+ {SatTy, FpTy});
+ Value *Sat = Builder.CreateCall(Fn, In);
+ I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
+ return true;
+}
+
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
-static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
+static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
+ TargetTransformInfo &TTI) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
@@ -382,6 +447,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
MadeChange |= foldAnyOrAllBitsSet(I);
MadeChange |= foldGuardedFunnelShift(I, DT);
MadeChange |= tryToRecognizePopCount(I);
+ MadeChange |= tryToFPToSat(I, TTI);
}
}
@@ -395,13 +461,13 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
-static bool runImpl(Function &F, AssumptionCache &AC, TargetLibraryInfo &TLI,
- DominatorTree &DT) {
+static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI, DominatorTree &DT) {
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
- MadeChange |= foldUnusualPatterns(F, DT);
+ MadeChange |= foldUnusualPatterns(F, DT, TTI);
return MadeChange;
}
@@ -411,6 +477,7 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -421,7 +488,8 @@ bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return runImpl(F, AC, TLI, DT);
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return runImpl(F, AC, TTI, TLI, DT);
}
PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
@@ -429,7 +497,8 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- if (!runImpl(F, AC, TLI, DT)) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ if (!runImpl(F, AC, TTI, TLI, DT)) {
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
}
@@ -446,6 +515,7 @@ INITIALIZE_PASS_BEGIN(AggressiveInstCombinerLegacyPass,
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine",
"Combine pattern based expressions", false, false)