aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp')
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp242
1 files changed, 70 insertions, 172 deletions
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 34c8a380448e..d09ac1c099c1 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -29,7 +28,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -373,7 +371,7 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
InstructionCost SatCost = TTI.getIntrinsicInstrCost(
IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
TTI::TCK_RecipThroughput);
- SatCost += TTI.getCastInstrCost(Instruction::SExt, SatTy, IntTy,
+ SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
TTI::CastContextHint::None,
TTI::TCK_RecipThroughput);
@@ -398,6 +396,54 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
return true;
}
+/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
+/// pessimistic codegen that has to account for setting errno and can enable
+/// vectorization.
+static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI, AssumptionCache &AC,
+ DominatorTree &DT) {
+ // Match a call to sqrt mathlib function.
+ auto *Call = dyn_cast<CallInst>(&I);
+ if (!Call)
+ return false;
+
+ Module *M = Call->getModule();
+ LibFunc Func;
+ if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
+ return false;
+
+ if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
+ return false;
+
+ // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
+ // (because NNAN or the operand arg must not be less than -0.0) and (2) we
+ // would not end up lowering to a libcall anyway (which could change the value
+ // of errno), then:
+ // (1) errno won't be set.
+ // (2) it is safe to convert this to an intrinsic call.
+ Type *Ty = Call->getType();
+ Value *Arg = Call->getArgOperand(0);
+ if (TTI.haveFastSqrt(Ty) &&
+ (Call->hasNoNaNs() ||
+ cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, &I,
+ &DT))) {
+ IRBuilder<> Builder(&I);
+ IRBuilderBase::FastMathFlagGuard Guard(Builder);
+ Builder.setFastMathFlags(Call->getFastMathFlags());
+
+ Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
+ Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
+ I.replaceAllUsesWith(NewSqrt);
+
+ // Explicitly erase the old call because a call with side effects is not
+ // trivially dead.
+ I.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
// Check if this array of constants represents a cttz table.
// Iterate over the elements from \p Table by trying to find/match all
// the numbers from 0 to \p InputBits that should represent cttz results.
@@ -447,7 +493,8 @@ static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
// %shr = lshr i32 %mul, 27
// %idxprom = zext i32 %shr to i64
// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,
-// i64 %idxprom %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
+// i64 %idxprom
+// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
//
// CASE 2:
// %sub = sub i32 0, %x
@@ -455,8 +502,9 @@ static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
// %mul = mul i32 %and, 72416175
// %shr = lshr i32 %mul, 26
// %idxprom = zext i32 %shr to i64
-// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table, i64
-// 0, i64 %idxprom %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
+// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
+// i64 0, i64 %idxprom
+// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
//
// CASE 3:
// %sub = sub i32 0, %x
@@ -464,16 +512,18 @@ static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
// %mul = mul i32 %and, 81224991
// %shr = lshr i32 %mul, 27
// %idxprom = zext i32 %shr to i64
-// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table, i64
-// 0, i64 %idxprom %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
+// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
+// i64 0, i64 %idxprom
+// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
//
// CASE 4:
// %sub = sub i64 0, %x
// %and = and i64 %sub, %x
// %mul = mul i64 %and, 283881067100198605
// %shr = lshr i64 %mul, 58
-// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0, i64
-// %shr %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
+// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
+// i64 %shr
+// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
//
// All this can be lowered to @llvm.cttz.i32/64 intrinsic.
static bool tryToRecognizeTableBasedCttz(Instruction &I) {
@@ -656,7 +706,10 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
make_range(Start->getIterator(), End->getIterator())) {
if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
return false;
- if (++NumScanned > MaxInstrsToScan)
+
+ // Ignore debug info so that's not counted against MaxInstrsToScan.
+ // Otherwise debug info could affect codegen.
+ if (!isa<DbgInfoIntrinsic>(Inst) && ++NumScanned > MaxInstrsToScan)
return false;
}
@@ -869,159 +922,13 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
return true;
}
-/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
-/// pessimistic codegen that has to account for setting errno and can enable
-/// vectorization.
-static bool foldSqrt(CallInst *Call, TargetTransformInfo &TTI,
- TargetLibraryInfo &TLI, AssumptionCache &AC,
- DominatorTree &DT) {
- Module *M = Call->getModule();
-
- // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
- // (because NNAN or the operand arg must not be less than -0.0) and (2) we
- // would not end up lowering to a libcall anyway (which could change the value
- // of errno), then:
- // (1) errno won't be set.
- // (2) it is safe to convert this to an intrinsic call.
- Type *Ty = Call->getType();
- Value *Arg = Call->getArgOperand(0);
- if (TTI.haveFastSqrt(Ty) &&
- (Call->hasNoNaNs() ||
- cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, Call,
- &DT))) {
- IRBuilder<> Builder(Call);
- IRBuilderBase::FastMathFlagGuard Guard(Builder);
- Builder.setFastMathFlags(Call->getFastMathFlags());
-
- Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
- Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
- Call->replaceAllUsesWith(NewSqrt);
-
- // Explicitly erase the old call because a call with side effects is not
- // trivially dead.
- Call->eraseFromParent();
- return true;
- }
-
- return false;
-}
-
-/// Try to expand strcmp(P, "x") calls.
-static bool expandStrcmp(CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) {
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
-
- // Trivial cases are optimized during inst combine
- if (Str1P == Str2P)
- return false;
-
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
- Value *NonConstantP = nullptr;
- StringRef ConstantStr;
-
- if (!HasStr1 && HasStr2 && Str2.size() == 1) {
- NonConstantP = Str1P;
- ConstantStr = Str2;
- } else if (!HasStr2 && HasStr1 && Str1.size() == 1) {
- NonConstantP = Str2P;
- ConstantStr = Str1;
- } else {
- return false;
- }
-
- // Check if strcmp result is only used in a comparison with zero
- if (!isOnlyUsedInZeroComparison(CI))
- return false;
-
- // For strcmp(P, "x") do the following transformation:
- //
- // (before)
- // dst = strcmp(P, "x")
- //
- // (after)
- // v0 = P[0] - 'x'
- // [if v0 == 0]
- // v1 = P[1]
- // dst = phi(v0, v1)
- //
-
- IRBuilder<> B(CI->getParent());
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
-
- Type *RetType = CI->getType();
-
- B.SetInsertPoint(CI);
- BasicBlock *InitialBB = B.GetInsertBlock();
- Value *Str1FirstCharacterValue =
- B.CreateZExt(B.CreateLoad(B.getInt8Ty(), NonConstantP), RetType);
- Value *Str2FirstCharacterValue =
- ConstantInt::get(RetType, static_cast<unsigned char>(ConstantStr[0]));
- Value *FirstCharacterSub =
- B.CreateNSWSub(Str1FirstCharacterValue, Str2FirstCharacterValue);
- Value *IsFirstCharacterSubZero =
- B.CreateICmpEQ(FirstCharacterSub, ConstantInt::get(RetType, 0));
- Instruction *IsFirstCharacterSubZeroBBTerminator = SplitBlockAndInsertIfThen(
- IsFirstCharacterSubZero, CI, /*Unreachable*/ false,
- /*BranchWeights*/ nullptr, &DTU);
-
- B.SetInsertPoint(IsFirstCharacterSubZeroBBTerminator);
- B.GetInsertBlock()->setName("strcmp_expand_sub_is_zero");
- BasicBlock *IsFirstCharacterSubZeroBB = B.GetInsertBlock();
- Value *Str1SecondCharacterValue = B.CreateZExt(
- B.CreateLoad(B.getInt8Ty(), B.CreateConstInBoundsGEP1_64(
- B.getInt8Ty(), NonConstantP, 1)),
- RetType);
-
- B.SetInsertPoint(CI);
- B.GetInsertBlock()->setName("strcmp_expand_sub_join");
-
- PHINode *Result = B.CreatePHI(RetType, 2);
- Result->addIncoming(FirstCharacterSub, InitialBB);
- Result->addIncoming(Str1SecondCharacterValue, IsFirstCharacterSubZeroBB);
-
- CI->replaceAllUsesWith(Result);
- CI->eraseFromParent();
-
- MadeCFGChange = true;
-
- return true;
-}
-
-static bool foldLibraryCalls(Instruction &I, TargetTransformInfo &TTI,
- TargetLibraryInfo &TLI, DominatorTree &DT,
- AssumptionCache &AC, bool &MadeCFGChange) {
- CallInst *CI = dyn_cast<CallInst>(&I);
- if (!CI)
- return false;
-
- LibFunc Func;
- Module *M = I.getModule();
- if (!TLI.getLibFunc(*CI, Func) || !isLibFuncEmittable(M, &TLI, Func))
- return false;
-
- switch (Func) {
- case LibFunc_sqrt:
- case LibFunc_sqrtf:
- case LibFunc_sqrtl:
- return foldSqrt(CI, TTI, TLI, AC, DT);
- case LibFunc_strcmp:
- return expandStrcmp(CI, DT, MadeCFGChange);
- default:
- break;
- }
-
- return false;
-}
-
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, AliasAnalysis &AA,
- AssumptionCache &AC, bool &MadeCFGChange) {
+ AssumptionCache &AC) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
@@ -1046,7 +953,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
// NOTE: This function introduces erasing of the instruction `I`, so it
// needs to be called at the end of this sequence, otherwise we may make
// bugs.
- MadeChange |= foldLibraryCalls(I, TTI, TLI, DT, AC, MadeCFGChange);
+ MadeChange |= foldSqrt(I, TTI, TLI, AC, DT);
}
}
@@ -1062,12 +969,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
/// handled in the callers of this function.
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, DominatorTree &DT,
- AliasAnalysis &AA, bool &ChangedCFG) {
+ AliasAnalysis &AA) {
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
- MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, ChangedCFG);
+ MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC);
return MadeChange;
}
@@ -1078,21 +985,12 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
-
- bool MadeCFGChange = false;
-
- if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
+ if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
}
-
// Mark all the analyses that instcombine updates as preserved.
PreservedAnalyses PA;
-
- if (MadeCFGChange)
- PA.preserve<DominatorTreeAnalysis>();
- else
- PA.preserveSet<CFGAnalyses>();
-
+ PA.preserveSet<CFGAnalyses>();
return PA;
}