Diffstat (limited to 'llvm/lib/Transforms/AggressiveInstCombine')
-rw-r--r--  llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 504
-rw-r--r--  llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp      |   2
2 files changed, 409 insertions(+), 97 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 35adaa3bde65..473b41241b8a 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -14,8 +14,6 @@
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
 #include "AggressiveInstCombineInternal.h"
-#include "llvm-c/Initialization.h"
-#include "llvm-c/Transforms/AggressiveInstCombine.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -24,23 +22,17 @@
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/Local.h"
 
 using namespace llvm;
 using namespace PatternMatch;
 
-namespace llvm {
-class DataLayout;
-}
-
 #define DEBUG_TYPE "aggressive-instcombine"
 
 STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
@@ -50,31 +42,9 @@ STATISTIC(NumGuardedFunnelShifts,
           "Number of guarded funnel shifts transformed into funnel shifts");
 STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
 
-namespace {
-/// Contains expression pattern combiner logic.
-/// This class provides both the logic to combine expression patterns and
-/// combine them. It differs from InstCombiner class in that each pattern
-/// combiner runs only once as opposed to InstCombine's multi-iteration,
-/// which allows pattern combiner to have higher complexity than the O(1)
-/// required by the instruction combiner.
-class AggressiveInstCombinerLegacyPass : public FunctionPass {
-public:
-  static char ID; // Pass identification, replacement for typeid
-
-  AggressiveInstCombinerLegacyPass() : FunctionPass(ID) {
-    initializeAggressiveInstCombinerLegacyPassPass(
-        *PassRegistry::getPassRegistry());
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-
-  /// Run all expression pattern optimizations on the given /p F function.
-  ///
-  /// \param F function to optimize.
-  /// \returns true if the IR is changed.
-  bool runOnFunction(Function &F) override;
-};
-} // namespace
+static cl::opt<unsigned> MaxInstrsToScan(
+    "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
+    cl::desc("Max number of instructions to scan for aggressive instcombine."));
 
 /// Match a pattern for a bitwise funnel/rotate operation that partially guards
 /// against undefined behavior by branching around the funnel-shift/rotation
@@ -446,21 +416,22 @@ foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI) {
   if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
     return false;
 
-  // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created,
-  // and (3) we would not end up lowering to a libcall anyway (which could
-  // change the value of errno), then:
-  // (1) the operand arg must not be less than -0.0.
-  // (2) errno won't be set.
-  // (3) it is safe to convert this to an intrinsic call.
-  // TODO: Check if the arg is known non-negative.
+  // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
+  // (because the call has NNAN or the operand is known not to be less than
+  // -0.0), and (3) we would not end up lowering to a libcall anyway (which
+  // could change the value of errno), then:
+  // (1) errno won't be set.
+  // (2) it is safe to convert this to an intrinsic call.
   Type *Ty = Call->getType();
-  if (TTI.haveFastSqrt(Ty) && Call->hasNoNaNs()) {
+  Value *Arg = Call->getArgOperand(0);
+  if (TTI.haveFastSqrt(Ty) &&
+      (Call->hasNoNaNs() || CannotBeOrderedLessThanZero(Arg, &TLI))) {
     IRBuilder<> Builder(&I);
     IRBuilderBase::FastMathFlagGuard Guard(Builder);
     Builder.setFastMathFlags(Call->getFastMathFlags());
 
     Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
-    Value *NewSqrt = Builder.CreateCall(Sqrt, Call->getArgOperand(0), "sqrt");
+    Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
     I.replaceAllUsesWith(NewSqrt);
 
     // Explicitly erase the old call because a call with side effects is not
@@ -472,18 +443,401 @@ foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI) {
   return false;
 }
 
+// Check if this array of constants represents a cttz table.
+// Iterate over the elements from \p Table by trying to find/match all
+// the numbers from 0 to \p InputBits that should represent cttz results.
+static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
+                        uint64_t Shift, uint64_t InputBits) {
+  unsigned Length = Table.getNumElements();
+  if (Length < InputBits || Length > InputBits * 2)
+    return false;
+
+  APInt Mask = APInt::getBitsSetFrom(InputBits, Shift);
+  unsigned Matched = 0;
+
+  for (unsigned i = 0; i < Length; i++) {
+    uint64_t Element = Table.getElementAsInteger(i);
+    if (Element >= InputBits)
+      continue;
+
+    // Check if \p Element matches a concrete answer. It could fail for some
+    // elements that are never accessed, so we keep iterating over each
+    // element from the table. The number of matched elements should be equal
+    // to the number of potential right answers, which is exactly \p InputBits.
+    if ((((Mul << Element) & Mask.getZExtValue()) >> Shift) == i)
+      Matched++;
+  }
+
+  return Matched == InputBits;
+}
+
+// Try to recognize a table-based ctz implementation.
+// E.g., an example in C (for more cases please see the llvm/tests):
+// int f(unsigned x) {
+//    static const char table[32] =
+//      {0, 1, 28, 2, 29, 14, 24, 3, 30,
+//       22, 20, 15, 25, 17, 4, 8, 31, 27,
+//       13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
+//    return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
+// }
+// This can be lowered to a `cttz` instruction.
+// There is also a special case when the element is 0.
+//
+// Here are some examples of LLVM IR for a 64-bit target:
+//
+// CASE 1:
+// %sub = sub i32 0, %x
+// %and = and i32 %sub, %x
+// %mul = mul i32 %and, 125613361
+// %shr = lshr i32 %mul, 27
+// %idxprom = zext i32 %shr to i64
+// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table,
+//             i64 0, i64 %idxprom
+// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
+//
+// CASE 2:
+// %sub = sub i32 0, %x
+// %and = and i32 %sub, %x
+// %mul = mul i32 %and, 72416175
+// %shr = lshr i32 %mul, 26
+// %idxprom = zext i32 %shr to i64
+// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
+//             i64 0, i64 %idxprom
+// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
+//
+// CASE 3:
+// %sub = sub i32 0, %x
+// %and = and i32 %sub, %x
+// %mul = mul i32 %and, 81224991
+// %shr = lshr i32 %mul, 27
+// %idxprom = zext i32 %shr to i64
+// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
+//             i64 0, i64 %idxprom
+// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
+//
+// CASE 4:
+// %sub = sub i64 0, %x
+// %and = and i64 %sub, %x
+// %mul = mul i64 %and, 283881067100198605
+// %shr = lshr i64 %mul, 58
+// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
+//             i64 %shr
+// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
+//
+// All this can be lowered to the @llvm.cttz.i32/i64 intrinsic.
+static bool tryToRecognizeTableBasedCttz(Instruction &I) {
+  LoadInst *LI = dyn_cast<LoadInst>(&I);
+  if (!LI)
+    return false;
+
+  Type *AccessType = LI->getType();
+  if (!AccessType->isIntegerTy())
+    return false;
+
+  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
+  if (!GEP || !GEP->isInBounds() || GEP->getNumIndices() != 2)
+    return false;
+
+  if (!GEP->getSourceElementType()->isArrayTy())
+    return false;
+
+  uint64_t ArraySize = GEP->getSourceElementType()->getArrayNumElements();
+  if (ArraySize != 32 && ArraySize != 64)
+    return false;
+
+  GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
+  if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
+    return false;
+
+  ConstantDataArray *ConstData =
+      dyn_cast<ConstantDataArray>(GVTable->getInitializer());
+  if (!ConstData)
+    return false;
+
+  if (!match(GEP->idx_begin()->get(), m_ZeroInt()))
+    return false;
+
+  Value *Idx2 = std::next(GEP->idx_begin())->get();
+  Value *X1;
+  uint64_t MulConst, ShiftConst;
+  // FIXME: 64-bit targets have `i64` type for the GEP index, so this match
+  // will probably fail for other (e.g. 32-bit) targets.
+  if (!match(Idx2, m_ZExtOrSelf(
+                 m_LShr(m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
+                              m_ConstantInt(MulConst)),
+                        m_ConstantInt(ShiftConst)))))
+    return false;
+
+  unsigned InputBits = X1->getType()->getScalarSizeInBits();
+  if (InputBits != 32 && InputBits != 64)
+    return false;
+
+  // The shift should extract the top 5..7 bits.
+  if (InputBits - Log2_32(InputBits) != ShiftConst &&
+      InputBits - Log2_32(InputBits) - 1 != ShiftConst)
+    return false;
+
+  if (!isCTTZTable(*ConstData, MulConst, ShiftConst, InputBits))
+    return false;
+
+  auto ZeroTableElem = ConstData->getElementAsInteger(0);
+  bool DefinedForZero = ZeroTableElem == InputBits;
+
+  IRBuilder<> B(LI);
+  ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
+  Type *XType = X1->getType();
+  auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
+  Value *ZExtOrTrunc = nullptr;
+
+  if (DefinedForZero) {
+    ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
+  } else {
+    // If the value in elem 0 isn't the same as InputBits, we still want to
+    // produce the value from the table.
+    auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
+    auto Select =
+        B.CreateSelect(Cmp, ConstantInt::get(XType, ZeroTableElem), Cttz);
+
+    // NOTE: If table[0] is 0, but cttz(0) is defined by the target, this
+    // should be handled as `cttz(x) & (typeSize - 1)`.
+
+    ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
+  }
+
+  LI->replaceAllUsesWith(ZExtOrTrunc);
+
+  return true;
+}
+
+/// This is used by foldLoadsRecursive() to capture a Root Load node, which is
+/// of type or(load, load), and to recursively build the wide load. It also
+/// captures the shift amount, zero extend type and load size.
+struct LoadOps {
+  LoadInst *Root = nullptr;
+  LoadInst *RootInsert = nullptr;
+  bool FoundRoot = false;
+  uint64_t LoadSize = 0;
+  Value *Shift = nullptr;
+  Type *ZextType;
+  AAMDNodes AATags;
+};
+
+// Recursively identify and merge consecutive loads of the form
+// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
+// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
+static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
+                               AliasAnalysis &AA) {
+  Value *ShAmt2 = nullptr;
+  Value *X;
+  Instruction *L1, *L2;
+
+  // Go to the last node with loads.
+  if (match(V, m_OneUse(m_c_Or(
+                   m_Value(X),
+                   m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
+                                  m_Value(ShAmt2)))))) ||
+      match(V, m_OneUse(m_Or(m_Value(X),
+                             m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))))))) {
+    if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
+      // Avoid a partial chain merge.
+      return false;
+  } else
+    return false;
+
+  // Check if the pattern has loads.
+  LoadInst *LI1 = LOps.Root;
+  Value *ShAmt1 = LOps.Shift;
+  if (LOps.FoundRoot == false &&
+      (match(X, m_OneUse(m_ZExt(m_Instruction(L1)))) ||
+       match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
+                               m_Value(ShAmt1)))))) {
+    LI1 = dyn_cast<LoadInst>(L1);
+  }
+  LoadInst *LI2 = dyn_cast<LoadInst>(L2);
+
+  // Check that the loads are distinct, simple (not atomic or volatile), and
+  // in the same address space.
+  if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
+      LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())
+    return false;
+
+  // Check that the loads come from the same BB.
+  if (LI1->getParent() != LI2->getParent())
+    return false;
+
+  // Query the data layout for endianness.
+  bool IsBigEndian = DL.isBigEndian();
+
+  // Check if the loads are consecutive and of the same size.
+  Value *Load1Ptr = LI1->getPointerOperand();
+  APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
+  Load1Ptr =
+      Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
+                                                  /* AllowNonInbounds */ true);
+
+  Value *Load2Ptr = LI2->getPointerOperand();
+  APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
+  Load2Ptr =
+      Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
+                                                  /* AllowNonInbounds */ true);
+
+  // Verify that both loads have the same base pointer and the same load size.
+  uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
+  uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
+  if (Load1Ptr != Load2Ptr || LoadSize1 != LoadSize2)
+    return false;
+
+  // Only support load sizes that are at least 8 bits and a power of 2.
+  if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1))
+    return false;
+
+  // Use alias analysis to check for stores between the loads.
+  LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2;
+  MemoryLocation Loc;
+  if (!Start->comesBefore(End)) {
+    std::swap(Start, End);
+    Loc = MemoryLocation::get(End);
+    if (LOps.FoundRoot)
+      Loc = Loc.getWithNewSize(LOps.LoadSize);
+  } else
+    Loc = MemoryLocation::get(End);
+  unsigned NumScanned = 0;
+  for (Instruction &Inst :
+       make_range(Start->getIterator(), End->getIterator())) {
+    if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
+      return false;
+    if (++NumScanned > MaxInstrsToScan)
+      return false;
+  }
+
+  // Make sure the load with the lower offset is LI1.
+  bool Reverse = false;
+  if (Offset2.slt(Offset1)) {
+    std::swap(LI1, LI2);
+    std::swap(ShAmt1, ShAmt2);
+    std::swap(Offset1, Offset2);
+    std::swap(Load1Ptr, Load2Ptr);
+    std::swap(LoadSize1, LoadSize2);
+    Reverse = true;
+  }
+
+  // On big-endian targets, swap the shifts.
+  if (IsBigEndian)
+    std::swap(ShAmt1, ShAmt2);
+
+  // Find the shift values.
+  const APInt *Temp;
+  uint64_t Shift1 = 0, Shift2 = 0;
+  if (ShAmt1 && match(ShAmt1, m_APInt(Temp)))
+    Shift1 = Temp->getZExtValue();
+  if (ShAmt2 && match(ShAmt2, m_APInt(Temp)))
+    Shift2 = Temp->getZExtValue();
+
+  // First load is always LI1. This is where we put the new load.
+  // Use the merged load size available from LI1 for forward loads.
+  if (LOps.FoundRoot) {
+    if (!Reverse)
+      LoadSize1 = LOps.LoadSize;
+    else
+      LoadSize2 = LOps.LoadSize;
+  }
+
+  // Verify that the shift amounts and load offsets line up, i.e. that the
+  // loads are consecutive.
+  uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
+  uint64_t PrevSize =
+      DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
+  if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
+    return false;
+
+  // Update LOps.
+  AAMDNodes AATags1 = LOps.AATags;
+  AAMDNodes AATags2 = LI2->getAAMetadata();
+  if (LOps.FoundRoot == false) {
+    LOps.FoundRoot = true;
+    AATags1 = LI1->getAAMetadata();
+  }
+  LOps.LoadSize = LoadSize1 + LoadSize2;
+  LOps.RootInsert = Start;
+
+  // Concatenate the AATags of the merged loads.
+  LOps.AATags = AATags1.concat(AATags2);
+
+  LOps.Root = LI1;
+  LOps.Shift = ShAmt1;
+  LOps.ZextType = X->getType();
+  return true;
+}
+
+// For a given BB instruction, evaluate all loads in the chain that form a
+// pattern which suggests that the loads can be combined. The one and only use
+// of the loads is to form a wider load.
+static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
+                                 TargetTransformInfo &TTI, AliasAnalysis &AA) {
+  // Only consider load chains of scalar values.
+  if (isa<VectorType>(I.getType()))
+    return false;
+
+  LoadOps LOps;
+  if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot)
+    return false;
+
+  IRBuilder<> Builder(&I);
+  LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;
+
+  IntegerType *WiderType = IntegerType::get(I.getContext(), LOps.LoadSize);
+  // TTI-based check whether it is worth proceeding with the wider load.
+  bool Allowed = TTI.isTypeLegal(WiderType);
+  if (!Allowed)
+    return false;
+
+  unsigned AS = LI1->getPointerAddressSpace();
+  unsigned Fast = 0;
+  Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
+                                               AS, LI1->getAlign(), &Fast);
+  if (!Allowed || !Fast)
+    return false;
+
+  // Make sure the load pointer (GEP or not) is defined above the insert point.
+  Instruction *Inst = dyn_cast<Instruction>(LI1->getPointerOperand());
+  if (Inst && Inst->getParent() == LI1->getParent() &&
+      !Inst->comesBefore(LOps.RootInsert))
+    Inst->moveBefore(LOps.RootInsert);
+
+  // The new, wider load can be generated.
+  Value *Load1Ptr = LI1->getPointerOperand();
+  Builder.SetInsertPoint(LOps.RootInsert);
+  Value *NewPtr = Builder.CreateBitCast(Load1Ptr, WiderType->getPointerTo(AS));
+  NewLoad = Builder.CreateAlignedLoad(WiderType, NewPtr, LI1->getAlign(),
+                                      LI1->isVolatile(), "");
+  NewLoad->takeName(LI1);
+  // Set the new load's AATags metadata.
+  if (LOps.AATags)
+    NewLoad->setAAMetadata(LOps.AATags);
+
+  Value *NewOp = NewLoad;
+  // Check if a zero extend is needed.
+  if (LOps.ZextType)
+    NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);
+
+  // Check if a shift is needed. We need to shift by load1's shift amount
+  // if it is not zero.
+  if (LOps.Shift)
+    NewOp = Builder.CreateShl(NewOp, LOps.Shift);
+  I.replaceAllUsesWith(NewOp);
+
+  return true;
+}
+
 /// This is the entry point for folds that could be implemented in regular
 /// InstCombine, but they are separated because they are not expected to
 /// occur frequently and/or have more than a constant-length pattern match.
 static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
                                 TargetTransformInfo &TTI,
-                                TargetLibraryInfo &TLI) {
+                                TargetLibraryInfo &TLI, AliasAnalysis &AA) {
   bool MadeChange = false;
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
     if (!DT.isReachableFromEntry(&BB))
       continue;
 
+    const DataLayout &DL = F.getParent()->getDataLayout();
+
     // Walk the block backwards for efficiency. We're matching a chain of
     // use->defs, so we're more likely to succeed by starting from the bottom.
     // Also, we want to avoid matching partial patterns.
@@ -494,6 +848,11 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
       MadeChange |= foldGuardedFunnelShift(I, DT);
       MadeChange |= tryToRecognizePopCount(I);
       MadeChange |= tryToFPToSat(I, TTI);
+      MadeChange |= tryToRecognizeTableBasedCttz(I);
+      MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA);
+      // NOTE: This function may erase the instruction `I`, so it needs to be
+      // called at the end of this sequence; otherwise we may introduce bugs.
       MadeChange |= foldSqrt(I, TTI, TLI);
     }
   }
@@ -509,43 +868,24 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
 /// This is the entry point for all transforms. Pass manager differences are
 /// handled in the callers of this function.
 static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
-                    TargetLibraryInfo &TLI, DominatorTree &DT) {
+                    TargetLibraryInfo &TLI, DominatorTree &DT,
+                    AliasAnalysis &AA) {
   bool MadeChange = false;
   const DataLayout &DL = F.getParent()->getDataLayout();
   TruncInstCombine TIC(AC, TLI, DL, DT);
   MadeChange |= TIC.run(F);
-  MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI);
+  MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA);
   return MadeChange;
 }
 
-void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
-    AnalysisUsage &AU) const {
-  AU.setPreservesCFG();
-  AU.addRequired<AssumptionCacheTracker>();
-  AU.addRequired<DominatorTreeWrapperPass>();
-  AU.addRequired<TargetLibraryInfoWrapperPass>();
-  AU.addRequired<TargetTransformInfoWrapperPass>();
-  AU.addPreserved<AAResultsWrapperPass>();
-  AU.addPreserved<BasicAAWrapperPass>();
-  AU.addPreserved<DominatorTreeWrapperPass>();
-  AU.addPreserved<GlobalsAAWrapperPass>();
-}
-
-bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
-  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-  return runImpl(F, AC, TTI, TLI, DT);
-}
-
 PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
                                                  FunctionAnalysisManager &AM) {
   auto &AC = AM.getResult<AssumptionAnalysis>(F);
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
-  if (!runImpl(F, AC, TTI, TLI, DT)) {
+  auto &AA = AM.getResult<AAManager>(F);
+  if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
   }
 
@@ -554,31 +894,3 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
   PA.preserveSet<CFGAnalyses>();
   return PA;
 }
-
-char AggressiveInstCombinerLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(AggressiveInstCombinerLegacyPass,
-                      "aggressive-instcombine",
-                      "Combine pattern based expressions", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine",
-                    "Combine pattern based expressions", false, false)
-
-// Initialization Routines
-void llvm::initializeAggressiveInstCombine(PassRegistry &Registry) {
-  initializeAggressiveInstCombinerLegacyPassPass(Registry);
-}
-
-void LLVMInitializeAggressiveInstCombiner(LLVMPassRegistryRef R) {
-  initializeAggressiveInstCombinerLegacyPassPass(*unwrap(R));
-}
-
-FunctionPass *llvm::createAggressiveInstCombinerPass() {
-  return new AggressiveInstCombinerLegacyPass();
-}
-
-void LLVMAddAggressiveInstCombinerPass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createAggressiveInstCombinerPass());
-}
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index 70ea68587b8e..6c62e84077ac 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -157,7 +157,7 @@ bool TruncInstCombine::buildTruncExpressionGraph() {
       getRelevantOperands(I, Operands);
       // Add only operands not in Stack to prevent cycle
       for (auto *Op : Operands)
-        if (all_of(Stack, [Op](Value *V) { return Op != V; }))
+        if (!llvm::is_contained(Stack, Op))
           Worklist.push_back(Op);
       break;
     }
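
The foldSqrt change converts a sqrt libcall into the llvm.sqrt intrinsic when the call either carries the nnan flag or CannotBeOrderedLessThanZero proves the argument is never ordered less than -0.0, and TTI reports a fast sqrt for the type. A minimal sketch of a source pattern that should now qualify even without fast-math flags (the function name is illustrative, not taken from the patch or its tests):

    #include <cmath>

    // x * x + 1.0 is provably never less than -0.0, so the sqrt libcall can
    // be turned into the llvm.sqrt intrinsic with no risk of setting errno.
    double norm_plus_one(double x) {
      return std::sqrt(x * x + 1.0);
    }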
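
When tryToRecognizeTableBasedCttz fires, the table load is replaced by @llvm.cttz plus, if the table does not already encode cttz(0) == InputBits, a select that reproduces table[0] for a zero input. Roughly, the C function from the comment in the patch ends up equivalent to the following sketch (__builtin_ctz stands in for the intrinsic; the function name is hypothetical):

    // Approximate result of the fold for a table whose element 0 is 0:
    unsigned folded_ctz(unsigned x) {
      return x == 0 ? 0 /* table[0] */ : (unsigned)__builtin_ctz(x);
    }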
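
isCTTZTable validates the table purely arithmetically: every element smaller than InputBits must sit at the index that the multiply-and-shift would produce for that cttz result, and exactly InputBits elements must match. A standalone check of the 32-entry de Bruijn table from the patch comment (not part of the patch) shows the property being relied on:

    #include <cassert>
    #include <cstdint>

    int main() {
      static const char table[32] = {0,  1,  28, 2,  29, 14, 24, 3,  30, 22, 20,
                                     15, 25, 17, 4,  8,  31, 27, 13, 23, 21, 19,
                                     16, 7,  26, 12, 18, 6,  11, 5,  10, 9};
      for (uint32_t b = 0; b < 32; ++b) {
        uint32_t x = 1u << b; // x & -x isolates the lowest set bit of any x
        assert(table[((x & -x) * 0x077CB531u) >> 27] == (char)b);
      }
      return 0;
    }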
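
foldLoadsRecursive and foldConsecutiveLoads walk an or-tree of zero-extended (and possibly shifted) loads, use alias analysis to check that no store intervenes (scanning at most aggressive-instcombine-max-scan-instrs instructions, 64 by default), and emit a single wide load when TTI reports that the wider type is legal and misaligned access is fast. The classic source pattern this targets is a hand-written little-endian load; a sketch (hypothetical function name, and whether it fires depends on the target):

    #include <cstdint>

    // Four adjacent byte loads, zero-extended, shifted and or'd together; the
    // whole chain can now collapse into one unaligned 32-bit load.
    uint32_t load_le32(const uint8_t *p) {
      return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
             ((uint32_t)p[3] << 24);
    }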
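
The TruncInstCombine change is a pure simplification: "no element of Stack equals Op" is the negation of "Stack contains Op". A standalone illustration of the equivalence in plain C++ (std::find standing in for the LLVM helper):

    #include <algorithm>
    #include <vector>

    // all_of(Stack, [Op](int V) { return Op != V; }) is equivalent to
    // !is_contained(Stack, Op), i.e. Op is not found anywhere in Stack.
    bool notOnStack(const std::vector<int> &Stack, int Op) {
      return std::find(Stack.begin(), Stack.end(), Op) == Stack.end();
    }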
