Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstructionCombining.cpp')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 725 |
1 file changed, 395 insertions, 330 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 71c763de43b4..fb6f4f96ea48 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -38,7 +38,6 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -99,16 +98,19 @@
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
 #include <memory>
+#include <optional>
 #include <string>
 #include <utility>
 
 #define DEBUG_TYPE "instcombine"
 #include "llvm/Transforms/Utils/InstructionWorklist.h"
+#include <optional>
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -167,16 +169,16 @@ MaxArraySize("instcombine-maxarray-size", cl::init(1024),
 static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
                                                cl::Hidden, cl::init(true));
 
-Optional<Instruction *>
+std::optional<Instruction *>
 InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
   // Handle target specific intrinsics
   if (II.getCalledFunction()->isTargetIntrinsic()) {
     return TTI.instCombineIntrinsic(*this, II);
   }
-  return None;
+  return std::nullopt;
 }
 
-Optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
+std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
     IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
     bool &KnownBitsComputed) {
   // Handle target specific intrinsics
@@ -184,10 +186,10 @@ Optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
     return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
                                                 KnownBitsComputed);
   }
-  return None;
+  return std::nullopt;
 }
 
-Optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
+std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
     IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
     APInt &UndefElts3,
     std::function<void(Instruction *, unsigned, APInt, APInt &)>
@@ -198,11 +200,11 @@
         *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
         SimplifyAndSetOp);
   }
-  return None;
+  return std::nullopt;
 }
 
 Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
-  return llvm::EmitGEPOffset(&Builder, DL, GEP);
+  return llvm::emitGEPOffset(&Builder, DL, GEP);
 }
 
 /// Legal integers and common types are considered desirable. This is used to
@@ -223,11 +225,12 @@ bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
 
 /// Return true if it is desirable to convert an integer computation from a
 /// given bit width to a new bit width.
-/// We don't want to convert from a legal to an illegal type or from a smaller
-/// to a larger illegal type. A width of '1' is always treated as a desirable
-/// type because i1 is a fundamental type in IR, and there are many specialized
-/// optimizations for i1 types. Common/desirable widths are equally treated as
-/// legal to convert to, in order to open up more combining opportunities.
+/// We don't want to convert from a legal or desirable type (like i8) to an
+/// illegal type or from a smaller to a larger illegal type. A width of '1'
+/// is always treated as a desirable type because i1 is a fundamental type in
+/// IR, and there are many specialized optimizations for i1 types.
+/// Common/desirable widths are equally treated as legal to convert to, in
+/// order to open up more combining opportunities.
 bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
                                         unsigned ToWidth) const {
   bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
@@ -238,9 +241,9 @@ bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
   if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
     return true;
 
-  // If this is a legal integer from type, and the result would be an illegal
-  // type, don't do the transformation.
-  if (FromLegal && !ToLegal)
+  // If this is a legal or desirable integer from type, and the result would be
+  // an illegal type, don't do the transformation.
+  if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
     return false;
 
   // Otherwise, if both are illegal, do not increase the size of the result. We
@@ -367,14 +370,14 @@ static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
 // inttoptr ( ptrtoint (x) ) --> x
 Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
   auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
-  if (IntToPtr && DL.getPointerTypeSizeInBits(IntToPtr->getDestTy()) ==
+  if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
                       DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
     auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
     Type *CastTy = IntToPtr->getDestTy();
     if (PtrToInt &&
         CastTy->getPointerAddressSpace() ==
             PtrToInt->getSrcTy()->getPointerAddressSpace() &&
-        DL.getPointerTypeSizeInBits(PtrToInt->getSrcTy()) ==
+        DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
             DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
       return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
                                               "", PtrToInt);
@@ -632,14 +635,14 @@ getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
 
 /// This tries to simplify binary operations by factorizing out common terms
 /// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
-Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
-                                          Instruction::BinaryOps InnerOpcode,
-                                          Value *A, Value *B, Value *C,
-                                          Value *D) {
+static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
+                               InstCombiner::BuilderTy &Builder,
+                               Instruction::BinaryOps InnerOpcode, Value *A,
+                               Value *B, Value *C, Value *D) {
   assert(A && B && C && D && "All values must be provided");
 
   Value *V = nullptr;
-  Value *SimplifiedInst = nullptr;
+  Value *RetVal = nullptr;
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
 
@@ -647,7 +650,7 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
   bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
 
   // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
-  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
     // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
     // commutative case, "(A op' B) op (C op' A)"?
     if (A == C || (InnerCommutative && A == D)) {
@@ -656,17 +659,18 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
       // Consider forming "A op' (B op D)".
      // If "B op D" simplifies then it can be formed with no cost.
       V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
-      // If "B op D" doesn't simplify then only go on if both of the existing
+
+      // If "B op D" doesn't simplify then only go on if one of the existing
       // operations "A op' B" and "C op' D" will be zapped as no longer used.
-      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
+      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
         V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
-      if (V) {
-        SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V);
-      }
+      if (V)
+        RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
     }
+  }
 
   // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
-  if (!SimplifiedInst && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
+  if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
    // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
    // commutative case, "(A op' B) op (B op' D)"?
    if (B == D || (InnerCommutative && B == C)) {
@@ -676,61 +680,94 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
       // If "A op C" simplifies then it can be formed with no cost.
       V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
 
-      // If "A op C" doesn't simplify then only go on if both of the existing
+      // If "A op C" doesn't simplify then only go on if one of the existing
       // operations "A op' B" and "C op' D" will be zapped as no longer used.
-      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
+      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
         V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
-      if (V) {
-        SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B);
-      }
+      if (V)
+        RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
     }
+  }
 
-  if (SimplifiedInst) {
-    ++NumFactor;
-    SimplifiedInst->takeName(&I);
-
-    // Check if we can add NSW/NUW flags to SimplifiedInst. If so, set them.
-    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
-      if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
-        bool HasNSW = false;
-        bool HasNUW = false;
-        if (isa<OverflowingBinaryOperator>(&I)) {
-          HasNSW = I.hasNoSignedWrap();
-          HasNUW = I.hasNoUnsignedWrap();
-        }
+  if (!RetVal)
+    return nullptr;
 
-        if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
-          HasNSW &= LOBO->hasNoSignedWrap();
-          HasNUW &= LOBO->hasNoUnsignedWrap();
-        }
+  ++NumFactor;
+  RetVal->takeName(&I);
 
-        if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
-          HasNSW &= ROBO->hasNoSignedWrap();
-          HasNUW &= ROBO->hasNoUnsignedWrap();
-        }
+  // Try to add no-overflow flags to the final value.
+  if (isa<OverflowingBinaryOperator>(RetVal)) {
+    bool HasNSW = false;
+    bool HasNUW = false;
+    if (isa<OverflowingBinaryOperator>(&I)) {
+      HasNSW = I.hasNoSignedWrap();
+      HasNUW = I.hasNoUnsignedWrap();
+    }
+    if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
+      HasNSW &= LOBO->hasNoSignedWrap();
+      HasNUW &= LOBO->hasNoUnsignedWrap();
    }
 
-        if (TopLevelOpcode == Instruction::Add &&
-            InnerOpcode == Instruction::Mul) {
-          // We can propagate 'nsw' if we know that
-          // %Y = mul nsw i16 %X, C
-          // %Z = add nsw i16 %Y, %X
-          // =>
-          // %Z = mul nsw i16 %X, C+1
-          //
-          // iff C+1 isn't INT_MIN
-          const APInt *CInt;
-          if (match(V, m_APInt(CInt))) {
-            if (!CInt->isMinSignedValue())
-              BO->setHasNoSignedWrap(HasNSW);
-          }
+    if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
+      HasNSW &= ROBO->hasNoSignedWrap();
+      HasNUW &= ROBO->hasNoUnsignedWrap();
+    }
 
-          // nuw can be propagated with any constant or nuw value.
-          BO->setHasNoUnsignedWrap(HasNUW);
-        }
-      }
+    if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
+      // We can propagate 'nsw' if we know that
+      //  %Y = mul nsw i16 %X, C
+      //  %Z = add nsw i16 %Y, %X
+      // =>
+      //  %Z = mul nsw i16 %X, C+1
+      //
+      // iff C+1 isn't INT_MIN
+      const APInt *CInt;
+      if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
+        cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
+
+      // nuw can be propagated with any constant or nuw value.
+      cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
     }
   }
-  return SimplifiedInst;
+  return RetVal;
+}
+
+Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
+  Value *A, *B, *C, *D;
+  Instruction::BinaryOps LHSOpcode, RHSOpcode;
+
+  if (Op0)
+    LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
+  if (Op1)
+    RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
+
+  // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
+  // a common term.
+  if (Op0 && Op1 && LHSOpcode == RHSOpcode)
+    if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
+      return V;
+
+  // The instruction has the form "(A op' B) op (C)". Try to factorize common
+  // term.
+  if (Op0)
+    if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
+      if (Value *V =
+              tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
+        return V;
+
+  // The instruction has the form "(B) op (C op' D)". Try to factorize common
+  // term.
+  if (Op1)
+    if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
+      if (Value *V =
+              tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
+        return V;
+
+  return nullptr;
 }
 
 /// This tries to simplify binary operations which some other binary operation
@@ -738,41 +775,15 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
 /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
 /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
 /// Returns the simplified value, or null if it didn't simplify.
-Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
+Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
   BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
   Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
 
-  {
-    // Factorization.
-    Value *A, *B, *C, *D;
-    Instruction::BinaryOps LHSOpcode, RHSOpcode;
-    if (Op0)
-      LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
-    if (Op1)
-      RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
-
-    // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
-    // a common term.
-    if (Op0 && Op1 && LHSOpcode == RHSOpcode)
-      if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D))
-        return V;
-
-    // The instruction has the form "(A op' B) op (C)". Try to factorize common
-    // term.
-    if (Op0)
-      if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
-        if (Value *V = tryFactorization(I, LHSOpcode, A, B, RHS, Ident))
-          return V;
-
-    // The instruction has the form "(B) op (C op' D)". Try to factorize common
-    // term.
-    if (Op1)
-      if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
-        if (Value *V = tryFactorization(I, RHSOpcode, LHS, Ident, C, D))
-          return V;
-  }
+  // Factorization.
+  if (Value *R = tryFactorizationFolds(I))
+    return R;
 
   // Expansion.
   if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
@@ -876,6 +887,28 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
 
   SimplifyQuery Q = SQ.getWithInstruction(&I);
   Value *Cond, *True = nullptr, *False = nullptr;
+
+  // Special-case for add/negate combination. Replace the zero in the negation
+  // with the trailing add operand:
+  // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
+  // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
+  auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
+    // We need an 'add' and exactly 1 arm of the select to have been simplified.
+    if (Opcode != Instruction::Add || (!True && !False) || (True && False))
+      return nullptr;
+
+    Value *N;
+    if (True && match(FVal, m_Neg(m_Value(N)))) {
+      Value *Sub = Builder.CreateSub(Z, N);
+      return Builder.CreateSelect(Cond, True, Sub, I.getName());
+    }
+    if (False && match(TVal, m_Neg(m_Value(N)))) {
+      Value *Sub = Builder.CreateSub(Z, N);
+      return Builder.CreateSelect(Cond, Sub, False, I.getName());
    }
+    return nullptr;
+  };
+
   if (LHSIsSelect && RHSIsSelect && A == D) {
     // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
     Cond = A;
@@ -893,11 +926,15 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
     Cond = A;
     True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
     False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
+    if (Value *NewSel = foldAddNegate(B, C, RHS))
+      return NewSel;
   } else if (RHSIsSelect && RHS->hasOneUse()) {
     // X op (D ? E : F) -> D ? (X op E) : (X op F)
     Cond = D;
     True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
     False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
+    if (Value *NewSel = foldAddNegate(E, F, LHS))
+      return NewSel;
   }
 
   if (!True || !False)
@@ -910,8 +947,10 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
 
 /// Freely adapt every user of V as-if V was changed to !V.
 /// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
-void InstCombinerImpl::freelyInvertAllUsersOf(Value *I) {
-  for (User *U : I->users()) {
+void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
+  for (User *U : make_early_inc_range(I->users())) {
+    if (U == IgnoredUser)
+      continue; // Don't consider this user.
    switch (cast<Instruction>(U)->getOpcode()) {
    case Instruction::Select: {
      auto *SI = cast<SelectInst>(U);
@@ -1033,6 +1072,9 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
     return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
   }
 
+  if (auto *EI = dyn_cast<ExtractElementInst>(&I))
+    return Builder.CreateExtractElement(SO, EI->getIndexOperand());
+
   assert(I.isBinaryOp() && "Unexpected opcode for select folding");
 
   // Figure out if the constant is the left or the right argument.
@@ -1133,22 +1175,6 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
   return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
 }
 
-static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
-                                        InstCombiner::BuilderTy &Builder) {
-  bool ConstIsRHS = isa<Constant>(I->getOperand(1));
-  Constant *C = cast<Constant>(I->getOperand(ConstIsRHS));
-
-  Value *Op0 = InV, *Op1 = C;
-  if (!ConstIsRHS)
-    std::swap(Op0, Op1);
-
-  Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phi.bo");
-  auto *FPInst = dyn_cast<Instruction>(RI);
-  if (FPInst && isa<FPMathOperator>(FPInst))
-    FPInst->copyFastMathFlags(I);
-  return RI;
-}
-
 Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
   unsigned NumPHIValues = PN->getNumIncomingValues();
   if (NumPHIValues == 0)
@@ -1167,48 +1193,69 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
     // Otherwise, we can replace *all* users with the new PHI we form.
   }
 
-  // Check to see if all of the operands of the PHI are simple constants
-  // (constantint/constantfp/undef). If there is one non-constant value,
-  // remember the BB it is in. If there is more than one or if *it* is a PHI,
-  // bail out. We don't do arbitrary constant expressions here because moving
-  // their computation can be expensive without a cost model.
-  BasicBlock *NonConstBB = nullptr;
+  // Check to see whether the instruction can be folded into each phi operand.
+  // If there is one operand that does not fold, remember the BB it is in.
+  // If there is more than one or if *it* is a PHI, bail out.
+  SmallVector<Value *> NewPhiValues;
+  BasicBlock *NonSimplifiedBB = nullptr;
+  Value *NonSimplifiedInVal = nullptr;
   for (unsigned i = 0; i != NumPHIValues; ++i) {
     Value *InVal = PN->getIncomingValue(i);
-    // For non-freeze, require constant operand
-    // For freeze, require non-undef, non-poison operand
-    if (!isa<FreezeInst>(I) && match(InVal, m_ImmConstant()))
-      continue;
-    if (isa<FreezeInst>(I) && isGuaranteedNotToBeUndefOrPoison(InVal))
+    BasicBlock *InBB = PN->getIncomingBlock(i);
+
+    // NB: It is a precondition of this transform that the operands be
+    // phi translatable! This is usually trivially satisfied by limiting it
+    // to constant ops, and for selects we do a more sophisticated check.
+    SmallVector<Value *> Ops;
+    for (Value *Op : I.operands()) {
+      if (Op == PN)
+        Ops.push_back(InVal);
+      else
+        Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
+    }
+
+    // Don't consider the simplification successful if we get back a constant
+    // expression. That's just an instruction in hiding.
+    // Also reject the case where we simplify back to the phi node. We wouldn't
+    // be able to remove it in that case.
+    Value *NewVal = simplifyInstructionWithOperands(
+        &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
+    if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr())) {
+      NewPhiValues.push_back(NewVal);
       continue;
+    }
 
     if (isa<PHINode>(InVal)) return nullptr;  // Itself a phi.
-    if (NonConstBB) return nullptr;  // More than one non-const value.
+    if (NonSimplifiedBB) return nullptr;  // More than one non-simplified value.
 
-    NonConstBB = PN->getIncomingBlock(i);
+    NonSimplifiedBB = InBB;
+    NonSimplifiedInVal = InVal;
+    NewPhiValues.push_back(nullptr);
 
     // If the InVal is an invoke at the end of the pred block, then we can't
    // insert a computation after it without breaking the edge.
     if (isa<InvokeInst>(InVal))
-      if (cast<Instruction>(InVal)->getParent() == NonConstBB)
+      if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
         return nullptr;
 
     // If the incoming non-constant value is reachable from the phis block,
     // we'll push the operation across a loop backedge. This could result in
     // an infinite combine loop, and is generally non-profitable (especially
     // if the operation was originally outside the loop).
-    if (isPotentiallyReachable(PN->getParent(), NonConstBB, nullptr, &DT, LI))
+    if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
+                               LI))
       return nullptr;
   }
 
-  // If there is exactly one non-constant value, we can insert a copy of the
+  // If there is exactly one non-simplified value, we can insert a copy of the
   // operation in that block. However, if this is a critical edge, we would be
   // inserting the computation on some other paths (e.g. inside a loop). Only
   // do this if the pred block is unconditionally branching into the phi block.
   // Also, make sure that the pred block is not dead code.
-  if (NonConstBB != nullptr) {
-    BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
-    if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(NonConstBB))
+  if (NonSimplifiedBB != nullptr) {
+    BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
+    if (!BI || !BI->isUnconditional() ||
+        !DT.isReachableFromEntry(NonSimplifiedBB))
       return nullptr;
   }
 
@@ -1219,83 +1266,23 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
 
   // If we are going to have to insert a new computation, do so right before the
   // predecessor's terminator.
-  if (NonConstBB)
-    Builder.SetInsertPoint(NonConstBB->getTerminator());
-
-  // Next, add all of the operands to the PHI.
-  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
-    // We only currently try to fold the condition of a select when it is a phi,
-    // not the true/false values.
-    Value *TrueV = SI->getTrueValue();
-    Value *FalseV = SI->getFalseValue();
-    BasicBlock *PhiTransBB = PN->getParent();
-    for (unsigned i = 0; i != NumPHIValues; ++i) {
-      BasicBlock *ThisBB = PN->getIncomingBlock(i);
-      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
-      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
-      Value *InV = nullptr;
-      // Beware of ConstantExpr: it may eventually evaluate to getNullValue,
-      // even if currently isNullValue gives false.
-      Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
-      // For vector constants, we cannot use isNullValue to fold into
-      // FalseVInPred versus TrueVInPred. When we have individual nonzero
-      // elements in the vector, we will incorrectly fold InC to
-      // `TrueVInPred`.
-      if (InC && isa<ConstantInt>(InC))
-        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
-      else {
-        // Generate the select in the same block as PN's current incoming block.
-        // Note: ThisBB need not be the NonConstBB because vector constants
-        // which are constants by definition are handled here.
-        // FIXME: This can lead to an increase in IR generation because we might
-        // generate selects for vector constant phi operand, that could not be
-        // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For
-        // non-vector phis, this transformation was always profitable because
-        // the select would be generated exactly once in the NonConstBB.
-        Builder.SetInsertPoint(ThisBB->getTerminator());
-        InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred,
-                                   FalseVInPred, "phi.sel");
-      }
-      NewPN->addIncoming(InV, ThisBB);
-    }
-  } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
-    Constant *C = cast<Constant>(I.getOperand(1));
-    for (unsigned i = 0; i != NumPHIValues; ++i) {
-      Value *InV = nullptr;
-      if (auto *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
-        InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
-      else
-        InV = Builder.CreateCmp(CI->getPredicate(), PN->getIncomingValue(i),
-                                C, "phi.cmp");
-      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
-    }
-  } else if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
-    for (unsigned i = 0; i != NumPHIValues; ++i) {
-      Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i),
-                                             Builder);
-      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
-    }
-  } else if (isa<FreezeInst>(&I)) {
-    for (unsigned i = 0; i != NumPHIValues; ++i) {
-      Value *InV;
-      if (NonConstBB == PN->getIncomingBlock(i))
-        InV = Builder.CreateFreeze(PN->getIncomingValue(i), "phi.fr");
-      else
-        InV = PN->getIncomingValue(i);
-      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
-    }
-  } else {
-    CastInst *CI = cast<CastInst>(&I);
-    Type *RetTy = CI->getType();
-    for (unsigned i = 0; i != NumPHIValues; ++i) {
-      Value *InV;
-      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
-        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
+  Instruction *Clone = nullptr;
+  if (NonSimplifiedBB) {
+    Clone = I.clone();
+    for (Use &U : Clone->operands()) {
+      if (U == PN)
+        U = NonSimplifiedInVal;
       else
-        InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i),
-                                 I.getType(), "phi.cast");
-      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+        U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
     }
+    InsertNewInstBefore(Clone, *NonSimplifiedBB->getTerminator());
+  }
+
+  for (unsigned i = 0; i != NumPHIValues; ++i) {
+    if (NewPhiValues[i])
+      NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
+    else
+      NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
   }
 
   for (User *U : make_early_inc_range(PN->users())) {
@@ -1696,6 +1683,35 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
     return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
   }
 
+  auto createBinOpReverse = [&](Value *X, Value *Y) {
+    Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
+    if (auto *BO = dyn_cast<BinaryOperator>(V))
+      BO->copyIRFlags(&Inst);
+    Module *M = Inst.getModule();
+    Function *F = Intrinsic::getDeclaration(
+        M, Intrinsic::experimental_vector_reverse, V->getType());
+    return CallInst::Create(F, V);
+  };
+
+  // NOTE: Reverse shuffles don't require the speculative execution protection
+  // below because they don't affect which lanes take part in the computation.
+
+  Value *V1, *V2;
+  if (match(LHS, m_VecReverse(m_Value(V1)))) {
+    // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
+    if (match(RHS, m_VecReverse(m_Value(V2))) &&
+        (LHS->hasOneUse() || RHS->hasOneUse() ||
+         (LHS == RHS && LHS->hasNUses(2))))
+      return createBinOpReverse(V1, V2);
+
+    // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
+    if (LHS->hasOneUse() && isSplatValue(RHS))
+      return createBinOpReverse(V1, RHS);
+  }
+  // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
+  else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
+    return createBinOpReverse(LHS, V2);
+
  // It may not be safe to reorder shuffles and things like div, urem, etc.
  // because we may trap when executing those ops on unknown vector elements.
  // See PR20059.
@@ -1711,7 +1727,6 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
 
   // If both arguments of the binary operation are shuffles that use the same
   // mask and shuffle within a single vector, move the shuffle after the binop.
-  Value *V1, *V2;
   if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
       match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
       V1->getType() == V2->getType() &&
@@ -2228,7 +2243,7 @@ Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
     if (Instruction *I = visitBitCast(*BCI)) {
       if (I != BCI) {
         I->takeName(BCI);
-        BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
+        I->insertInto(BCI->getParent(), BCI->getIterator());
         replaceInstUsesWith(*BCI, I);
       }
       return &GEP;
@@ -2434,10 +2449,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       NewGEP->setOperand(DI, NewPN);
     }
 
-    GEP.getParent()->getInstList().insert(
-        GEP.getParent()->getFirstInsertionPt(), NewGEP);
-    replaceOperand(GEP, 0, NewGEP);
-    PtrOp = NewGEP;
+    NewGEP->insertInto(GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
+    return replaceOperand(GEP, 0, NewGEP);
   }
 
   if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
@@ -2450,7 +2463,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     unsigned AS = GEP.getPointerAddressSpace();
     if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
         DL.getIndexSizeInBits(AS)) {
-      uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
+      uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
 
       bool Matched = false;
       uint64_t C;
@@ -2580,8 +2593,9 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
         // Check that changing the type amounts to dividing the index by a scale
         // factor.
-        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
-        uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy).getFixedSize();
+        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
+        uint64_t SrcSize =
+            DL.getTypeAllocSize(StrippedPtrEltTy).getFixedValue();
         if (ResSize && SrcSize % ResSize == 0) {
           Value *Idx = GEP.getOperand(1);
           unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2617,10 +2631,10 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           StrippedPtrEltTy->isArrayTy()) {
         // Check that changing to the array element type amounts to dividing the
        // index by a scale factor.
-        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
+        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
         uint64_t ArrayEltSize =
             DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
-                .getFixedSize();
+                .getFixedValue();
         if (ResSize && ArrayEltSize % ResSize == 0) {
           Value *Idx = GEP.getOperand(1);
           unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2681,7 +2695,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         BasePtrOffset.isNonNegative()) {
       APInt AllocSize(
           IdxWidth,
-          DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize());
+          DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinValue());
       if (BasePtrOffset.ule(AllocSize)) {
         return GetElementPtrInst::CreateInBounds(
             GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
@@ -2724,7 +2738,7 @@ static bool isRemovableWrite(CallBase &CB, Value *UsedV,
   // If the only possible side effect of the call is writing to the alloca,
   // and the result isn't used, we can safely remove any reads implied by the
   // call including those which might read the alloca itself.
-  Optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
+  std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
   return Dest && Dest->Ptr == UsedV;
 }
 
@@ -2732,7 +2746,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
                                  SmallVectorImpl<WeakTrackingVH> &Users,
                                  const TargetLibraryInfo &TLI) {
   SmallVector<Instruction*, 4> Worklist;
-  const Optional<StringRef> Family = getAllocationFamily(AI, &TLI);
+  const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
   Worklist.push_back(AI);
 
   do {
@@ -2778,7 +2792,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
           MemIntrinsic *MI = cast<MemIntrinsic>(II);
           if (MI->isVolatile() || MI->getRawDest() != PI)
             return false;
-          LLVM_FALLTHROUGH;
+          [[fallthrough]];
         }
         case Intrinsic::assume:
         case Intrinsic::invariant_start:
@@ -2808,7 +2822,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
           continue;
         }
 
-        if (getReallocatedOperand(cast<CallBase>(I), &TLI) == PI &&
+        if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
             getAllocationFamily(I, &TLI) == Family) {
           assert(Family);
           Users.emplace_back(I);
@@ -2902,7 +2916,7 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
         Module *M = II->getModule();
         Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
         InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
-                           None, "", II->getParent());
+                           std::nullopt, "", II->getParent());
       }
 
       // Remove debug intrinsics which describe the value contained within the
@@ -3052,7 +3066,7 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
   // realloc() entirely.
   CallInst *CI = dyn_cast<CallInst>(Op);
   if (CI && CI->hasOneUse())
-    if (Value *ReallocatedOp = getReallocatedOperand(CI, &TLI))
+    if (Value *ReallocatedOp = getReallocatedOperand(CI))
       return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
 
   // If we optimize for code size, try to move the call to free before the null
@@ -3166,31 +3180,41 @@ Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
     return visitUnconditionalBranchInst(BI);
 
   // Change br (not X), label True, label False to: br X, label False, True
-  Value *X = nullptr;
-  if (match(&BI, m_Br(m_Not(m_Value(X)), m_BasicBlock(), m_BasicBlock())) &&
-      !isa<Constant>(X)) {
+  Value *Cond = BI.getCondition();
+  Value *X;
+  if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
    // Swap Destinations and condition...
     BI.swapSuccessors();
     return replaceOperand(BI, 0, X);
   }
 
+  // Canonicalize logical-and-with-invert as logical-or-with-invert.
+  // This is done by inverting the condition and swapping successors:
+  // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
+  Value *Y;
+  if (isa<SelectInst>(Cond) &&
+      match(Cond,
+            m_OneUse(m_LogicalAnd(m_Value(X), m_OneUse(m_Not(m_Value(Y))))))) {
+    Value *NotX = Builder.CreateNot(X, "not." + X->getName());
+    Value *Or = Builder.CreateLogicalOr(NotX, Y);
+    BI.swapSuccessors();
+    return replaceOperand(BI, 0, Or);
+  }
+
   // If the condition is irrelevant, remove the use so that other
   // transforms on the condition become more effective.
-  if (!isa<ConstantInt>(BI.getCondition()) &&
-      BI.getSuccessor(0) == BI.getSuccessor(1))
-    return replaceOperand(
-        BI, 0, ConstantInt::getFalse(BI.getCondition()->getType()));
+  if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
+    return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
 
   // Canonicalize, for example, fcmp_one -> fcmp_oeq.
   CmpInst::Predicate Pred;
-  if (match(&BI, m_Br(m_OneUse(m_FCmp(Pred, m_Value(), m_Value())),
-                      m_BasicBlock(), m_BasicBlock())) &&
+  if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
       !isCanonicalPredicate(Pred)) {
     // Swap destinations and condition.
-    CmpInst *Cond = cast<CmpInst>(BI.getCondition());
-    Cond->setPredicate(CmpInst::getInversePredicate(Pred));
+    auto *Cmp = cast<CmpInst>(Cond);
+    Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
     BI.swapSuccessors();
-    Worklist.push(Cond);
+    Worklist.push(Cmp);
     return &BI;
   }
 
@@ -3218,7 +3242,7 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
 
     // Compute the number of leading bits we can ignore.
     // TODO: A better way to determine this would use ComputeNumSignBits().
-    for (auto &C : SI.cases()) {
+    for (const auto &C : SI.cases()) {
       LeadingKnownZeros = std::min(
          LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
      LeadingKnownOnes = std::min(
@@ -3247,6 +3271,81 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
   return nullptr;
 }
 
+Instruction *
+InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
+  auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
+  if (!WO)
+    return nullptr;
+
+  Intrinsic::ID OvID = WO->getIntrinsicID();
+  const APInt *C = nullptr;
+  if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
+    if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
+                                 OvID == Intrinsic::umul_with_overflow)) {
+      // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
+      if (C->isAllOnes())
+        return BinaryOperator::CreateNeg(WO->getLHS());
+      // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
+      if (C->isPowerOf2()) {
+        return BinaryOperator::CreateShl(
+            WO->getLHS(),
+            ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
+      }
+    }
+  }
+
+  // We're extracting from an overflow intrinsic. See if we're the only user.
+  // That allows us to simplify multiple result intrinsics to simpler things
+  // that just get one value.
+  if (!WO->hasOneUse())
+    return nullptr;
+
+  // Check if we're grabbing only the result of a 'with overflow' intrinsic
+  // and replace it with a traditional binary instruction.
+  if (*EV.idx_begin() == 0) {
+    Instruction::BinaryOps BinOp = WO->getBinaryOp();
+    Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
+    // Replace the old instruction's uses with poison.
+    replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
+    eraseInstFromFunction(*WO);
+    return BinaryOperator::Create(BinOp, LHS, RHS);
+  }
+
+  assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
+
+  // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
+  if (OvID == Intrinsic::usub_with_overflow)
+    return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
+
+  // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
+  // +1 is not possible because we assume signed values.
+  if (OvID == Intrinsic::smul_with_overflow &&
+      WO->getLHS()->getType()->isIntOrIntVectorTy(1))
+    return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
+
+  // If only the overflow result is used, and the right hand side is a
+  // constant (or constant splat), we can remove the intrinsic by directly
+  // checking for overflow.
+  if (C) {
+    // Compute the no-wrap range for LHS given RHS=C, then construct an
+    // equivalent icmp, potentially using an offset.
+    ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
+        WO->getBinaryOp(), *C, WO->getNoWrapKind());
+
+    CmpInst::Predicate Pred;
+    APInt NewRHSC, Offset;
+    NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
+    auto *OpTy = WO->getRHS()->getType();
+    auto *NewLHS = WO->getLHS();
+    if (Offset != 0)
+      NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
+    return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
+                        ConstantInt::get(OpTy, NewRHSC));
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
   Value *Agg = EV.getAggregateOperand();
 
@@ -3294,7 +3393,7 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
         Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
                                                   EV.getIndices());
         return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
-                                       makeArrayRef(insi, inse));
+                                       ArrayRef(insi, inse));
       }
       if (insi == inse)
        // The insert list is a prefix of the extract list
@@ -3306,60 +3405,13 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
       // with
       // %E extractvalue { i32 } { i32 42 }, 0
       return ExtractValueInst::Create(IV->getInsertedValueOperand(),
-                                      makeArrayRef(exti, exte));
+                                      ArrayRef(exti, exte));
   }
 
-  if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
-    // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
-    Intrinsic::ID OvID = WO->getIntrinsicID();
-    if (*EV.idx_begin() == 0 &&
-        (OvID == Intrinsic::smul_with_overflow ||
-         OvID == Intrinsic::umul_with_overflow) &&
-        match(WO->getArgOperand(1), m_AllOnes())) {
-      return BinaryOperator::CreateNeg(WO->getArgOperand(0));
-    }
-
-    // We're extracting from an overflow intrinsic, see if we're the only user,
-    // which allows us to simplify multiple result intrinsics to simpler
-    // things that just get one value.
-    if (WO->hasOneUse()) {
-      // Check if we're grabbing only the result of a 'with overflow' intrinsic
-      // and replace it with a traditional binary instruction.
-      if (*EV.idx_begin() == 0) {
-        Instruction::BinaryOps BinOp = WO->getBinaryOp();
-        Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
-        // Replace the old instruction's uses with poison.
-        replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
-        eraseInstFromFunction(*WO);
-        return BinaryOperator::Create(BinOp, LHS, RHS);
-      }
+  if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
+    return R;
 
-      assert(*EV.idx_begin() == 1 &&
-             "unexpected extract index for overflow inst");
-
-      // If only the overflow result is used, and the right hand side is a
-      // constant (or constant splat), we can remove the intrinsic by directly
-      // checking for overflow.
-      const APInt *C;
-      if (match(WO->getRHS(), m_APInt(C))) {
-        // Compute the no-wrap range for LHS given RHS=C, then construct an
-        // equivalent icmp, potentially using an offset.
-        ConstantRange NWR =
-            ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C,
-                                                 WO->getNoWrapKind());
-
-        CmpInst::Predicate Pred;
-        APInt NewRHSC, Offset;
-        NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
-        auto *OpTy = WO->getRHS()->getType();
-        auto *NewLHS = WO->getLHS();
-        if (Offset != 0)
-          NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
-        return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
-                            ConstantInt::get(OpTy, NewRHSC));
-      }
-    }
-  }
-
-  if (LoadInst *L = dyn_cast<LoadInst>(Agg))
+  if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
     // If the (non-volatile) load only has one use, we can rewrite this to a
     // load from a GEP. This reduces the size of the load. If a load is used
     // only by extractvalue instructions then this either must have been
@@ -3386,6 +3438,12 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
     // the wrong spot, so use replaceInstUsesWith().
     return replaceInstUsesWith(EV, NL);
   }
+  }
+
+  if (auto *PN = dyn_cast<PHINode>(Agg))
+    if (Instruction *Res = foldOpIntoPhi(EV, PN))
+      return Res;
+
   // We could simplify extracts from other values. Note that nested extracts may
   // already be simplified implicitly by the above: extract (extract (insert) )
   // will be translated into extract ( insert ( extract ) ) first and then just
@@ -3771,7 +3829,8 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
   // poison. If the only source of new poison is flags, we can simply
   // strip them (since we know the only use is the freeze and nothing can
   // benefit from them.)
-  if (canCreateUndefOrPoison(cast<Operator>(OrigOp), /*ConsiderFlags*/ false))
+  if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
+                             /*ConsiderFlagsAndMetadata*/ false))
     return nullptr;
 
   // If operand is guaranteed not to be poison, there is no need to add freeze
@@ -3779,7 +3838,8 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
   // poison.
   Use *MaybePoisonOperand = nullptr;
   for (Use &U : OrigOpInst->operands()) {
-    if (isGuaranteedNotToBeUndefOrPoison(U.get()))
+    if (isa<MetadataAsValue>(U.get()) ||
+        isGuaranteedNotToBeUndefOrPoison(U.get()))
       continue;
     if (!MaybePoisonOperand)
      MaybePoisonOperand = &U;
@@ -3787,7 +3847,7 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
      return nullptr;
  }
 
-  OrigOpInst->dropPoisonGeneratingFlags();
+  OrigOpInst->dropPoisonGeneratingFlagsAndMetadata();
 
  // If all operands are guaranteed to be non-poison, we can drop freeze.
   if (!MaybePoisonOperand)
@@ -3850,7 +3910,7 @@ Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
 
     Instruction *I = dyn_cast<Instruction>(V);
     if (!I || canCreateUndefOrPoison(cast<Operator>(I),
-                                     /*ConsiderFlags*/ false))
+                                     /*ConsiderFlagsAndMetadata*/ false))
       return nullptr;
 
     DropFlags.push_back(I);
@@ -3858,7 +3918,7 @@ Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
   }
 
   for (Instruction *I : DropFlags)
-    I->dropPoisonGeneratingFlags();
+    I->dropPoisonGeneratingFlagsAndMetadata();
 
   if (StartNeedsFreeze) {
     Builder.SetInsertPoint(StartBB->getTerminator());
@@ -3880,21 +3940,14 @@ bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
   // *all* uses if the operand is an invoke/callbr and the use is in a phi on
   // the normal/default destination. This is why the domination check in the
   // replacement below is still necessary.
-  Instruction *MoveBefore = nullptr;
+  Instruction *MoveBefore;
   if (isa<Argument>(Op)) {
-    MoveBefore = &FI.getFunction()->getEntryBlock().front();
-    while (isa<AllocaInst>(MoveBefore))
-      MoveBefore = MoveBefore->getNextNode();
-  } else if (auto *PN = dyn_cast<PHINode>(Op)) {
-    MoveBefore = PN->getParent()->getFirstNonPHI();
-  } else if (auto *II = dyn_cast<InvokeInst>(Op)) {
-    MoveBefore = II->getNormalDest()->getFirstNonPHI();
-  } else if (auto *CB = dyn_cast<CallBrInst>(Op)) {
-    MoveBefore = CB->getDefaultDest()->getFirstNonPHI();
+    MoveBefore =
+        &*FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
   } else {
-    auto *I = cast<Instruction>(Op);
-    assert(!I->isTerminator() && "Cannot be a terminator");
-    MoveBefore = I->getNextNode();
+    MoveBefore = cast<Instruction>(Op)->getInsertionPointAfterDef();
+    if (!MoveBefore)
+      return false;
   }
 
   bool Changed = false;
@@ -3987,7 +4040,7 @@ static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
     // to allow reload along used path as described below. Otherwise, this
     // is simply a store to a dead allocation which will be removed.
     return false;
-  Optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
+  std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
   if (!Dest)
     return false;
   auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
@@ -4103,7 +4156,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
 
   SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
   SmallSet<DebugVariable, 4> SunkVariables;
-  for (auto User : DbgUsersToSink) {
+  for (auto *User : DbgUsersToSink) {
     // A dbg.declare instruction should not be cloned, since there can only be
     // one per variable fragment. It should be left in the original place
     // because the sunk instruction is not an alloca (otherwise we could not be
@@ -4118,6 +4171,11 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
     if (!SunkVariables.insert(DbgUserVariable).second)
       continue;
 
+    // Leave dbg.assign intrinsics in their original positions and there should
+    // be no need to insert a clone.
+    if (isa<DbgAssignIntrinsic>(User))
+      continue;
+
     DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
     if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
       DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
@@ -4190,9 +4248,9 @@ bool InstCombinerImpl::run() {
   // prove that the successor is not executed more frequently than our block.
  // Return the UserBlock if successful.
   auto getOptionalSinkBlockForInst =
-      [this](Instruction *I) -> Optional<BasicBlock *> {
+      [this](Instruction *I) -> std::optional<BasicBlock *> {
     if (!EnableCodeSinking)
-      return None;
+      return std::nullopt;
 
     BasicBlock *BB = I->getParent();
     BasicBlock *UserParent = nullptr;
@@ -4202,7 +4260,7 @@ bool InstCombinerImpl::run() {
       if (U->isDroppable())
        continue;
      if (NumUsers > MaxSinkNumUsers)
-        return None;
+        return std::nullopt;
 
       Instruction *UserInst = cast<Instruction>(U);
       // Special handling for Phi nodes - get the block the use occurs in.
@@ -4213,14 +4271,14 @@ bool InstCombinerImpl::run() {
           // sophisticated analysis (i.e finding NearestCommonDominator of
          // these use blocks).
          if (UserParent && UserParent != PN->getIncomingBlock(i))
-            return None;
+            return std::nullopt;
           UserParent = PN->getIncomingBlock(i);
         }
       }
       assert(UserParent && "expected to find user block!");
     } else {
       if (UserParent && UserParent != UserInst->getParent())
-        return None;
+        return std::nullopt;
 
       UserParent = UserInst->getParent();
     }
@@ -4230,7 +4288,7 @@ bool InstCombinerImpl::run() {
       // Try sinking to another block. If that block is unreachable, then do
      // not bother. SimplifyCFG should handle it.
      if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
-        return None;
+        return std::nullopt;
 
       auto *Term = UserParent->getTerminator();
       // See if the user is one of our successors that has only one
@@ -4242,7 +4300,7 @@ bool InstCombinerImpl::run() {
       // - the User will be executed at most once.
      // So sinking I down to User is always profitable or neutral.
      if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
-        return None;
+        return std::nullopt;
 
       assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
     }
@@ -4252,7 +4310,7 @@ bool InstCombinerImpl::run() {
 
     // No user or only has droppable users.
     if (!UserParent)
-      return None;
+      return std::nullopt;
 
     return UserParent;
   };
@@ -4312,7 +4370,7 @@ bool InstCombinerImpl::run() {
         InsertPos = InstParent->getFirstNonPHI()->getIterator();
       }
 
-      InstParent->getInstList().insert(InsertPos, Result);
+      Result->insertInto(InstParent, InsertPos);
 
       // Push the new instruction and any users onto the worklist.
       Worklist.pushUsersToWorkList(*Result);
@@ -4360,7 +4418,7 @@ public:
       const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
       if (!MDScopeList || !Container.insert(MDScopeList).second)
        return;
-      for (auto &MDOperand : MDScopeList->operands())
+      for (const auto &MDOperand : MDScopeList->operands())
         if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
           Container.insert(MDScope);
     };
@@ -4543,6 +4601,13 @@ static bool combineInstructionsOverFunction(
   bool MadeIRChange = false;
   if (ShouldLowerDbgDeclare)
     MadeIRChange = LowerDbgDeclare(F);
+  // LowerDbgDeclare calls RemoveRedundantDbgInstrs, but LowerDbgDeclare will
+  // almost never return true when running an assignment tracking build. Take
+  // this opportunity to do some clean up for assignment tracking builds too.
+  if (!MadeIRChange && isAssignmentTrackingEnabled(*F.getParent())) {
+    for (auto &BB : F)
+      RemoveRedundantDbgInstrs(&BB);
+  }
 
   // Iterate while there is work to do.
   unsigned Iteration = 0;
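
For illustration, the distributive-law factorization that tryFactorization performs (now allowed when only one of the two inner operations is erasable, rather than both) can be seen on a small IR sketch. This is a minimal example, assuming it is run through opt -passes=instcombine; the function and value names are illustrative, and the exact output names may differ:

    define i8 @factorize(i8 %a, i8 %b, i8 %c) {
      %ab = mul i8 %a, %b
      %ac = mul i8 %a, %c
      ; "(A*B)+(A*C)" is rewritten to "A*(B+C)"
      %r = add i8 %ab, %ac
      ret i8 %r
    }
    ; expected shape after instcombine (roughly):
    ;   %bc = add i8 %b, %c
    ;   %r = mul i8 %a, %bc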
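The new foldExtractOfOverflowIntrinsic helper adds, among others, a power-of-two multiply fold on the value result of a mul-with-overflow intrinsic. A minimal sketch under the same assumptions (illustrative names, opt -passes=instcombine):

    declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8)

    define i8 @mul_by_8(i8 %x) {
      %m = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %x, i8 8)
      ; extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
      %v = extractvalue { i8, i1 } %m, 0
      ret i8 %v
    }
    ; expected to become: %v = shl i8 %x, 3 (the dead intrinsic call is removed)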
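The branch canonicalization added to visitBranchInst turns a logical-and-with-invert condition into a logical-or-with-invert and swaps the successors. A hedged sketch of the pattern it matches (block and value names are illustrative; logical and/or are the usual select forms):

    define void @br_and_not(i1 %x, i1 %y) {
    entry:
      %not.y = xor i1 %y, true
      %and = select i1 %x, i1 %not.y, i1 false   ; X && !Y
      br i1 %and, label %t, label %f
    t:
      ret void
    f:
      ret void
    }
    ; expected: br (X && !Y), T, F --> br (!X || Y), F, T, i.e. roughly
    ;   %not.x = xor i1 %x, true
    ;   %or = select i1 %not.x, i1 true, i1 %y
    ;   br i1 %or, label %f, label %t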
