Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstructionCombining.cpp')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp  708
1 file changed, 424 insertions(+), 284 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 801c09a317a7f..b3254c10a0b2b 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -60,6 +60,7 @@
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
@@ -129,10 +130,6 @@ static cl::opt<bool>
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
cl::init(true));
-static cl::opt<bool>
-EnableExpensiveCombines("expensive-combines",
- cl::desc("Enable expensive instruction combines"));
-
static cl::opt<unsigned> LimitMaxIterations(
"instcombine-max-iterations",
cl::desc("Limit the maximum number of instruction combining iterations"),
@@ -267,7 +264,7 @@ static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
/// cast to eliminate one of the associative operations:
/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
-static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1) {
+static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombiner &IC) {
auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
if (!Cast || !Cast->hasOneUse())
return false;
@@ -300,8 +297,8 @@ static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1) {
Type *DestTy = C1->getType();
Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
- Cast->setOperand(0, BinOp2->getOperand(0));
- BinOp1->setOperand(1, FoldedC);
+ IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
+ IC.replaceOperand(*BinOp1, 1, FoldedC);
return true;
}
@@ -350,8 +347,8 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// Does "B op C" simplify?
if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
// It simplifies to V. Form "A op V".
- I.setOperand(0, A);
- I.setOperand(1, V);
+ replaceOperand(I, 0, A);
+ replaceOperand(I, 1, V);
bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
@@ -383,8 +380,8 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// Does "A op B" simplify?
if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
// It simplifies to V. Form "V op C".
- I.setOperand(0, V);
- I.setOperand(1, C);
+ replaceOperand(I, 0, V);
+ replaceOperand(I, 1, C);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
ClearSubclassDataAfterReassociation(I);
@@ -396,7 +393,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
}
if (I.isAssociative() && I.isCommutative()) {
- if (simplifyAssocCastAssoc(&I)) {
+ if (simplifyAssocCastAssoc(&I, *this)) {
Changed = true;
++NumReassoc;
continue;
@@ -411,8 +408,8 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// Does "C op A" simplify?
if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
// It simplifies to V. Form "V op B".
- I.setOperand(0, V);
- I.setOperand(1, B);
+ replaceOperand(I, 0, V);
+ replaceOperand(I, 1, B);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
ClearSubclassDataAfterReassociation(I);
@@ -431,8 +428,8 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// Does "C op A" simplify?
if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
// It simplifies to V. Form "B op V".
- I.setOperand(0, B);
- I.setOperand(1, V);
+ replaceOperand(I, 0, B);
+ replaceOperand(I, 1, V);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
ClearSubclassDataAfterReassociation(I);
@@ -465,8 +462,8 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
}
InsertNewInstWith(NewBO, I);
NewBO->takeName(Op1);
- I.setOperand(0, NewBO);
- I.setOperand(1, ConstantExpr::get(Opcode, C1, C2));
+ replaceOperand(I, 0, NewBO);
+ replaceOperand(I, 1, ConstantExpr::get(Opcode, C1, C2));
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
ClearSubclassDataAfterReassociation(I);
@@ -925,8 +922,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
if (CI->hasOneUse()) {
Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
- if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
- (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+
+ // FIXME: This is a hack to avoid infinite looping with min/max patterns.
+ // We have to ensure that vector constants that only differ with
+ // undef elements are treated as equivalent.
+ auto areLooselyEqual = [](Value *A, Value *B) {
+ if (A == B)
+ return true;
+
+ // Test for vector constants.
+ Constant *ConstA, *ConstB;
+ if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
+ return false;
+
+ // TODO: Deal with FP constants?
+ if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
+ return false;
+
+ // Compare for equality including undefs as equal.
+ auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
+ const APInt *C;
+ return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
+ };
+
+ if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
+ (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
return nullptr;
}
}
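For illustration only (not part of the patch): a minimal standalone sketch, assuming the LLVM C++ API of this tree, of what the areLooselyEqual lambda above accepts. Two integer vector constants that differ only in undef lanes fold to an icmp result whose defined lanes are all true, and the undef-allowing splat match then sees a splat of one. The helper name is hypothetical.

// Hypothetical standalone helper, not from the patch: mirrors areLooselyEqual
// on two concrete vector constants.
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static bool looselyEqualDemo(LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  Constant *One = ConstantInt::get(I32, 1);
  Constant *Und = UndefValue::get(I32);
  Constant *A = ConstantVector::get({One, Und, One, One}); // <1, undef, 1, 1>
  Constant *B = ConstantVector::get({One, One, Und, One}); // <1, 1, undef, 1>
  // Element-wise icmp eq folds to <true, undef, undef, true>.
  Constant *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, A, B);
  // The undef-allowing splat match treats this as a splat of 'true', so A and
  // B count as equal even though they are not pointer-identical constants.
  const APInt *C;
  return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
}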
@@ -951,7 +971,7 @@ static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
if (!ConstIsRHS)
std::swap(Op0, Op1);
- Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp");
+ Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phi.bo");
auto *FPInst = dyn_cast<Instruction>(RI);
if (FPInst && isa<FPMathOperator>(FPInst))
FPInst->copyFastMathFlags(I);
@@ -1056,7 +1076,7 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// the select would be generated exactly once in the NonConstBB.
Builder.SetInsertPoint(ThisBB->getTerminator());
InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred,
- FalseVInPred, "phitmp");
+ FalseVInPred, "phi.sel");
}
NewPN->addIncoming(InV, ThisBB);
}
@@ -1064,14 +1084,11 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
Constant *C = cast<Constant>(I.getOperand(1));
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InV = nullptr;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ if (auto *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
- else if (isa<ICmpInst>(CI))
- InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
- C, "phitmp");
else
- InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
- C, "phitmp");
+ InV = Builder.CreateCmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phi.cmp");
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
} else if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
@@ -1089,7 +1106,7 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
else
InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i),
- I.getType(), "phitmp");
+ I.getType(), "phi.cast");
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
}
@@ -1391,8 +1408,8 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
assert(Op != Parent.first->getOperand(Parent.second) &&
"Descaling was a no-op?");
- Parent.first->setOperand(Parent.second, Op);
- Worklist.Add(Parent.first);
+ replaceOperand(*Parent.first, Parent.second, Op);
+ Worklist.push(Parent.first);
// Now work back up the expression correcting nsw flags. The logic is based
// on the following observation: if X * Y is known not to overflow as a signed
@@ -1410,7 +1427,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
NoSignedWrap &= OpNoSignedWrap;
if (NoSignedWrap != OpNoSignedWrap) {
BO->setHasNoSignedWrap(NoSignedWrap);
- Worklist.Add(Ancestor);
+ Worklist.push(Ancestor);
}
} else if (Ancestor->getOpcode() == Instruction::Trunc) {
// The fact that the descaled input to the trunc has smaller absolute
@@ -1432,21 +1449,24 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
}
Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
- if (!Inst.getType()->isVectorTy()) return nullptr;
+ // FIXME: some of this is likely fine for scalable vectors
+ if (!isa<FixedVectorType>(Inst.getType()))
+ return nullptr;
BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
- unsigned NumElts = cast<VectorType>(Inst.getType())->getNumElements();
Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
- assert(cast<VectorType>(LHS->getType())->getNumElements() == NumElts);
- assert(cast<VectorType>(RHS->getType())->getNumElements() == NumElts);
+ assert(cast<VectorType>(LHS->getType())->getElementCount() ==
+ cast<VectorType>(Inst.getType())->getElementCount());
+ assert(cast<VectorType>(RHS->getType())->getElementCount() ==
+ cast<VectorType>(Inst.getType())->getElementCount());
// If both operands of the binop are vector concatenations, then perform the
// narrow binop on each pair of the source operands followed by concatenation
// of the results.
Value *L0, *L1, *R0, *R1;
- Constant *Mask;
- if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) &&
- match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) &&
+ ArrayRef<int> Mask;
+ if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
+ match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
LHS->hasOneUse() && RHS->hasOneUse() &&
cast<ShuffleVectorInst>(LHS)->isConcat() &&
cast<ShuffleVectorInst>(RHS)->isConcat()) {
@@ -1470,7 +1490,7 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
if (!isSafeToSpeculativelyExecute(&Inst))
return nullptr;
- auto createBinOpShuffle = [&](Value *X, Value *Y, Constant *M) {
+ auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
Value *XY = Builder.CreateBinOp(Opcode, X, Y);
if (auto *BO = dyn_cast<BinaryOperator>(XY))
BO->copyIRFlags(&Inst);
@@ -1480,8 +1500,8 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
// If both arguments of the binary operation are shuffles that use the same
// mask and shuffle within a single vector, move the shuffle after the binop.
Value *V1, *V2;
- if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))) &&
- match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(Mask))) &&
+ if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
+ match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
V1->getType() == V2->getType() &&
(LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
// Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
@@ -1491,17 +1511,19 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
// If both arguments of a commutative binop are select-shuffles that use the
// same mask with commuted operands, the shuffles are unnecessary.
if (Inst.isCommutative() &&
- match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) &&
- match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1),
- m_Specific(Mask)))) {
+ match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
+ match(RHS,
+ m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
auto *LShuf = cast<ShuffleVectorInst>(LHS);
auto *RShuf = cast<ShuffleVectorInst>(RHS);
// TODO: Allow shuffles that contain undefs in the mask?
// That is legal, but it reduces undef knowledge.
// TODO: Allow arbitrary shuffles by shuffling after binop?
// That might be legal, but we have to deal with poison.
- if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() &&
- RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) {
+ if (LShuf->isSelect() &&
+ !is_contained(LShuf->getShuffleMask(), UndefMaskElem) &&
+ RShuf->isSelect() &&
+ !is_contained(RShuf->getShuffleMask(), UndefMaskElem)) {
// Example:
// LHS = shuffle V1, V2, <0, 5, 6, 3>
// RHS = shuffle V2, V1, <0, 5, 6, 3>
@@ -1517,11 +1539,12 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
// intends to move shuffles closer to other shuffles and binops closer to
// other binops, so they can be folded. It may also enable demanded elements
// transforms.
+ unsigned NumElts = cast<FixedVectorType>(Inst.getType())->getNumElements();
Constant *C;
- if (match(&Inst, m_c_BinOp(
- m_OneUse(m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))),
- m_Constant(C))) &&
- V1->getType()->getVectorNumElements() <= NumElts) {
+ if (match(&Inst,
+ m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
+ m_Constant(C))) &&
+ cast<FixedVectorType>(V1->getType())->getNumElements() <= NumElts) {
assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
"Shuffle should not change scalar type");
@@ -1531,9 +1554,9 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
// reorder is not possible. A 1-to-1 mapping is not required. Example:
// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
bool ConstOp1 = isa<Constant>(RHS);
- SmallVector<int, 16> ShMask;
- ShuffleVectorInst::getShuffleMask(Mask, ShMask);
- unsigned SrcVecNumElts = V1->getType()->getVectorNumElements();
+ ArrayRef<int> ShMask = Mask;
+ unsigned SrcVecNumElts =
+ cast<FixedVectorType>(V1->getType())->getNumElements();
UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
bool MayChange = true;
@@ -1590,6 +1613,57 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
}
}
+ // Try to reassociate to sink a splat shuffle after a binary operation.
+ if (Inst.isAssociative() && Inst.isCommutative()) {
+ // Canonicalize shuffle operand as LHS.
+ if (isa<ShuffleVectorInst>(RHS))
+ std::swap(LHS, RHS);
+
+ Value *X;
+ ArrayRef<int> MaskC;
+ int SplatIndex;
+ BinaryOperator *BO;
+ if (!match(LHS,
+ m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
+ !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
+ X->getType() != Inst.getType() || !match(RHS, m_OneUse(m_BinOp(BO))) ||
+ BO->getOpcode() != Opcode)
+ return nullptr;
+
+ // FIXME: This may not be safe if the analysis allows undef elements. By
+ // moving 'Y' before the splat shuffle, we are implicitly assuming
+ // that it is not undef/poison at the splat index.
+ Value *Y, *OtherOp;
+ if (isSplatValue(BO->getOperand(0), SplatIndex)) {
+ Y = BO->getOperand(0);
+ OtherOp = BO->getOperand(1);
+ } else if (isSplatValue(BO->getOperand(1), SplatIndex)) {
+ Y = BO->getOperand(1);
+ OtherOp = BO->getOperand(0);
+ } else {
+ return nullptr;
+ }
+
+ // X and Y are splatted values, so perform the binary operation on those
+ // values followed by a splat followed by the 2nd binary operation:
+ // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
+ Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
+ UndefValue *Undef = UndefValue::get(Inst.getType());
+ SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
+ Value *NewSplat = Builder.CreateShuffleVector(NewBO, Undef, NewMask);
+ Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
+
+ // Intersect FMF on both new binops. Other (poison-generating) flags are
+ // dropped to be safe.
+ if (isa<FPMathOperator>(R)) {
+ R->copyFastMathFlags(&Inst);
+ R->andIRFlags(BO);
+ }
+ if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
+ NewInstBO->copyIRFlags(R);
+ return R;
+ }
+
return nullptr;
}
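As a rough scalar analogy (an assumed example, not part of the patch and not the IR the transform emits): because every lane of a splat holds the same value, the reassociation added above is the vector form of combining the splatted values once before broadcasting, so one scalar op replaces a whole-vector op. Names and the use of std::array are illustrative only.

// Toy model of "bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp"
// for an associative/commutative op (integer add), with std::array standing in
// for a fixed-width vector.
#include <array>
using Vec4 = std::array<int, 4>;

static Vec4 splat(int V) { return {V, V, V, V}; }
static Vec4 add(const Vec4 &A, const Vec4 &B) {
  Vec4 R;
  for (int I = 0; I != 4; ++I)
    R[I] = A[I] + B[I];
  return R;
}

// Before: two vector adds, one operand of the inner add is a broadcast.
static Vec4 before(int X, int Y, const Vec4 &Other) {
  return add(splat(X), add(splat(Y), Other));
}
// After: the splatted values are combined once, then broadcast.
static Vec4 after(int X, int Y, const Vec4 &Other) {
  return add(splat(X + Y), Other);
}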
@@ -1658,16 +1732,46 @@ static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
(GEP2.isInBounds() || GEP2.hasAllZeroIndices());
}
+/// Thread a GEP operation with constant indices through the constant true/false
+/// arms of a select.
+static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
+ InstCombiner::BuilderTy &Builder) {
+ if (!GEP.hasAllConstantIndices())
+ return nullptr;
+
+ Instruction *Sel;
+ Value *Cond;
+ Constant *TrueC, *FalseC;
+ if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
+ !match(Sel,
+ m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
+ return nullptr;
+
+ // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
+ // Propagate 'inbounds' and metadata from existing instructions.
+ // Note: using IRBuilder to create the constants for efficiency.
+ SmallVector<Value *, 4> IndexC(GEP.idx_begin(), GEP.idx_end());
+ bool IsInBounds = GEP.isInBounds();
+ Value *NewTrueC = IsInBounds ? Builder.CreateInBoundsGEP(TrueC, IndexC)
+ : Builder.CreateGEP(TrueC, IndexC);
+ Value *NewFalseC = IsInBounds ? Builder.CreateInBoundsGEP(FalseC, IndexC)
+ : Builder.CreateGEP(FalseC, IndexC);
+ return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
+}
+
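A source-level analogy (assumed example, not part of the patch) of what foldSelectGEP does: a constant-index offset applied to a select between two constant base pointers becomes a select between the two pre-offset constants. Function and array names are illustrative only.

// Both select arms and the index are constants, so the address arithmetic can
// be folded into each arm.
static int A[8], B[8];

int *getElemBefore(bool Cond) {
  int *Base = Cond ? &A[0] : &B[0]; // select of two constant pointers
  return Base + 3;                  // gep with an all-constant index
}

int *getElemAfter(bool Cond) {
  return Cond ? &A[3] : &B[3];      // select of the folded constant geps
}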
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
Type *GEPType = GEP.getType();
Type *GEPEltType = GEP.getSourceElementType();
+ bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
return replaceInstUsesWith(GEP, V);
// For vector geps, use the generic demanded vector support.
- if (GEP.getType()->isVectorTy()) {
- auto VWidth = GEP.getType()->getVectorNumElements();
+ // Skip if GEP return type is scalable. The number of elements is unknown at
+ // compile-time.
+ if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
+ auto VWidth = GEPFVTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
@@ -1679,7 +1783,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
// possible (decide on canonical form for pointer broadcast), 3) exploit
- // undef elements to decrease demanded bits
+ // undef elements to decrease demanded bits
}
Value *PtrOp = GEP.getOperand(0);
@@ -1703,13 +1807,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Type *IndexTy = (*I)->getType();
Type *NewIndexType =
IndexTy->isVectorTy()
- ? VectorType::get(NewScalarIndexTy, IndexTy->getVectorNumElements())
+ ? VectorType::get(NewScalarIndexTy,
+ cast<VectorType>(IndexTy)->getElementCount())
: NewScalarIndexTy;
// If the element type has zero size then any index over it is equivalent
// to an index of zero, so replace it with zero if it is not zero already.
Type *EltTy = GTI.getIndexedType();
- if (EltTy->isSized() && DL.getTypeAllocSize(EltTy) == 0)
+ if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
*I = Constant::getNullValue(NewIndexType);
MadeChange = true;
@@ -1789,10 +1894,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (J > 0) {
if (J == 1) {
CurTy = Op1->getSourceElementType();
- } else if (auto *CT = dyn_cast<CompositeType>(CurTy)) {
- CurTy = CT->getTypeAtIndex(Op1->getOperand(J));
} else {
- CurTy = nullptr;
+ CurTy =
+ GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
}
}
}
@@ -1808,8 +1912,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (DI == -1) {
// All the GEPs feeding the PHI are identical. Clone one down into our
// BB so that it can be merged with the current GEP.
- GEP.getParent()->getInstList().insert(
- GEP.getParent()->getFirstInsertionPt(), NewGEP);
} else {
// All the GEPs feeding the PHI differ at a single offset. Clone a GEP
// into the current block so it can be merged, and create a new PHI to
@@ -1827,12 +1929,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
PN->getIncomingBlock(I));
NewGEP->setOperand(DI, NewPN);
- GEP.getParent()->getInstList().insert(
- GEP.getParent()->getFirstInsertionPt(), NewGEP);
- NewGEP->setOperand(DI, NewPN);
}
- GEP.setOperand(0, NewGEP);
+ GEP.getParent()->getInstList().insert(
+ GEP.getParent()->getFirstInsertionPt(), NewGEP);
+ replaceOperand(GEP, 0, NewGEP);
PtrOp = NewGEP;
}
@@ -1932,8 +2033,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Update the GEP in place if possible.
if (Src->getNumOperands() == 2) {
GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
- GEP.setOperand(0, Src->getOperand(0));
- GEP.setOperand(1, Sum);
+ replaceOperand(GEP, 0, Src->getOperand(0));
+ replaceOperand(GEP, 1, Sum);
return &GEP;
}
Indices.append(Src->op_begin()+1, Src->op_end()-1);
@@ -1957,11 +2058,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GEP.getName());
}
- if (GEP.getNumIndices() == 1) {
+ // Skip if GEP source element type is scalable. The type alloc size is unknown
+ // at compile-time.
+ if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
unsigned AS = GEP.getPointerAddressSpace();
if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
DL.getIndexSizeInBits(AS)) {
- uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType);
+ uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
bool Matched = false;
uint64_t C;
@@ -2051,9 +2154,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// array. Because the array type is never stepped over (there
// is a leading zero) we can fold the cast into this GEP.
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
- GEP.setOperand(0, StrippedPtr);
GEP.setSourceElementType(XATy);
- return &GEP;
+ return replaceOperand(GEP, 0, StrippedPtr);
}
// Cannot replace the base pointer directly because StrippedPtr's
// address space is different. Instead, create a new GEP followed by
@@ -2075,10 +2177,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
}
- } else if (GEP.getNumOperands() == 2) {
- // Transform things like:
- // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
- // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+ } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
+ // Skip if GEP source element type is scalable. The type alloc size is
+ // unknown at compile-time.
+ // Transform things like:
+ // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+ // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
if (StrippedPtrEltTy->isArrayTy() &&
DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
DL.getTypeAllocSize(GEPEltType)) {
@@ -2102,8 +2206,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
// Check that changing the type amounts to dividing the index by a scale
// factor.
- uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
- uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy);
+ uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
+ uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy).getFixedSize();
if (ResSize && SrcSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2142,9 +2246,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
StrippedPtrEltTy->isArrayTy()) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
- uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
+ uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
uint64_t ArrayEltSize =
- DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType());
+ DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
+ .getFixedSize();
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2203,8 +2308,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
const DataLayout &DL) {
- return ArrTy->getArrayElementType() == VecTy->getVectorElementType() &&
- ArrTy->getArrayNumElements() == VecTy->getVectorNumElements() &&
+ auto *VecVTy = cast<VectorType>(VecTy);
+ return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
+ ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
};
if (GEP.getNumOperands() == 3 &&
@@ -2291,7 +2397,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
BasePtrOffset.isNonNegative()) {
- APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
+ APInt AllocSize(
+ IdxWidth,
+ DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize());
if (BasePtrOffset.ule(AllocSize)) {
return GetElementPtrInst::CreateInBounds(
GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1),
@@ -2301,6 +2409,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
+ if (Instruction *R = foldSelectGEP(GEP, Builder))
+ return R;
+
return nullptr;
}
@@ -2369,6 +2480,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
return false;
LLVM_FALLTHROUGH;
}
+ case Intrinsic::assume:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::lifetime_start:
@@ -2517,7 +2629,7 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
// If there are more than 2 instructions, check that they are noops
// i.e., they won't hurt the performance of the generated code.
if (FreeInstrBB->size() != 2) {
- for (const Instruction &Inst : *FreeInstrBB) {
+ for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
continue;
auto *Cast = dyn_cast<CastInst>(&Inst);
@@ -2579,60 +2691,108 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
// if (foo) free(foo);
// into
// free(foo);
- if (MinimizeSize)
- if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
- return I;
+ //
+ // Note that we can only do this for 'free' and not for any flavor of
+ // 'operator delete'; there is no 'operator delete' symbol for which we are
+ // permitted to invent a call, even if we're passing in a null pointer.
+ if (MinimizeSize) {
+ LibFunc Func;
+ if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
+ if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
+ return I;
+ }
return nullptr;
}
+static bool isMustTailCall(Value *V) {
+ if (auto *CI = dyn_cast<CallInst>(V))
+ return CI->isMustTailCall();
+ return false;
+}
+
Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
if (RI.getNumOperands() == 0) // ret void
return nullptr;
Value *ResultOp = RI.getOperand(0);
Type *VTy = ResultOp->getType();
- if (!VTy->isIntegerTy())
+ if (!VTy->isIntegerTy() || isa<Constant>(ResultOp))
+ return nullptr;
+
+ // Don't replace result of musttail calls.
+ if (isMustTailCall(ResultOp))
return nullptr;
// There might be assume intrinsics dominating this return that completely
// determine the value. If so, constant fold it.
KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
if (Known.isConstant())
- RI.setOperand(0, Constant::getIntegerValue(VTy, Known.getConstant()));
+ return replaceOperand(RI, 0,
+ Constant::getIntegerValue(VTy, Known.getConstant()));
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitUnconditionalBranchInst(BranchInst &BI) {
+ assert(BI.isUnconditional() && "Only for unconditional branches.");
+
+ // If this store is the second-to-last instruction in the basic block
+ // (excluding debug info and bitcasts of pointers) and if the block ends with
+ // an unconditional branch, try to move the store to the successor block.
+
+ auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
+ auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
+ return isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
+ };
+
+ BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
+ do {
+ if (BBI != FirstInstr)
+ --BBI;
+ } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
+
+ return dyn_cast<StoreInst>(BBI);
+ };
+
+ if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
+ if (mergeStoreIntoSuccessor(*SI))
+ return &BI;
return nullptr;
}
Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+ if (BI.isUnconditional())
+ return visitUnconditionalBranchInst(BI);
+
// Change br (not X), label True, label False to: br X, label False, True
Value *X = nullptr;
if (match(&BI, m_Br(m_Not(m_Value(X)), m_BasicBlock(), m_BasicBlock())) &&
!isa<Constant>(X)) {
// Swap Destinations and condition...
- BI.setCondition(X);
BI.swapSuccessors();
- return &BI;
+ return replaceOperand(BI, 0, X);
}
// If the condition is irrelevant, remove the use so that other
// transforms on the condition become more effective.
- if (BI.isConditional() && !isa<ConstantInt>(BI.getCondition()) &&
- BI.getSuccessor(0) == BI.getSuccessor(1)) {
- BI.setCondition(ConstantInt::getFalse(BI.getCondition()->getType()));
- return &BI;
- }
+ if (!isa<ConstantInt>(BI.getCondition()) &&
+ BI.getSuccessor(0) == BI.getSuccessor(1))
+ return replaceOperand(
+ BI, 0, ConstantInt::getFalse(BI.getCondition()->getType()));
- // Canonicalize, for example, icmp_ne -> icmp_eq or fcmp_one -> fcmp_oeq.
+ // Canonicalize, for example, fcmp_one -> fcmp_oeq.
CmpInst::Predicate Pred;
- if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())),
+ if (match(&BI, m_Br(m_OneUse(m_FCmp(Pred, m_Value(), m_Value())),
m_BasicBlock(), m_BasicBlock())) &&
!isCanonicalPredicate(Pred)) {
// Swap destinations and condition.
CmpInst *Cond = cast<CmpInst>(BI.getCondition());
Cond->setPredicate(CmpInst::getInversePredicate(Pred));
BI.swapSuccessors();
- Worklist.Add(Cond);
+ Worklist.push(Cond);
return &BI;
}
@@ -2651,8 +2811,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
"Result of expression should be constant");
Case.setValue(cast<ConstantInt>(NewCase));
}
- SI.setCondition(Op0);
- return &SI;
+ return replaceOperand(SI, 0, Op0);
}
KnownBits Known = computeKnownBits(Cond, 0, &SI);
@@ -2679,13 +2838,12 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
Builder.SetInsertPoint(&SI);
Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
- SI.setCondition(NewCond);
for (auto Case : SI.cases()) {
APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
}
- return &SI;
+ return replaceOperand(SI, 0, NewCond);
}
return nullptr;
@@ -3175,7 +3333,7 @@ Instruction *InstCombiner::visitFreeze(FreezeInst &I) {
/// instruction past all of the instructions between it and the end of its
/// block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
- assert(I->hasOneUse() && "Invariants didn't hold!");
+ assert(I->getSingleUndroppableUse() && "Invariants didn't hold!");
BasicBlock *SrcBlock = I->getParent();
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
@@ -3202,12 +3360,26 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
// We can only sink load instructions if there is nothing between the load and
// the end of block that could change the value.
if (I->mayReadFromMemory()) {
+ // We don't want to do any sophisticated alias analysis, so we only check
+ // the instructions after I in I's parent block if we try to sink to its
+ // successor block.
+ if (DestBlock->getUniquePredecessor() != I->getParent())
+ return false;
for (BasicBlock::iterator Scan = I->getIterator(),
E = I->getParent()->end();
Scan != E; ++Scan)
if (Scan->mayWriteToMemory())
return false;
}
+
+ I->dropDroppableUses([DestBlock](const Use *U) {
+ if (auto *I = dyn_cast<Instruction>(U->getUser()))
+ return I->getParent() != DestBlock;
+ return true;
+ });
+ /// FIXME: We could remove droppable uses that are not dominated by
+ /// the new position.
+
BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
I->moveBefore(&*InsertPos);
++NumSunkInst;
@@ -3219,60 +3391,70 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
// here, but that computation has been sunk.
SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
findDbgUsers(DbgUsers, I);
- for (auto *DII : reverse(DbgUsers)) {
- if (DII->getParent() == SrcBlock) {
- if (isa<DbgDeclareInst>(DII)) {
- // A dbg.declare instruction should not be cloned, since there can only be
- // one per variable fragment. It should be left in the original place since
- // sunk instruction is not an alloca(otherwise we could not be here).
- // But we need to update arguments of dbg.declare instruction, so that it
- // would not point into sunk instruction.
- if (!isa<CastInst>(I))
- continue; // dbg.declare points at something it shouldn't
-
- DII->setOperand(
- 0, MetadataAsValue::get(I->getContext(),
- ValueAsMetadata::get(I->getOperand(0))));
- continue;
- }
- // dbg.value is in the same basic block as the sunk inst, see if we can
- // salvage it. Clone a new copy of the instruction: on success we need
- // both salvaged and unsalvaged copies.
- SmallVector<DbgVariableIntrinsic *, 1> TmpUser{
- cast<DbgVariableIntrinsic>(DII->clone())};
-
- if (!salvageDebugInfoForDbgValues(*I, TmpUser)) {
- // We are unable to salvage: sink the cloned dbg.value, and mark the
- // original as undef, terminating any earlier variable location.
- LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
- TmpUser[0]->insertBefore(&*InsertPos);
- Value *Undef = UndefValue::get(I->getType());
- DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
- ValueAsMetadata::get(Undef)));
- } else {
- // We successfully salvaged: place the salvaged dbg.value in the
- // original location, and move the unmodified dbg.value to sink with
- // the sunk inst.
- TmpUser[0]->insertBefore(DII);
- DII->moveBefore(&*InsertPos);
- }
+ // Update the arguments of a dbg.declare instruction, so that it
+ // does not point into a sunk instruction.
+ auto updateDbgDeclare = [&I](DbgVariableIntrinsic *DII) {
+ if (!isa<DbgDeclareInst>(DII))
+ return false;
+
+ if (isa<CastInst>(I))
+ DII->setOperand(
+ 0, MetadataAsValue::get(I->getContext(),
+ ValueAsMetadata::get(I->getOperand(0))));
+ return true;
+ };
+
+ SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
+ for (auto User : DbgUsers) {
+ // A dbg.declare instruction should not be cloned, since there can only be
+ // one per variable fragment. It should be left in the original place
+ // because the sunk instruction is not an alloca (otherwise we could not be
+ // here).
+ if (User->getParent() != SrcBlock || updateDbgDeclare(User))
+ continue;
+
+ DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
+ LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
+ }
+
+ // Perform salvaging without the clones, then sink the clones.
+ if (!DIIClones.empty()) {
+ salvageDebugInfoForDbgValues(*I, DbgUsers);
+ for (auto &DIIClone : DIIClones) {
+ DIIClone->insertBefore(&*InsertPos);
+ LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
}
}
+
return true;
}
bool InstCombiner::run() {
while (!Worklist.isEmpty()) {
- Instruction *I = Worklist.RemoveOne();
+ // Walk deferred instructions in reverse order, and push them to the
+ // worklist, which means they'll end up popped from the worklist in-order.
+ while (Instruction *I = Worklist.popDeferred()) {
+ // Check to see if we can DCE the instruction. We do this already here to
+ // reduce the number of uses and thus allow other folds to trigger.
+ // Note that eraseInstFromFunction() may push additional instructions on
+ // the deferred worklist, so this will DCE whole instruction chains.
+ if (isInstructionTriviallyDead(I, &TLI)) {
+ eraseInstFromFunction(*I);
+ ++NumDeadInst;
+ continue;
+ }
+
+ Worklist.push(I);
+ }
+
+ Instruction *I = Worklist.removeOne();
if (I == nullptr) continue; // skip null values.
// Check to see if we can DCE the instruction.
if (isInstructionTriviallyDead(I, &TLI)) {
- LLVM_DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
eraseInstFromFunction(*I);
++NumDeadInst;
- MadeIRChange = true;
continue;
}
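The deferred-drain loop at the top of run() above relies on a simple LIFO property: pushing a sequence in reverse order onto the worklist makes it pop in the original program order. A tiny standalone sketch of that property (toy types only, not the InstCombine worklist):

#include <cassert>
#include <vector>

// Pushing items in reverse onto a LIFO container yields pops in the original
// (program) order, which is the ordering the deferred drain depends on.
int main() {
  const std::vector<int> ProgramOrder = {1, 2, 3, 4};
  std::vector<int> Worklist; // back() is the next item to pop
  for (auto It = ProgramOrder.rbegin(); It != ProgramOrder.rend(); ++It)
    Worklist.push_back(*It);
  for (int Expected : ProgramOrder) {
    assert(Worklist.back() == Expected);
    Worklist.pop_back();
  }
  return 0;
}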
@@ -3296,65 +3478,51 @@ bool InstCombiner::run() {
}
}
- // In general, it is possible for computeKnownBits to determine all bits in
- // a value even when the operands are not all constants.
- Type *Ty = I->getType();
- if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) {
- KnownBits Known = computeKnownBits(I, /*Depth*/0, I);
- if (Known.isConstant()) {
- Constant *C = ConstantInt::get(Ty, Known.getConstant());
- LLVM_DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C
- << " from: " << *I << '\n');
-
- // Add operands to the worklist.
- replaceInstUsesWith(*I, C);
- ++NumConstProp;
- if (isInstructionTriviallyDead(I, &TLI))
- eraseInstFromFunction(*I);
- MadeIRChange = true;
- continue;
- }
- }
-
- // See if we can trivially sink this instruction to a successor basic block.
- if (EnableCodeSinking && I->hasOneUse()) {
- BasicBlock *BB = I->getParent();
- Instruction *UserInst = cast<Instruction>(*I->user_begin());
- BasicBlock *UserParent;
-
- // Get the block the use occurs in.
- if (PHINode *PN = dyn_cast<PHINode>(UserInst))
- UserParent = PN->getIncomingBlock(*I->use_begin());
- else
- UserParent = UserInst->getParent();
-
- if (UserParent != BB) {
- bool UserIsSuccessor = false;
- // See if the user is one of our successors.
- for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
- if (*SI == UserParent) {
- UserIsSuccessor = true;
- break;
+ // See if we can trivially sink this instruction to its user if we can
+ // prove that the successor is not executed more frequently than our block.
+ if (EnableCodeSinking)
+ if (Use *SingleUse = I->getSingleUndroppableUse()) {
+ BasicBlock *BB = I->getParent();
+ Instruction *UserInst = cast<Instruction>(SingleUse->getUser());
+ BasicBlock *UserParent;
+
+ // Get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UserParent = PN->getIncomingBlock(*SingleUse);
+ else
+ UserParent = UserInst->getParent();
+
+ if (UserParent != BB) {
+ // See if the user is one of our successors that has only one
+ // predecessor, so that we don't have to split the critical edge.
+ bool ShouldSink = UserParent->getUniquePredecessor() == BB;
+ // Another option where we can sink is a block that ends with a
+ // terminator that does not pass control to other block (such as
+ // return or unreachable). In this case:
+ // - I dominates the User (by SSA form);
+ // - the User will be executed at most once.
+ // So sinking I down to User is always profitable or neutral.
+ if (!ShouldSink) {
+ auto *Term = UserParent->getTerminator();
+ ShouldSink = isa<ReturnInst>(Term) || isa<UnreachableInst>(Term);
}
-
- // If the user is one of our immediate successors, and if that successor
- // only has us as a predecessors (we'd have to split the critical edge
- // otherwise), we can keep going.
- if (UserIsSuccessor && UserParent->getUniquePredecessor()) {
- // Okay, the CFG is simple enough, try to sink this instruction.
- if (TryToSinkInstruction(I, UserParent)) {
- LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
- MadeIRChange = true;
- // We'll add uses of the sunk instruction below, but since sinking
- // can expose opportunities for it's *operands* add them to the
- // worklist
- for (Use &U : I->operands())
- if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
- Worklist.Add(OpI);
+ if (ShouldSink) {
+ assert(DT.dominates(BB, UserParent) &&
+ "Dominance relation broken?");
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ if (TryToSinkInstruction(I, UserParent)) {
+ LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
+ MadeIRChange = true;
+ // We'll add uses of the sunk instruction below, but since sinking
+ // can expose opportunities for its *operands*, add them to the
+ // worklist.
+ for (Use &U : I->operands())
+ if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
+ Worklist.push(OpI);
+ }
}
}
}
- }
// Now that we have an instruction, try combining it to simplify it.
Builder.SetInsertPoint(I);
@@ -3393,8 +3561,8 @@ bool InstCombiner::run() {
InstParent->getInstList().insert(InsertPos, Result);
// Push the new instruction and any users onto the worklist.
- Worklist.AddUsersToWorkList(*Result);
- Worklist.Add(Result);
+ Worklist.pushUsersToWorkList(*Result);
+ Worklist.push(Result);
eraseInstFromFunction(*I);
} else {
@@ -3406,39 +3574,39 @@ bool InstCombiner::run() {
if (isInstructionTriviallyDead(I, &TLI)) {
eraseInstFromFunction(*I);
} else {
- Worklist.AddUsersToWorkList(*I);
- Worklist.Add(I);
+ Worklist.pushUsersToWorkList(*I);
+ Worklist.push(I);
}
}
MadeIRChange = true;
}
}
- Worklist.Zap();
+ Worklist.zap();
return MadeIRChange;
}
-/// Walk the function in depth-first order, adding all reachable code to the
-/// worklist.
+/// Populate the IC worklist from a function, by walking it in depth-first
+/// order and adding all reachable code to the worklist.
///
/// This has a couple of tricks to make the code faster and more powerful. In
/// particular, we constant fold and DCE instructions as we go, to avoid adding
/// them to the worklist (this significantly speeds up instcombine on code where
/// many instructions are dead or constant). Additionally, if we find a branch
/// whose condition is a known constant, we only visit the reachable successors.
-static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
- SmallPtrSetImpl<BasicBlock *> &Visited,
- InstCombineWorklist &ICWorklist,
- const TargetLibraryInfo *TLI) {
+static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ InstCombineWorklist &ICWorklist) {
bool MadeIRChange = false;
+ SmallPtrSet<BasicBlock *, 32> Visited;
SmallVector<BasicBlock*, 256> Worklist;
- Worklist.push_back(BB);
+ Worklist.push_back(&F.front());
SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
DenseMap<Constant *, Constant *> FoldedConstants;
do {
- BB = Worklist.pop_back_val();
+ BasicBlock *BB = Worklist.pop_back_val();
// We have now visited this block! If we've already been here, ignore it.
if (!Visited.insert(BB).second)
@@ -3447,16 +3615,6 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
Instruction *Inst = &*BBI++;
- // DCE instruction if trivially dead.
- if (isInstructionTriviallyDead(Inst, TLI)) {
- ++NumDeadInst;
- LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
- salvageDebugInfoOrMarkUndef(*Inst);
- Inst->eraseFromParent();
- MadeIRChange = true;
- continue;
- }
-
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() &&
(Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
@@ -3480,8 +3638,6 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
Constant *&FoldRes = FoldedConstants[C];
if (!FoldRes)
FoldRes = ConstantFoldConstant(C, DL, TLI);
- if (!FoldRes)
- FoldRes = C;
if (FoldRes != C) {
LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst
@@ -3519,36 +3675,9 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
Worklist.push_back(SuccBB);
} while (!Worklist.empty());
- // Once we've found all of the instructions to add to instcombine's worklist,
- // add them in reverse order. This way instcombine will visit from the top
- // of the function down. This jives well with the way that it adds all uses
- // of instructions to the worklist after doing a transformation, thus avoiding
- // some N^2 behavior in pathological cases.
- ICWorklist.AddInitialGroup(InstrsForInstCombineWorklist);
-
- return MadeIRChange;
-}
-
-/// Populate the IC worklist from a function, and prune any dead basic
-/// blocks discovered in the process.
-///
-/// This also does basic constant propagation and other forward fixing to make
-/// the combiner itself run much faster.
-static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
- TargetLibraryInfo *TLI,
- InstCombineWorklist &ICWorklist) {
- bool MadeIRChange = false;
-
- // Do a depth-first traversal of the function, populate the worklist with
- // the reachable instructions. Ignore blocks that are not reachable. Keep
- // track of which blocks we visit.
- SmallPtrSet<BasicBlock *, 32> Visited;
- MadeIRChange |=
- AddReachableCodeToWorklist(&F.front(), DL, Visited, ICWorklist, TLI);
-
- // Do a quick scan over the function. If we find any blocks that are
- // unreachable, remove any instructions inside of them. This prevents
- // the instcombine code from having to deal with some bad special cases.
+ // Remove instructions inside unreachable blocks. This prevents the
+ // instcombine code from having to deal with some bad special cases, and
+ // reduces use counts of instructions.
for (BasicBlock &BB : F) {
if (Visited.count(&BB))
continue;
@@ -3558,6 +3687,27 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
NumDeadInst += NumDeadInstInBB;
}
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+ // of the function down. This jives well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ ICWorklist.reserve(InstrsForInstCombineWorklist.size());
+ for (Instruction *Inst : reverse(InstrsForInstCombineWorklist)) {
+ // DCE instruction if trivially dead. As we iterate in reverse program
+ // order here, we will clean up whole chains of dead instructions.
+ if (isInstructionTriviallyDead(Inst, TLI)) {
+ ++NumDeadInst;
+ LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
+ salvageDebugInfo(*Inst);
+ Inst->eraseFromParent();
+ MadeIRChange = true;
+ continue;
+ }
+
+ ICWorklist.push(Inst);
+ }
+
return MadeIRChange;
}
@@ -3565,10 +3715,8 @@ static bool combineInstructionsOverFunction(
Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, bool ExpensiveCombines, unsigned MaxIterations,
- LoopInfo *LI) {
+ ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
auto &DL = F.getParent()->getDataLayout();
- ExpensiveCombines |= EnableExpensiveCombines;
MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
/// Builder - This is an IRBuilder that automatically inserts new
@@ -3576,7 +3724,7 @@ static bool combineInstructionsOverFunction(
IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
F.getContext(), TargetFolder(DL),
IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
- Worklist.Add(I);
+ Worklist.add(I);
if (match(I, m_Intrinsic<Intrinsic::assume>()))
AC.registerAssumption(cast<CallInst>(I));
}));
@@ -3610,7 +3758,7 @@ static bool combineInstructionsOverFunction(
MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
- InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
+ InstCombiner IC(Worklist, Builder, F.hasMinSize(), AA,
AC, TLI, DT, ORE, BFI, PSI, DL, LI);
IC.MaxArraySizeForCombine = MaxArraySize;
@@ -3623,11 +3771,10 @@ static bool combineInstructionsOverFunction(
return MadeIRChange;
}
-InstCombinePass::InstCombinePass(bool ExpensiveCombines)
- : ExpensiveCombines(ExpensiveCombines), MaxIterations(LimitMaxIterations) {}
+InstCombinePass::InstCombinePass() : MaxIterations(LimitMaxIterations) {}
-InstCombinePass::InstCombinePass(bool ExpensiveCombines, unsigned MaxIterations)
- : ExpensiveCombines(ExpensiveCombines), MaxIterations(MaxIterations) {}
+InstCombinePass::InstCombinePass(unsigned MaxIterations)
+ : MaxIterations(MaxIterations) {}
PreservedAnalyses InstCombinePass::run(Function &F,
FunctionAnalysisManager &AM) {
@@ -3639,16 +3786,14 @@ PreservedAnalyses InstCombinePass::run(Function &F,
auto *LI = AM.getCachedResult<LoopAnalysis>(F);
auto *AA = &AM.getResult<AAManager>(F);
- const ModuleAnalysisManager &MAM =
- AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
ProfileSummaryInfo *PSI =
- MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, BFI,
- PSI, ExpensiveCombines, MaxIterations,
- LI))
+ PSI, MaxIterations, LI))
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
@@ -3698,22 +3843,18 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
nullptr;
return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, BFI,
- PSI, ExpensiveCombines, MaxIterations,
- LI);
+ PSI, MaxIterations, LI);
}
char InstructionCombiningPass::ID = 0;
-InstructionCombiningPass::InstructionCombiningPass(bool ExpensiveCombines)
- : FunctionPass(ID), ExpensiveCombines(ExpensiveCombines),
- MaxIterations(InstCombineDefaultMaxIterations) {
+InstructionCombiningPass::InstructionCombiningPass()
+ : FunctionPass(ID), MaxIterations(InstCombineDefaultMaxIterations) {
initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
}
-InstructionCombiningPass::InstructionCombiningPass(bool ExpensiveCombines,
- unsigned MaxIterations)
- : FunctionPass(ID), ExpensiveCombines(ExpensiveCombines),
- MaxIterations(MaxIterations) {
+InstructionCombiningPass::InstructionCombiningPass(unsigned MaxIterations)
+ : FunctionPass(ID), MaxIterations(MaxIterations) {
initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
}
@@ -3739,13 +3880,12 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
initializeInstructionCombiningPassPass(*unwrap(R));
}
-FunctionPass *llvm::createInstructionCombiningPass(bool ExpensiveCombines) {
- return new InstructionCombiningPass(ExpensiveCombines);
+FunctionPass *llvm::createInstructionCombiningPass() {
+ return new InstructionCombiningPass();
}
-FunctionPass *llvm::createInstructionCombiningPass(bool ExpensiveCombines,
- unsigned MaxIterations) {
- return new InstructionCombiningPass(ExpensiveCombines, MaxIterations);
+FunctionPass *llvm::createInstructionCombiningPass(unsigned MaxIterations) {
+ return new InstructionCombiningPass(MaxIterations);
}
void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {