summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp175
1 files changed, 119 insertions, 56 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 32b15376f898..32e537897140 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -35,37 +35,46 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "instcombine"
-
STATISTIC(NumAggregateReconstructionsSimplified,
"Number of aggregate reconstructions turned into reuse of the "
"original aggregate");
/// Return true if the value is cheaper to scalarize than it is to leave as a
-/// vector operation. IsConstantExtractIndex indicates whether we are extracting
-/// one known element from a vector constant.
+/// vector operation. If the extract index \p EI is a constant integer then
+/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
-static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
+static bool cheapToScalarize(Value *V, Value *EI) {
+ ConstantInt *CEI = dyn_cast<ConstantInt>(EI);
+
// If we can pick a scalar constant value out of a vector, that is free.
if (auto *C = dyn_cast<Constant>(V))
- return IsConstantExtractIndex || C->getSplatValue();
+ return CEI || C->getSplatValue();
+
+ if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
+ ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
+ // Index needs to be lower than the minimum size of the vector, because
+ // for scalable vector, the vector size is known at run time.
+ return CEI->getValue().ult(EC.getKnownMinValue());
+ }
// An insertelement to the same constant index as our extract will simplify
// to the scalar inserted element. An insertelement to a different constant
// index is irrelevant to our extract.
if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
- return IsConstantExtractIndex;
+ return CEI;
if (match(V, m_OneUse(m_Load(m_Value()))))
return true;
@@ -75,14 +84,12 @@ static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
Value *V0, *V1;
if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
- if (cheapToScalarize(V0, IsConstantExtractIndex) ||
- cheapToScalarize(V1, IsConstantExtractIndex))
+ if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
return true;
CmpInst::Predicate UnusedPred;
if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
- if (cheapToScalarize(V0, IsConstantExtractIndex) ||
- cheapToScalarize(V1, IsConstantExtractIndex))
+ if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
return true;
return false;
@@ -119,7 +126,8 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
// and that it is a binary operation which is cheap to scalarize.
// otherwise return nullptr.
if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
- !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
+ !(isa<BinaryOperator>(PHIUser)) ||
+ !cheapToScalarize(PHIUser, EI.getIndexOperand()))
return nullptr;
// Create a scalar PHI node that will replace the vector PHI node
@@ -170,24 +178,46 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
return &EI;
}
-static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
- InstCombiner::BuilderTy &Builder,
- bool IsBigEndian) {
+Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
Value *X;
uint64_t ExtIndexC;
if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
- !X->getType()->isVectorTy() ||
!match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
return nullptr;
+ ElementCount NumElts =
+ cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
+ Type *DestTy = Ext.getType();
+ bool IsBigEndian = DL.isBigEndian();
+
+ // If we are casting an integer to vector and extracting a portion, that is
+ // a shift-right and truncate.
+ // TODO: Allow FP dest type by casting the trunc to FP?
+ if (X->getType()->isIntegerTy() && DestTy->isIntegerTy() &&
+ isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) {
+ assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
+ "Expected fixed vector type for bitcast from scalar integer");
+
+ // Big endian requires adjusting the extract index since MSB is at index 0.
+ // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
+ // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
+ if (IsBigEndian)
+ ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
+ unsigned ShiftAmountC = ExtIndexC * DestTy->getPrimitiveSizeInBits();
+ if (!ShiftAmountC || Ext.getVectorOperand()->hasOneUse()) {
+ Value *Lshr = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
+ return new TruncInst(Lshr, DestTy);
+ }
+ }
+
+ if (!X->getType()->isVectorTy())
+ return nullptr;
+
// If this extractelement is using a bitcast from a vector of the same number
// of elements, see if we can find the source element from the source vector:
// extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
auto *SrcTy = cast<VectorType>(X->getType());
- Type *DestTy = Ext.getType();
ElementCount NumSrcElts = SrcTy->getElementCount();
- ElementCount NumElts =
- cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
if (NumSrcElts == NumElts)
if (Value *Elt = findScalarElement(X, ExtIndexC))
return new BitCastInst(Elt, DestTy);
@@ -274,7 +304,7 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
// Conservatively assume that all elements are needed.
- APInt UsedElts(APInt::getAllOnesValue(VWidth));
+ APInt UsedElts(APInt::getAllOnes(VWidth));
switch (UserInstr->getOpcode()) {
case Instruction::ExtractElement: {
@@ -322,11 +352,11 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
} else {
- UnionUsedElts = APInt::getAllOnesValue(VWidth);
+ UnionUsedElts = APInt::getAllOnes(VWidth);
break;
}
- if (UnionUsedElts.isAllOnesValue())
+ if (UnionUsedElts.isAllOnes())
break;
}
@@ -388,7 +418,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
// If the input vector has multiple uses, simplify it based on a union
// of all elements used.
APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
APInt UndefElts(NumElts, 0);
if (Value *V = SimplifyDemandedVectorElts(
SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
@@ -402,7 +432,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
}
}
- if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
+ if (Instruction *I = foldBitcastExtElt(EI))
return I;
// If there's a vector PHI feeding a scalar use through this extractelement
@@ -415,7 +445,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
// TODO come up with a n-ary matcher that subsumes both unary and
// binary matchers.
UnaryOperator *UO;
- if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, IndexC)) {
+ if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
// extelt (unop X), Index --> unop (extelt X, Index)
Value *X = UO->getOperand(0);
Value *E = Builder.CreateExtractElement(X, Index);
@@ -423,7 +453,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
}
BinaryOperator *BO;
- if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
+ if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
// extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
Value *E0 = Builder.CreateExtractElement(X, Index);
@@ -434,7 +464,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
Value *X, *Y;
CmpInst::Predicate Pred;
if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
- cheapToScalarize(SrcVec, IndexC)) {
+ cheapToScalarize(SrcVec, Index)) {
// extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
Value *E0 = Builder.CreateExtractElement(X, Index);
Value *E1 = Builder.CreateExtractElement(Y, Index);
@@ -651,8 +681,7 @@ static void replaceExtractElements(InsertElementInst *InsElt,
if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
return;
- auto *WideVec =
- new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask);
+ auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
// Insert the new shuffle after the vector operand of the extract is defined
// (as long as it's not a PHI) or at the start of the basic block of the
@@ -913,7 +942,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
"We don't store nullptr in SourceAggregate!");
assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
(I.index() != 0) &&
- "SourceAggregate should be valid after the the first element,");
+ "SourceAggregate should be valid after the first element,");
// For this element, is there a plausible source aggregate?
// FIXME: we could special-case undef element, IFF we know that in the
@@ -1179,7 +1208,7 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
if (!ElementPresent[i])
Mask[i] = -1;
- return new ShuffleVectorInst(FirstIE, PoisonVec, Mask);
+ return new ShuffleVectorInst(FirstIE, Mask);
}
/// Try to fold an insert element into an existing splat shuffle by changing
@@ -1208,15 +1237,15 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
// Replace the shuffle mask element at the index of this insert with a zero.
// For example:
- // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
- // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
+ // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
+ // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
unsigned NumMaskElts =
cast<FixedVectorType>(Shuf->getType())->getNumElements();
SmallVector<int, 16> NewMask(NumMaskElts);
for (unsigned i = 0; i != NumMaskElts; ++i)
NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
- return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
+ return new ShuffleVectorInst(Op0, NewMask);
}
/// Try to fold an extract+insert element into an existing identity shuffle by
@@ -1348,6 +1377,10 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
NewShufElts[I] = ShufConstVec->getAggregateElement(I);
NewMaskElts[I] = Mask[I];
}
+
+ // Bail if we failed to find an element.
+ if (!NewShufElts[I])
+ return nullptr;
}
// Create new operands for a shuffle that includes the constant of the
@@ -1399,6 +1432,41 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
return nullptr;
}
+/// If both the base vector and the inserted element are extended from the same
+/// type, do the insert element in the narrow source type followed by extend.
+/// TODO: This can be extended to include other cast opcodes, but particularly
+/// if we create a wider insertelement, make sure codegen is not harmed.
+static Instruction *narrowInsElt(InsertElementInst &InsElt,
+ InstCombiner::BuilderTy &Builder) {
+ // We are creating a vector extend. If the original vector extend has another
+ // use, that would mean we end up with 2 vector extends, so avoid that.
+ // TODO: We could ease the use-clause to "if at least one op has one use"
+ // (assuming that the source types match - see next TODO comment).
+ Value *Vec = InsElt.getOperand(0);
+ if (!Vec->hasOneUse())
+ return nullptr;
+
+ Value *Scalar = InsElt.getOperand(1);
+ Value *X, *Y;
+ CastInst::CastOps CastOpcode;
+ if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
+ CastOpcode = Instruction::FPExt;
+ else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
+ CastOpcode = Instruction::SExt;
+ else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
+ CastOpcode = Instruction::ZExt;
+ else
+ return nullptr;
+
+ // TODO: We can allow mismatched types by creating an intermediate cast.
+ if (X->getType()->getScalarType() != Y->getType())
+ return nullptr;
+
+ // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
+ Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
+ return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
+}
+
Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
Value *VecOp = IE.getOperand(0);
Value *ScalarOp = IE.getOperand(1);
@@ -1495,7 +1563,7 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
if (V != &IE)
return replaceInstUsesWith(IE, V);
@@ -1518,6 +1586,9 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
return IdentityShuf;
+ if (Instruction *Ext = narrowInsElt(IE, Builder))
+ return Ext;
+
return nullptr;
}
@@ -1924,8 +1995,8 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
// Splat from element 0. Any mask element that is undefined remains undefined.
// For example:
- // shuf (inselt undef, X, 2), undef, <2,2,undef>
- // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
+ // shuf (inselt undef, X, 2), _, <2,2,undef>
+ // --> shuf (inselt undef, X, 0), poison, <0,0,undef>
unsigned NumMaskElts =
cast<FixedVectorType>(Shuf.getType())->getNumElements();
SmallVector<int, 16> NewMask(NumMaskElts, 0);
@@ -1933,7 +2004,7 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
if (Mask[i] == UndefMaskElem)
NewMask[i] = Mask[i];
- return new ShuffleVectorInst(NewIns, UndefVec, NewMask);
+ return new ShuffleVectorInst(NewIns, NewMask);
}
/// Try to fold shuffles that are the equivalent of a vector select.
@@ -2197,12 +2268,8 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
SmallVector<int, 16> Mask;
Shuf.getShuffleMask(Mask);
- // The shuffle must not change vector sizes.
- // TODO: This restriction could be removed if the insert has only one use
- // (because the transform would require a new length-changing shuffle).
int NumElts = Mask.size();
- if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements()))
- return nullptr;
+ int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
// This is a specialization of a fold in SimplifyDemandedVectorElts. We may
// not be able to handle it there if the insertelement has >1 use.
@@ -2219,11 +2286,16 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
// Offset the index constant by the vector width because we are checking for
// accesses to the 2nd vector input of the shuffle.
- IdxC += NumElts;
+ IdxC += InpNumElts;
// shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
if (!is_contained(Mask, (int)IdxC))
return IC.replaceOperand(Shuf, 1, X);
}
+ // For the rest of the transform, the shuffle must not change vector sizes.
+ // TODO: This restriction could be removed if the insert has only one use
+ // (because the transform would require a new length-changing shuffle).
+ if (NumElts != InpNumElts)
+ return nullptr;
// shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
@@ -2413,16 +2485,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (LHS == RHS) {
assert(!match(RHS, m_Undef()) &&
"Shuffle with 2 undef ops not simplified?");
- // Remap any references to RHS to use LHS.
- SmallVector<int, 16> Elts;
- for (unsigned i = 0; i != VWidth; ++i) {
- // Propagate undef elements or force mask to LHS.
- if (Mask[i] < 0)
- Elts.push_back(UndefMaskElem);
- else
- Elts.push_back(Mask[i] % LHSWidth);
- }
- return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts);
+ return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
}
// shuffle undef, x, mask --> shuffle x, undef, mask'
@@ -2444,7 +2507,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return I;
APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
if (V != &SVI)
return replaceInstUsesWith(SVI, V);