diff options
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 175 |
1 files changed, 119 insertions, 56 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 32b15376f898..32e537897140 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -35,37 +35,46 @@ #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include <cassert> #include <cstdint> #include <iterator> #include <utility> +#define DEBUG_TYPE "instcombine" +#include "llvm/Transforms/Utils/InstructionWorklist.h" + using namespace llvm; using namespace PatternMatch; -#define DEBUG_TYPE "instcombine" - STATISTIC(NumAggregateReconstructionsSimplified, "Number of aggregate reconstructions turned into reuse of the " "original aggregate"); /// Return true if the value is cheaper to scalarize than it is to leave as a -/// vector operation. IsConstantExtractIndex indicates whether we are extracting -/// one known element from a vector constant. +/// vector operation. If the extract index \p EI is a constant integer then +/// some operations may be cheap to scalarize. /// /// FIXME: It's possible to create more instructions than previously existed. -static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) { +static bool cheapToScalarize(Value *V, Value *EI) { + ConstantInt *CEI = dyn_cast<ConstantInt>(EI); + // If we can pick a scalar constant value out of a vector, that is free. if (auto *C = dyn_cast<Constant>(V)) - return IsConstantExtractIndex || C->getSplatValue(); + return CEI || C->getSplatValue(); + + if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) { + ElementCount EC = cast<VectorType>(V->getType())->getElementCount(); + // Index needs to be lower than the minimum size of the vector, because + // for scalable vector, the vector size is known at run time. + return CEI->getValue().ult(EC.getKnownMinValue()); + } // An insertelement to the same constant index as our extract will simplify // to the scalar inserted element. An insertelement to a different constant // index is irrelevant to our extract. if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt()))) - return IsConstantExtractIndex; + return CEI; if (match(V, m_OneUse(m_Load(m_Value())))) return true; @@ -75,14 +84,12 @@ static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) { Value *V0, *V1; if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1))))) - if (cheapToScalarize(V0, IsConstantExtractIndex) || - cheapToScalarize(V1, IsConstantExtractIndex)) + if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI)) return true; CmpInst::Predicate UnusedPred; if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1))))) - if (cheapToScalarize(V0, IsConstantExtractIndex) || - cheapToScalarize(V1, IsConstantExtractIndex)) + if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI)) return true; return false; @@ -119,7 +126,8 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI, // and that it is a binary operation which is cheap to scalarize. // otherwise return nullptr. if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || - !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true)) + !(isa<BinaryOperator>(PHIUser)) || + !cheapToScalarize(PHIUser, EI.getIndexOperand())) return nullptr; // Create a scalar PHI node that will replace the vector PHI node @@ -170,24 +178,46 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI, return &EI; } -static Instruction *foldBitcastExtElt(ExtractElementInst &Ext, - InstCombiner::BuilderTy &Builder, - bool IsBigEndian) { +Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) { Value *X; uint64_t ExtIndexC; if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) || - !X->getType()->isVectorTy() || !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC))) return nullptr; + ElementCount NumElts = + cast<VectorType>(Ext.getVectorOperandType())->getElementCount(); + Type *DestTy = Ext.getType(); + bool IsBigEndian = DL.isBigEndian(); + + // If we are casting an integer to vector and extracting a portion, that is + // a shift-right and truncate. + // TODO: Allow FP dest type by casting the trunc to FP? + if (X->getType()->isIntegerTy() && DestTy->isIntegerTy() && + isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) { + assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) && + "Expected fixed vector type for bitcast from scalar integer"); + + // Big endian requires adjusting the extract index since MSB is at index 0. + // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8 + // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8 + if (IsBigEndian) + ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC; + unsigned ShiftAmountC = ExtIndexC * DestTy->getPrimitiveSizeInBits(); + if (!ShiftAmountC || Ext.getVectorOperand()->hasOneUse()) { + Value *Lshr = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset"); + return new TruncInst(Lshr, DestTy); + } + } + + if (!X->getType()->isVectorTy()) + return nullptr; + // If this extractelement is using a bitcast from a vector of the same number // of elements, see if we can find the source element from the source vector: // extelt (bitcast VecX), IndexC --> bitcast X[IndexC] auto *SrcTy = cast<VectorType>(X->getType()); - Type *DestTy = Ext.getType(); ElementCount NumSrcElts = SrcTy->getElementCount(); - ElementCount NumElts = - cast<VectorType>(Ext.getVectorOperandType())->getElementCount(); if (NumSrcElts == NumElts) if (Value *Elt = findScalarElement(X, ExtIndexC)) return new BitCastInst(Elt, DestTy); @@ -274,7 +304,7 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); // Conservatively assume that all elements are needed. - APInt UsedElts(APInt::getAllOnesValue(VWidth)); + APInt UsedElts(APInt::getAllOnes(VWidth)); switch (UserInstr->getOpcode()) { case Instruction::ExtractElement: { @@ -322,11 +352,11 @@ static APInt findDemandedEltsByAllUsers(Value *V) { if (Instruction *I = dyn_cast<Instruction>(U.getUser())) { UnionUsedElts |= findDemandedEltsBySingleUser(V, I); } else { - UnionUsedElts = APInt::getAllOnesValue(VWidth); + UnionUsedElts = APInt::getAllOnes(VWidth); break; } - if (UnionUsedElts.isAllOnesValue()) + if (UnionUsedElts.isAllOnes()) break; } @@ -388,7 +418,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { // If the input vector has multiple uses, simplify it based on a union // of all elements used. APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec); - if (!DemandedElts.isAllOnesValue()) { + if (!DemandedElts.isAllOnes()) { APInt UndefElts(NumElts, 0); if (Value *V = SimplifyDemandedVectorElts( SrcVec, DemandedElts, UndefElts, 0 /* Depth */, @@ -402,7 +432,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { } } - if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian())) + if (Instruction *I = foldBitcastExtElt(EI)) return I; // If there's a vector PHI feeding a scalar use through this extractelement @@ -415,7 +445,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { // TODO come up with a n-ary matcher that subsumes both unary and // binary matchers. UnaryOperator *UO; - if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, IndexC)) { + if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) { // extelt (unop X), Index --> unop (extelt X, Index) Value *X = UO->getOperand(0); Value *E = Builder.CreateExtractElement(X, Index); @@ -423,7 +453,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { } BinaryOperator *BO; - if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) { + if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) { // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index) Value *X = BO->getOperand(0), *Y = BO->getOperand(1); Value *E0 = Builder.CreateExtractElement(X, Index); @@ -434,7 +464,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { Value *X, *Y; CmpInst::Predicate Pred; if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) && - cheapToScalarize(SrcVec, IndexC)) { + cheapToScalarize(SrcVec, Index)) { // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index) Value *E0 = Builder.CreateExtractElement(X, Index); Value *E1 = Builder.CreateExtractElement(Y, Index); @@ -651,8 +681,7 @@ static void replaceExtractElements(InsertElementInst *InsElt, if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back())) return; - auto *WideVec = - new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask); + auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask); // Insert the new shuffle after the vector operand of the extract is defined // (as long as it's not a PHI) or at the start of the basic block of the @@ -913,7 +942,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( "We don't store nullptr in SourceAggregate!"); assert((Describe(SourceAggregate) == AggregateDescription::Found) == (I.index() != 0) && - "SourceAggregate should be valid after the the first element,"); + "SourceAggregate should be valid after the first element,"); // For this element, is there a plausible source aggregate? // FIXME: we could special-case undef element, IFF we know that in the @@ -1179,7 +1208,7 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { if (!ElementPresent[i]) Mask[i] = -1; - return new ShuffleVectorInst(FirstIE, PoisonVec, Mask); + return new ShuffleVectorInst(FirstIE, Mask); } /// Try to fold an insert element into an existing splat shuffle by changing @@ -1208,15 +1237,15 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) { // Replace the shuffle mask element at the index of this insert with a zero. // For example: - // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1 - // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef> + // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1 + // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef> unsigned NumMaskElts = cast<FixedVectorType>(Shuf->getType())->getNumElements(); SmallVector<int, 16> NewMask(NumMaskElts); for (unsigned i = 0; i != NumMaskElts; ++i) NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i); - return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask); + return new ShuffleVectorInst(Op0, NewMask); } /// Try to fold an extract+insert element into an existing identity shuffle by @@ -1348,6 +1377,10 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { NewShufElts[I] = ShufConstVec->getAggregateElement(I); NewMaskElts[I] = Mask[I]; } + + // Bail if we failed to find an element. + if (!NewShufElts[I]) + return nullptr; } // Create new operands for a shuffle that includes the constant of the @@ -1399,6 +1432,41 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { return nullptr; } +/// If both the base vector and the inserted element are extended from the same +/// type, do the insert element in the narrow source type followed by extend. +/// TODO: This can be extended to include other cast opcodes, but particularly +/// if we create a wider insertelement, make sure codegen is not harmed. +static Instruction *narrowInsElt(InsertElementInst &InsElt, + InstCombiner::BuilderTy &Builder) { + // We are creating a vector extend. If the original vector extend has another + // use, that would mean we end up with 2 vector extends, so avoid that. + // TODO: We could ease the use-clause to "if at least one op has one use" + // (assuming that the source types match - see next TODO comment). + Value *Vec = InsElt.getOperand(0); + if (!Vec->hasOneUse()) + return nullptr; + + Value *Scalar = InsElt.getOperand(1); + Value *X, *Y; + CastInst::CastOps CastOpcode; + if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y)))) + CastOpcode = Instruction::FPExt; + else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y)))) + CastOpcode = Instruction::SExt; + else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y)))) + CastOpcode = Instruction::ZExt; + else + return nullptr; + + // TODO: We can allow mismatched types by creating an intermediate cast. + if (X->getType()->getScalarType() != Y->getType()) + return nullptr; + + // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index) + Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2)); + return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType()); +} + Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { Value *VecOp = IE.getOperand(0); Value *ScalarOp = IE.getOperand(1); @@ -1495,7 +1563,7 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) { unsigned VWidth = VecTy->getNumElements(); APInt UndefElts(VWidth, 0); - APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { if (V != &IE) return replaceInstUsesWith(IE, V); @@ -1518,6 +1586,9 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE)) return IdentityShuf; + if (Instruction *Ext = narrowInsElt(IE, Builder)) + return Ext; + return nullptr; } @@ -1924,8 +1995,8 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf, // Splat from element 0. Any mask element that is undefined remains undefined. // For example: - // shuf (inselt undef, X, 2), undef, <2,2,undef> - // --> shuf (inselt undef, X, 0), undef, <0,0,undef> + // shuf (inselt undef, X, 2), _, <2,2,undef> + // --> shuf (inselt undef, X, 0), poison, <0,0,undef> unsigned NumMaskElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); SmallVector<int, 16> NewMask(NumMaskElts, 0); @@ -1933,7 +2004,7 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf, if (Mask[i] == UndefMaskElem) NewMask[i] = Mask[i]; - return new ShuffleVectorInst(NewIns, UndefVec, NewMask); + return new ShuffleVectorInst(NewIns, NewMask); } /// Try to fold shuffles that are the equivalent of a vector select. @@ -2197,12 +2268,8 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, SmallVector<int, 16> Mask; Shuf.getShuffleMask(Mask); - // The shuffle must not change vector sizes. - // TODO: This restriction could be removed if the insert has only one use - // (because the transform would require a new length-changing shuffle). int NumElts = Mask.size(); - if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements())) - return nullptr; + int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements(); // This is a specialization of a fold in SimplifyDemandedVectorElts. We may // not be able to handle it there if the insertelement has >1 use. @@ -2219,11 +2286,16 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { // Offset the index constant by the vector width because we are checking for // accesses to the 2nd vector input of the shuffle. - IdxC += NumElts; + IdxC += InpNumElts; // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask if (!is_contained(Mask, (int)IdxC)) return IC.replaceOperand(Shuf, 1, X); } + // For the rest of the transform, the shuffle must not change vector sizes. + // TODO: This restriction could be removed if the insert has only one use + // (because the transform would require a new length-changing shuffle). + if (NumElts != InpNumElts) + return nullptr; // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC' auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) { @@ -2413,16 +2485,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (LHS == RHS) { assert(!match(RHS, m_Undef()) && "Shuffle with 2 undef ops not simplified?"); - // Remap any references to RHS to use LHS. - SmallVector<int, 16> Elts; - for (unsigned i = 0; i != VWidth; ++i) { - // Propagate undef elements or force mask to LHS. - if (Mask[i] < 0) - Elts.push_back(UndefMaskElem); - else - Elts.push_back(Mask[i] % LHSWidth); - } - return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts); + return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth)); } // shuffle undef, x, mask --> shuffle x, undef, mask' @@ -2444,7 +2507,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { return I; APInt UndefElts(VWidth, 0); - APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { if (V != &SVI) return replaceInstUsesWith(SVI, V); |
