path: root/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
author    Dimitry Andric <dim@FreeBSD.org>  2021-07-29 20:15:26 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2021-07-29 20:15:26 +0000
commit    344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
tree      f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
parent    b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp  178
1 file changed, 133 insertions(+), 45 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 06f22cdfb63d..32b15376f898 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -347,6 +347,25 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
ElementCount EC = EI.getVectorOperandType()->getElementCount();
unsigned NumElts = EC.getKnownMinValue();
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
+ Intrinsic::ID IID = II->getIntrinsicID();
+ // Index needs to be lower than the minimum size of the vector, because
+ // for scalable vectors, the vector size is only known at run time.
+ if (IID == Intrinsic::experimental_stepvector &&
+ IndexC->getValue().ult(NumElts)) {
+ Type *Ty = EI.getType();
+ unsigned BitWidth = Ty->getIntegerBitWidth();
+ Value *Idx;
+ // Return index when its value does not exceed the allowed limit
+ // for the element type of the vector, otherwise return undefined.
+ if (IndexC->getValue().getActiveBits() <= BitWidth)
+ Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
+ else
+ Idx = UndefValue::get(Ty);
+ return replaceInstUsesWith(EI, Idx);
+ }
+ }
+
// InstSimplify should handle cases where the index is invalid.
// For fixed-length vector, it's invalid to extract out-of-range element.
if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
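
The hunk above adds a fold that replaces an extractelement of llvm.experimental.stepvector at a constant index below the known minimum element count with the index value itself (or undef if the index does not fit the element type). A minimal hand-written IR sketch of the intent; the value names and the nxv4i32 type are illustrative, not taken from the patch's tests:

%step = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
%elt  = extractelement <vscale x 4 x i32> %step, i64 2
  -->
; %elt is replaced by the constant i32 2
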
@@ -382,6 +401,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
}
}
}
+
if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
return I;
@@ -430,6 +450,47 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
// be the same value, extract from the pre-inserted value instead.
if (isa<Constant>(IE->getOperand(2)) && IndexC)
return replaceOperand(EI, 0, IE->getOperand(0));
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ auto *VecType = cast<VectorType>(GEP->getType());
+ ElementCount EC = VecType->getElementCount();
+ uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
+ if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
+ // Find out why we have a vector result - these are a few examples:
+ // 1. We have a scalar pointer and a vector of indices, or
+ // 2. We have a vector of pointers and a scalar index, or
+ // 3. We have a vector of pointers and a vector of indices, etc.
+ // Here we only consider combining when there is exactly one vector
+ // operand, since the optimization is less obviously a win due to
+ // needing more than one extractelement.
+
+ unsigned VectorOps =
+ llvm::count_if(GEP->operands(), [](const Value *V) {
+ return isa<VectorType>(V->getType());
+ });
+ if (VectorOps > 1)
+ return nullptr;
+ assert(VectorOps == 1 && "Expected exactly one vector GEP operand!");
+
+ Value *NewPtr = GEP->getPointerOperand();
+ if (isa<VectorType>(NewPtr->getType()))
+ NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
+
+ SmallVector<Value *> NewOps;
+ for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
+ Value *Op = GEP->getOperand(I);
+ if (isa<VectorType>(Op->getType()))
+ NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
+ else
+ NewOps.push_back(Op);
+ }
+
+ GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+ cast<PointerType>(NewPtr->getType())->getElementType(), NewPtr,
+ NewOps);
+ NewGEP->setIsInBounds(GEP->isInBounds());
+ return NewGEP;
+ }
+ return nullptr;
} else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
// If this is extracting an element from a shufflevector, figure out where
// it came from and extract from the appropriate input element instead.
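
The GEP hunk above scalarizes an extractelement of a single-use getelementptr that has exactly one vector operand, extracting that operand instead and emitting a scalar GEP. A rough IR sketch under those assumptions, with hypothetical value names and the typed pointers of this LLVM version:

%gep = getelementptr inbounds i32, i32* %base, <2 x i64> %offsets
%p   = extractelement <2 x i32*> %gep, i64 1
  -->
%off = extractelement <2 x i64> %offsets, i64 1
%p   = getelementptr inbounds i32, i32* %base, i64 %off
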
@@ -474,7 +535,7 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
"Invalid CollectSingleShuffleElements");
unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
- if (isa<UndefValue>(V)) {
+ if (match(V, m_Undef())) {
Mask.assign(NumElts, -1);
return true;
}
@@ -554,9 +615,9 @@ static void replaceExtractElements(InsertElementInst *InsElt,
NumExtElts >= NumInsElts)
return;
- // Create a shuffle mask to widen the extended-from vector using undefined
+ // Create a shuffle mask to widen the extended-from vector using poison
// values. The mask selects all of the values of the original vector followed
- // by as many undefined values as needed to create a vector of the same length
+ // by as many poison values as needed to create a vector of the same length
// as the inserted-to vector.
SmallVector<int, 16> ExtendMask;
for (unsigned i = 0; i < NumExtElts; ++i)
@@ -591,7 +652,7 @@ static void replaceExtractElements(InsertElementInst *InsElt,
return;
auto *WideVec =
- new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), ExtendMask);
+ new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask);
// Insert the new shuffle after the vector operand of the extract is defined
// (as long as it's not a PHI) or at the start of the basic block of the
@@ -630,7 +691,7 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
assert(V->getType()->isVectorTy() && "Invalid shuffle!");
unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
- if (isa<UndefValue>(V)) {
+ if (match(V, m_Undef())) {
Mask.assign(NumElts, -1);
return std::make_pair(
PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
@@ -737,12 +798,12 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
// Try to find a value of each element of an aggregate.
// FIXME: deal with more complex, not one-dimensional, aggregate types
- SmallVector<Optional<Value *>, 2> AggElts(NumAggElts, NotFound);
+ SmallVector<Optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
// Do we know values for each element of the aggregate?
auto KnowAllElts = [&AggElts]() {
return all_of(AggElts,
- [](Optional<Value *> Elt) { return Elt != NotFound; });
+ [](Optional<Instruction *> Elt) { return Elt != NotFound; });
};
int Depth = 0;
@@ -757,7 +818,11 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
Depth < DepthLimit && CurrIVI && !KnowAllElts();
CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
++Depth) {
- Value *InsertedValue = CurrIVI->getInsertedValueOperand();
+ auto *InsertedValue =
+ dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
+ if (!InsertedValue)
+ return nullptr; // Inserted value must be produced by an instruction.
+
ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
// Don't bother with more than single-level aggregates.
@@ -767,7 +832,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
// Now, we may have already previously recorded the value for this element
// of an aggregate. If we did, that means the CurrIVI will later be
// overwritten with the already-recorded value. But if not, let's record it!
- Optional<Value *> &Elt = AggElts[Indices.front()];
+ Optional<Instruction *> &Elt = AggElts[Indices.front()];
Elt = Elt.getValueOr(InsertedValue);
// FIXME: should we handle chain-terminating undef base operand?
@@ -811,15 +876,15 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
// If found, return the source aggregate from which the extraction was.
// If \p PredBB is provided, does PHI translation of an \p Elt first.
auto FindSourceAggregate =
- [&](Value *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB,
+ [&](Instruction *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB,
Optional<BasicBlock *> PredBB) -> Optional<Value *> {
// For now(?), only deal with, at most, a single level of PHI indirection.
if (UseBB && PredBB)
- Elt = Elt->DoPHITranslation(*UseBB, *PredBB);
+ Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
// FIXME: deal with multiple levels of PHI indirection?
// Did we find an extraction?
- auto *EVI = dyn_cast<ExtractValueInst>(Elt);
+ auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
if (!EVI)
return NotFound;
@@ -907,13 +972,8 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
// they all should be defined in the same basic block.
BasicBlock *UseBB = nullptr;
- for (const Optional<Value *> &Elt : AggElts) {
- // If this element's value was not defined by an instruction, ignore it.
- auto *I = dyn_cast<Instruction>(*Elt);
- if (!I)
- continue;
- // Otherwise, in which basic block is this instruction located?
- BasicBlock *BB = I->getParent();
+ for (const Optional<Instruction *> &I : AggElts) {
+ BasicBlock *BB = (*I)->getParent();
// If it's the first instruction we've encountered, record the basic block.
if (!UseBB) {
UseBB = BB;
@@ -1050,7 +1110,7 @@ static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
-/// shufflevector(insertelt(X, %k, 0), undef, zero)
+/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
// We are interested in the last insert in a chain. So if this insert has a
// single user and that user is an insert, bail.
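
The updated doc comment above describes folding a chain of insertelement instructions that splat one value into a single insert plus a splat shuffle, now using poison for the unused shuffle operand. An illustrative IR sketch with assumed names and a 4-element type; since the first insert already targets element 0, it is reused as the shuffle input:

%i0 = insertelement <4 x float> undef, float %k, i32 0
%i1 = insertelement <4 x float> %i0, float %k, i32 1
%i2 = insertelement <4 x float> %i1, float %k, i32 2
%i3 = insertelement <4 x float> %i2, float %k, i32 3
  -->
%splat = shufflevector <4 x float> %i0, <4 x float> poison, <4 x i32> zeroinitializer
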
@@ -1102,16 +1162,16 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
// insert into every element.
// TODO: If the base vector is not undef, it might be better to create a splat
// and then a select-shuffle (blend) with the base vector.
- if (!isa<UndefValue>(FirstIE->getOperand(0)))
+ if (!match(FirstIE->getOperand(0), m_Undef()))
if (!ElementPresent.all())
return nullptr;
// Create the insert + shuffle.
Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
- UndefValue *UndefVec = UndefValue::get(VecTy);
+ PoisonValue *PoisonVec = PoisonValue::get(VecTy);
Constant *Zero = ConstantInt::get(Int32Ty, 0);
if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
- FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
+ FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt);
// Splat from element 0, but replace absent elements with undef in the mask.
SmallVector<int, 16> Mask(NumElements, 0);
@@ -1119,7 +1179,7 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
if (!ElementPresent[i])
Mask[i] = -1;
- return new ShuffleVectorInst(FirstIE, UndefVec, Mask);
+ return new ShuffleVectorInst(FirstIE, PoisonVec, Mask);
}
/// Try to fold an insert element into an existing splat shuffle by changing
@@ -1164,7 +1224,7 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
// Check if the vector operand of this insert is an identity shuffle.
auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
- if (!Shuf || !isa<UndefValue>(Shuf->getOperand(1)) ||
+ if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
!(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
return nullptr;
@@ -1633,14 +1693,14 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
Type *EltTy = V->getType()->getScalarType();
Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
- if (isa<UndefValue>(V))
+ if (match(V, m_Undef()))
return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
if (isa<ConstantAggregateZero>(V))
return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
if (Constant *C = dyn_cast<Constant>(V))
- return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
+ return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
Mask);
Instruction *I = cast<Instruction>(V);
@@ -1886,7 +1946,7 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
// Canonicalize to choose from operand 0 first unless operand 1 is undefined.
// Commuting undef to operand 0 conflicts with another canonicalization.
unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
- if (!isa<UndefValue>(Shuf.getOperand(1)) &&
+ if (!match(Shuf.getOperand(1), m_Undef()) &&
Shuf.getMaskValue(0) >= (int)NumElts) {
// TODO: Can we assert that both operands of a shuffle-select are not undef
// (otherwise, it would have been folded by instsimplify?
@@ -2080,13 +2140,22 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
}
-/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
+/// Try to fold an extract subvector operation.
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
- if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1))
+ if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
return nullptr;
- Value *X, *Y;
+ // Check if we are extracting all bits of an inserted scalar:
+ // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
+ Value *X;
+ if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
+ X->getType()->getPrimitiveSizeInBits() ==
+ Shuf.getType()->getPrimitiveSizeInBits())
+ return new BitCastInst(X, Shuf.getType());
+
+ // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
+ Value *Y;
ArrayRef<int> Mask;
if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
return nullptr;
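
The first new check in foldIdentityExtractShuffle above recognizes an extract-subvector shuffle that reads back all bits of a scalar inserted at element 0 through a bitcast, and replaces it with a direct bitcast of the scalar. A small hand-written IR sketch with assumed types; the scalar and the subvector must have the same bit width (64 bits here):

%ins  = insertelement <2 x i64> poison, i64 %x, i32 0
%cast = bitcast <2 x i64> %ins to <4 x i32>
%sub  = shufflevector <4 x i32> %cast, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  -->
%sub  = bitcast i64 %x to <2 x i32>
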
@@ -2231,10 +2300,10 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
!isPowerOf2_32(
cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
!isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
- isa<UndefValue>(X) || isa<UndefValue>(Y))
+ match(X, m_Undef()) || match(Y, m_Undef()))
return nullptr;
- assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
- isa<UndefValue>(Shuffle1->getOperand(1)) &&
+ assert(match(Shuffle0->getOperand(1), m_Undef()) &&
+ match(Shuffle1->getOperand(1), m_Undef()) &&
"Unexpected operand for identity shuffle");
// This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
@@ -2287,9 +2356,27 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isa<ScalableVectorType>(LHS->getType()))
return nullptr;
- // shuffle x, x, mask --> shuffle x, undef, mask'
unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
+
+ // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
+ //
+ // if X and Y are of the same (vector) type, and the element size is not
+ // changed by the bitcasts, we can distribute the bitcasts through the
+ // shuffle, hopefully reducing the number of instructions. We make sure that
+ // at least one bitcast only has one use, so we don't *increase* the number of
+ // instructions here.
+ Value *X, *Y;
+ if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
+ X->getType()->isVectorTy() && X->getType() == Y->getType() &&
+ X->getType()->getScalarSizeInBits() ==
+ SVI.getType()->getScalarSizeInBits() &&
+ (LHS->hasOneUse() || RHS->hasOneUse())) {
+ Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
+ SVI.getName() + ".uncasted");
+ return new BitCastInst(V, SVI.getType());
+ }
+
ArrayRef<int> Mask = SVI.getShuffleMask();
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
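
The new block above sinks a pair of element-size-preserving bitcasts below a shufflevector when the cast sources have the same vector type and at least one bitcast has a single use. A minimal IR sketch with assumed operand names; the ".uncasted" suffix matches the name the code gives the new shuffle:

%xb = bitcast <4 x i32> %x to <4 x float>
%yb = bitcast <4 x i32> %y to <4 x float>
%s  = shufflevector <4 x float> %xb, <4 x float> %yb, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  -->
%s.uncasted = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%s = bitcast <4 x i32> %s.uncasted to <4 x float>
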
@@ -2299,7 +2386,6 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// TODO: This could be extended to allow length-changing shuffles.
// The transform might also be obsoleted if we allowed canonicalization
// of bitcasted shuffles.
- Value *X;
if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
X->getType()->isVectorTy() && VWidth == LHSWidth) {
// Try to create a scaled mask constant.
@@ -2323,8 +2409,10 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
}
}
+ // shuffle x, x, mask --> shuffle x, undef, mask'
if (LHS == RHS) {
- assert(!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?");
+ assert(!match(RHS, m_Undef()) &&
+ "Shuffle with 2 undef ops not simplified?");
// Remap any references to RHS to use LHS.
SmallVector<int, 16> Elts;
for (unsigned i = 0; i != VWidth; ++i) {
@@ -2338,7 +2426,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
}
// shuffle undef, x, mask --> shuffle x, undef, mask'
- if (isa<UndefValue>(LHS)) {
+ if (match(LHS, m_Undef())) {
SVI.commute();
return &SVI;
}
@@ -2373,7 +2461,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Instruction *I = foldIdentityPaddedShuffles(SVI))
return I;
- if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
+ if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
Value *V = evaluateInDifferentElementOrder(LHS, Mask);
return replaceInstUsesWith(SVI, V);
}
@@ -2512,10 +2600,10 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
if (LHSShuffle)
- if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
+ if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef()))
LHSShuffle = nullptr;
if (RHSShuffle)
- if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
+ if (!match(RHSShuffle->getOperand(1), m_Undef()))
RHSShuffle = nullptr;
if (!LHSShuffle && !RHSShuffle)
return MadeChange ? &SVI : nullptr;
@@ -2538,7 +2626,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value* newRHS = RHS;
if (LHSShuffle) {
// case 1
- if (isa<UndefValue>(RHS)) {
+ if (match(RHS, m_Undef())) {
newLHS = LHSOp0;
newRHS = LHSOp1;
}
@@ -2596,7 +2684,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
//
// If the value selected is an undef value, explicitly specify it
// with a -1 mask value. (case 1)
- if (isa<UndefValue>(RHS))
+ if (match(RHS, m_Undef()))
eltMask = -1;
// If RHS is going to be replaced (case 3 or 4), calculate the
// new mask value for the element.
@@ -2605,8 +2693,8 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// If the value selected is an undef value, explicitly specify it
// with a -1 mask value.
if (eltMask >= (int)RHSOp0Width) {
- assert(isa<UndefValue>(RHSShuffle->getOperand(1))
- && "should have been check above");
+ assert(match(RHSShuffle->getOperand(1), m_Undef()) &&
+ "should have been check above");
eltMask = -1;
}
} else