diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
| commit | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch) | |
| tree | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | |
| parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0evendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 178 |
1 file changed, 133 insertions, 45 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 06f22cdfb63d..32b15376f898 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -347,6 +347,25 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { ElementCount EC = EI.getVectorOperandType()->getElementCount(); unsigned NumElts = EC.getKnownMinValue(); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) { + Intrinsic::ID IID = II->getIntrinsicID(); + // Index needs to be lower than the minimum size of the vector, because + // for scalable vector, the vector size is known at run time. + if (IID == Intrinsic::experimental_stepvector && + IndexC->getValue().ult(NumElts)) { + Type *Ty = EI.getType(); + unsigned BitWidth = Ty->getIntegerBitWidth(); + Value *Idx; + // Return index when its value does not exceed the allowed limit + // for the element type of the vector, otherwise return undefined. + if (IndexC->getValue().getActiveBits() <= BitWidth) + Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth)); + else + Idx = UndefValue::get(Ty); + return replaceInstUsesWith(EI, Idx); + } + } + // InstSimplify should handle cases where the index is invalid. // For fixed-length vector, it's invalid to extract out-of-range element. if (!EC.isScalable() && IndexC->getValue().uge(NumElts)) @@ -382,6 +401,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { } } } + if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian())) return I; @@ -430,6 +450,47 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { // be the same value, extract from the pre-inserted value instead. 
if (isa<Constant>(IE->getOperand(2)) && IndexC) return replaceOperand(EI, 0, IE->getOperand(0)); + } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { + auto *VecType = cast<VectorType>(GEP->getType()); + ElementCount EC = VecType->getElementCount(); + uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0; + if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) { + // Find out why we have a vector result - these are a few examples: + // 1. We have a scalar pointer and a vector of indices, or + // 2. We have a vector of pointers and a scalar index, or + // 3. We have a vector of pointers and a vector of indices, etc. + // Here we only consider combining when there is exactly one vector + // operand, since the optimization is less obviously a win due to + // needing more than one extractelements. + + unsigned VectorOps = + llvm::count_if(GEP->operands(), [](const Value *V) { + return isa<VectorType>(V->getType()); + }); + if (VectorOps > 1) + return nullptr; + assert(VectorOps == 1 && "Expected exactly one vector GEP operand!"); + + Value *NewPtr = GEP->getPointerOperand(); + if (isa<VectorType>(NewPtr->getType())) + NewPtr = Builder.CreateExtractElement(NewPtr, IndexC); + + SmallVector<Value *> NewOps; + for (unsigned I = 1; I != GEP->getNumOperands(); ++I) { + Value *Op = GEP->getOperand(I); + if (isa<VectorType>(Op->getType())) + NewOps.push_back(Builder.CreateExtractElement(Op, IndexC)); + else + NewOps.push_back(Op); + } + + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( + cast<PointerType>(NewPtr->getType())->getElementType(), NewPtr, + NewOps); + NewGEP->setIsInBounds(GEP->isInBounds()); + return NewGEP; + } + return nullptr; } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) { // If this is extracting an element from a shufflevector, figure out where // it came from and extract from the appropriate input element instead. 
@@ -474,7 +535,7 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, "Invalid CollectSingleShuffleElements"); unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); - if (isa<UndefValue>(V)) { + if (match(V, m_Undef())) { Mask.assign(NumElts, -1); return true; } @@ -554,9 +615,9 @@ static void replaceExtractElements(InsertElementInst *InsElt, NumExtElts >= NumInsElts) return; - // Create a shuffle mask to widen the extended-from vector using undefined + // Create a shuffle mask to widen the extended-from vector using poison // values. The mask selects all of the values of the original vector followed - // by as many undefined values as needed to create a vector of the same length + // by as many poison values as needed to create a vector of the same length // as the inserted-to vector. SmallVector<int, 16> ExtendMask; for (unsigned i = 0; i < NumExtElts; ++i) @@ -591,7 +652,7 @@ static void replaceExtractElements(InsertElementInst *InsElt, return; auto *WideVec = - new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), ExtendMask); + new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask); // Insert the new shuffle after the vector operand of the extract is defined // (as long as it's not a PHI) or at the start of the basic block of the @@ -630,7 +691,7 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask, assert(V->getType()->isVectorTy() && "Invalid shuffle!"); unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); - if (isa<UndefValue>(V)) { + if (match(V, m_Undef())) { Mask.assign(NumElts, -1); return std::make_pair( PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); @@ -737,12 +798,12 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // Try to find a value of each element of an aggregate. 
// FIXME: deal with more complex, not one-dimensional, aggregate types - SmallVector<Optional<Value *>, 2> AggElts(NumAggElts, NotFound); + SmallVector<Optional<Instruction *>, 2> AggElts(NumAggElts, NotFound); // Do we know values for each element of the aggregate? auto KnowAllElts = [&AggElts]() { return all_of(AggElts, - [](Optional<Value *> Elt) { return Elt != NotFound; }); + [](Optional<Instruction *> Elt) { return Elt != NotFound; }); }; int Depth = 0; @@ -757,7 +818,11 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( Depth < DepthLimit && CurrIVI && !KnowAllElts(); CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()), ++Depth) { - Value *InsertedValue = CurrIVI->getInsertedValueOperand(); + auto *InsertedValue = + dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand()); + if (!InsertedValue) + return nullptr; // Inserted value must be produced by an instruction. + ArrayRef<unsigned int> Indices = CurrIVI->getIndices(); // Don't bother with more than single-level aggregates. @@ -767,7 +832,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // Now, we may have already previously recorded the value for this element // of an aggregate. If we did, that means the CurrIVI will later be // overwritten with the already-recorded value. But if not, let's record it! - Optional<Value *> &Elt = AggElts[Indices.front()]; + Optional<Instruction *> &Elt = AggElts[Indices.front()]; Elt = Elt.getValueOr(InsertedValue); // FIXME: should we handle chain-terminating undef base operand? @@ -811,15 +876,15 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // If found, return the source aggregate from which the extraction was. // If \p PredBB is provided, does PHI translation of an \p Elt first. 
auto FindSourceAggregate = - [&](Value *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB, + [&](Instruction *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB, Optional<BasicBlock *> PredBB) -> Optional<Value *> { // For now(?), only deal with, at most, a single level of PHI indirection. if (UseBB && PredBB) - Elt = Elt->DoPHITranslation(*UseBB, *PredBB); + Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB)); // FIXME: deal with multiple levels of PHI indirection? // Did we find an extraction? - auto *EVI = dyn_cast<ExtractValueInst>(Elt); + auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt); if (!EVI) return NotFound; @@ -907,13 +972,8 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // they all should be defined in the same basic block. BasicBlock *UseBB = nullptr; - for (const Optional<Value *> &Elt : AggElts) { - // If this element's value was not defined by an instruction, ignore it. - auto *I = dyn_cast<Instruction>(*Elt); - if (!I) - continue; - // Otherwise, in which basic block is this instruction located? - BasicBlock *BB = I->getParent(); + for (const Optional<Instruction *> &I : AggElts) { + BasicBlock *BB = (*I)->getParent(); // If it's the first instruction we've encountered, record the basic block. if (!UseBB) { UseBB = BB; @@ -1050,7 +1110,7 @@ static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) { /// Turn a chain of inserts that splats a value into an insert + shuffle: /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... -> -/// shufflevector(insertelt(X, %k, 0), undef, zero) +/// shufflevector(insertelt(X, %k, 0), poison, zero) static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { // We are interested in the last insert in a chain. So if this insert has a // single user and that user is an insert, bail. @@ -1102,16 +1162,16 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { // insert into every element. 
// TODO: If the base vector is not undef, it might be better to create a splat // and then a select-shuffle (blend) with the base vector. - if (!isa<UndefValue>(FirstIE->getOperand(0))) + if (!match(FirstIE->getOperand(0), m_Undef())) if (!ElementPresent.all()) return nullptr; // Create the insert + shuffle. Type *Int32Ty = Type::getInt32Ty(InsElt.getContext()); - UndefValue *UndefVec = UndefValue::get(VecTy); + PoisonValue *PoisonVec = PoisonValue::get(VecTy); Constant *Zero = ConstantInt::get(Int32Ty, 0); if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero()) - FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt); + FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt); // Splat from element 0, but replace absent elements with undef in the mask. SmallVector<int, 16> Mask(NumElements, 0); @@ -1119,7 +1179,7 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { if (!ElementPresent[i]) Mask[i] = -1; - return new ShuffleVectorInst(FirstIE, UndefVec, Mask); + return new ShuffleVectorInst(FirstIE, PoisonVec, Mask); } /// Try to fold an insert element into an existing splat shuffle by changing @@ -1164,7 +1224,7 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) { static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) { // Check if the vector operand of this insert is an identity shuffle. 
auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0)); - if (!Shuf || !isa<UndefValue>(Shuf->getOperand(1)) || + if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) || !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding())) return nullptr; @@ -1633,14 +1693,14 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); Type *EltTy = V->getType()->getScalarType(); Type *I32Ty = IntegerType::getInt32Ty(V->getContext()); - if (isa<UndefValue>(V)) + if (match(V, m_Undef())) return UndefValue::get(FixedVectorType::get(EltTy, Mask.size())); if (isa<ConstantAggregateZero>(V)) return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size())); if (Constant *C = dyn_cast<Constant>(V)) - return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), + return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()), Mask); Instruction *I = cast<Instruction>(V); @@ -1886,7 +1946,7 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf, // Canonicalize to choose from operand 0 first unless operand 1 is undefined. // Commuting undef to operand 0 conflicts with another canonicalization. unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); - if (!isa<UndefValue>(Shuf.getOperand(1)) && + if (!match(Shuf.getOperand(1), m_Undef()) && Shuf.getMaskValue(0) >= (int)NumElts) { // TODO: Can we assert that both operands of a shuffle-select are not undef // (otherwise, it would have been folded by instsimplify? @@ -2080,13 +2140,22 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf, return SelectInst::Create(NarrowCond, NarrowX, NarrowY); } -/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask. +/// Try to fold an extract subvector operation. 
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) { Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); - if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1)) + if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef())) return nullptr; - Value *X, *Y; + // Check if we are extracting all bits of an inserted scalar: + // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type + Value *X; + if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) && + X->getType()->getPrimitiveSizeInBits() == + Shuf.getType()->getPrimitiveSizeInBits()) + return new BitCastInst(X, Shuf.getType()); + + // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask. + Value *Y; ArrayRef<int> Mask; if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask)))) return nullptr; @@ -2231,10 +2300,10 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { !isPowerOf2_32( cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) || !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) || - isa<UndefValue>(X) || isa<UndefValue>(Y)) + match(X, m_Undef()) || match(Y, m_Undef())) return nullptr; - assert(isa<UndefValue>(Shuffle0->getOperand(1)) && - isa<UndefValue>(Shuffle1->getOperand(1)) && + assert(match(Shuffle0->getOperand(1), m_Undef()) && + match(Shuffle1->getOperand(1), m_Undef()) && "Unexpected operand for identity shuffle"); // This is a shuffle of 2 widening shuffles. 
We can shuffle the narrow source @@ -2287,9 +2356,27 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (isa<ScalableVectorType>(LHS->getType())) return nullptr; - // shuffle x, x, mask --> shuffle x, undef, mask' unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements(); unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements(); + + // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask) + // + // if X and Y are of the same (vector) type, and the element size is not + // changed by the bitcasts, we can distribute the bitcasts through the + // shuffle, hopefully reducing the number of instructions. We make sure that + // at least one bitcast only has one use, so we don't *increase* the number of + // instructions here. + Value *X, *Y; + if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) && + X->getType()->isVectorTy() && X->getType() == Y->getType() && + X->getType()->getScalarSizeInBits() == + SVI.getType()->getScalarSizeInBits() && + (LHS->hasOneUse() || RHS->hasOneUse())) { + Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(), + SVI.getName() + ".uncasted"); + return new BitCastInst(V, SVI.getType()); + } + ArrayRef<int> Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); @@ -2299,7 +2386,6 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // TODO: This could be extended to allow length-changing shuffles. // The transform might also be obsoleted if we allowed canonicalization // of bitcasted shuffles. - Value *X; if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) && X->getType()->isVectorTy() && VWidth == LHSWidth) { // Try to create a scaled mask constant. 
@@ -2323,8 +2409,10 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { } } + // shuffle x, x, mask --> shuffle x, undef, mask' if (LHS == RHS) { - assert(!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?"); + assert(!match(RHS, m_Undef()) && + "Shuffle with 2 undef ops not simplified?"); // Remap any references to RHS to use LHS. SmallVector<int, 16> Elts; for (unsigned i = 0; i != VWidth; ++i) { @@ -2338,7 +2426,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { } // shuffle undef, x, mask --> shuffle x, undef, mask' - if (isa<UndefValue>(LHS)) { + if (match(LHS, m_Undef())) { SVI.commute(); return &SVI; } @@ -2373,7 +2461,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (Instruction *I = foldIdentityPaddedShuffles(SVI)) return I; - if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) { + if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) { Value *V = evaluateInDifferentElementOrder(LHS, Mask); return replaceInstUsesWith(SVI, V); } @@ -2512,10 +2600,10 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); if (LHSShuffle) - if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) + if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef())) LHSShuffle = nullptr; if (RHSShuffle) - if (!isa<UndefValue>(RHSShuffle->getOperand(1))) + if (!match(RHSShuffle->getOperand(1), m_Undef())) RHSShuffle = nullptr; if (!LHSShuffle && !RHSShuffle) return MadeChange ? 
&SVI : nullptr; @@ -2538,7 +2626,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value* newRHS = RHS; if (LHSShuffle) { // case 1 - if (isa<UndefValue>(RHS)) { + if (match(RHS, m_Undef())) { newLHS = LHSOp0; newRHS = LHSOp1; } @@ -2596,7 +2684,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // // If the value selected is an undef value, explicitly specify it // with a -1 mask value. (case 1) - if (isa<UndefValue>(RHS)) + if (match(RHS, m_Undef())) eltMask = -1; // If RHS is going to be replaced (case 3 or 4), calculate the // new mask value for the element. @@ -2605,8 +2693,8 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // If the value selected is an undef value, explicitly specify it // with a -1 mask value. if (eltMask >= (int)RHSOp0Width) { - assert(isa<UndefValue>(RHSShuffle->getOperand(1)) - && "should have been check above"); + assert(match(RHSShuffle->getOperand(1), m_Undef()) && + "should have been check above"); eltMask = -1; } } else |
