diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
| commit | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch) | |
| tree | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | |
| parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0evendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 178 |
1 file changed, 133 insertions, 45 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 06f22cdfb63d..32b15376f898 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -347,6 +347,25 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { ElementCount EC = EI.getVectorOperandType()->getElementCount(); unsigned NumElts = EC.getKnownMinValue(); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) { + Intrinsic::ID IID = II->getIntrinsicID(); + // Index needs to be lower than the minimum size of the vector, because + // for scalable vector, the vector size is known at run time. + if (IID == Intrinsic::experimental_stepvector && + IndexC->getValue().ult(NumElts)) { + Type *Ty = EI.getType(); + unsigned BitWidth = Ty->getIntegerBitWidth(); + Value *Idx; + // Return index when its value does not exceed the allowed limit + // for the element type of the vector, otherwise return undefined. + if (IndexC->getValue().getActiveBits() <= BitWidth) + Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth)); + else + Idx = UndefValue::get(Ty); + return replaceInstUsesWith(EI, Idx); + } + } + // InstSimplify should handle cases where the index is invalid. // For fixed-length vector, it's invalid to extract out-of-range element. if (!EC.isScalable() && IndexC->getValue().uge(NumElts)) @@ -382,6 +401,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { } } } + if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian())) return I; @@ -430,6 +450,47 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { // be the same value, extract from the pre-inserted value instead. 
if (isa<Constant>(IE->getOperand(2)) && IndexC) return replaceOperand(EI, 0, IE->getOperand(0)); + } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { + auto *VecType = cast<VectorType>(GEP->getType()); + ElementCount EC = VecType->getElementCount(); + uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0; + if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) { + // Find out why we have a vector result - these are a few examples: + // 1. We have a scalar pointer and a vector of indices, or + // 2. We have a vector of pointers and a scalar index, or + // 3. We have a vector of pointers and a vector of indices, etc. + // Here we only consider combining when there is exactly one vector + // operand, since the optimization is less obviously a win due to + // needing more than one extractelements. + + unsigned VectorOps = + llvm::count_if(GEP->operands(), [](const Value *V) { + return isa<VectorType>(V->getType()); + }); + if (VectorOps > 1) + return nullptr; + assert(VectorOps == 1 && "Expected exactly one vector GEP operand!"); + + Value *NewPtr = GEP->getPointerOperand(); + if (isa<VectorType>(NewPtr->getType())) + NewPtr = Builder.CreateExtractElement(NewPtr, IndexC); + + SmallVector<Value *> NewOps; + for (unsigned I = 1; I != GEP->getNumOperands(); ++I) { + Value *Op = GEP->getOperand(I); + if (isa<VectorType>(Op->getType())) + NewOps.push_back(Builder.CreateExtractElement(Op, IndexC)); + else + NewOps.push_back(Op); + } + + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( + cast<PointerType>(NewPtr->getType())->getElementType(), NewPtr, + NewOps); + NewGEP->setIsInBounds(GEP->isInBounds()); + return NewGEP; + } + return nullptr; } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) { // If this is extracting an element from a shufflevector, figure out where // it came from and extract from the appropriate input element instead. 
@@ -474,7 +535,7 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, "Invalid CollectSingleShuffleElements"); unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); - if (isa<UndefValue>(V)) { + if (match(V, m_Undef())) { Mask.assign(NumElts, -1); return true; } @@ -554,9 +615,9 @@ static void replaceExtractElements(InsertElementInst *InsElt, NumExtElts >= NumInsElts) return; - // Create a shuffle mask to widen the extended-from vector using undefined + // Create a shuffle mask to widen the extended-from vector using poison // values. The mask selects all of the values of the original vector followed - // by as many undefined values as needed to create a vector of the same length + // by as many poison values as needed to create a vector of the same length // as the inserted-to vector. SmallVector<int, 16> ExtendMask; for (unsigned i = 0; i < NumExtElts; ++i) @@ -591,7 +652,7 @@ static void replaceExtractElements(InsertElementInst *InsElt, return; auto *WideVec = - new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), ExtendMask); + new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask); // Insert the new shuffle after the vector operand of the extract is defined // (as long as it's not a PHI) or at the start of the basic block of the @@ -630,7 +691,7 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask, assert(V->getType()->isVectorTy() && "Invalid shuffle!"); unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); - if (isa<UndefValue>(V)) { + if (match(V, m_Undef())) { Mask.assign(NumElts, -1); return std::make_pair( PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); @@ -737,12 +798,12 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // Try to find a value of each element of an aggregate. 
// FIXME: deal with more complex, not one-dimensional, aggregate types - SmallVector<Optional<Value *>, 2> AggElts(NumAggElts, NotFound); + SmallVector<Optional<Instruction *>, 2> AggElts(NumAggElts, NotFound); // Do we know values for each element of the aggregate? auto KnowAllElts = [&AggElts]() { return all_of(AggElts, - [](Optional<Value *> Elt) { return Elt != NotFound; }); + [](Optional<Instruction *> Elt) { return Elt != NotFound; }); }; int Depth = 0; @@ -757,7 +818,11 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( Depth < DepthLimit && CurrIVI && !KnowAllElts(); CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()), ++Depth) { - Value *InsertedValue = CurrIVI->getInsertedValueOperand(); + auto *InsertedValue = + dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand()); + if (!InsertedValue) + return nullptr; // Inserted value must be produced by an instruction. + ArrayRef<unsigned int> Indices = CurrIVI->getIndices(); // Don't bother with more than single-level aggregates. @@ -767,7 +832,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // Now, we may have already previously recorded the value for this element // of an aggregate. If we did, that means the CurrIVI will later be // overwritten with the already-recorded value. But if not, let's record it! - Optional<Value *> &Elt = AggElts[Indices.front()]; + Optional<Instruction *> &Elt = AggElts[Indices.front()]; Elt = Elt.getValueOr(InsertedValue); // FIXME: should we handle chain-terminating undef base operand? @@ -811,15 +876,15 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // If found, return the source aggregate from which the extraction was. // If \p PredBB is provided, does PHI translation of an \p Elt first. 
auto FindSourceAggregate = - [&](Value *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB, + [&](Instruction *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB, Optional<BasicBlock *> PredBB) -> Optional<Value *> { // For now(?), only deal with, at most, a single level of PHI indirection. if (UseBB && PredBB) - Elt = Elt->DoPHITranslation(*UseBB, *PredBB); + Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB)); // FIXME: deal with multiple levels of PHI indirection? // Did we find an extraction? - auto *EVI = dyn_cast<ExtractValueInst>(Elt); + auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt); if (!EVI) return NotFound; @@ -907,13 +972,8 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // they all should be defined in the same basic block. BasicBlock *UseBB = nullptr; - for (const Optional<Value *> &Elt : AggElts) { - // If this element's value was not defined by an instruction, ignore it. - auto *I = dyn_cast<Instruction>(*Elt); - if (!I) - continue; - // Otherwise, in which basic block is this instruction located? - BasicBlock *BB = I->getParent(); + for (const Optional<Instruction *> &I : AggElts) { + BasicBlock *BB = (*I)->getParent(); // If it's the first instruction we've encountered, record the basic block. if (!UseBB) { UseBB = BB; @@ -1050,7 +1110,7 @@ static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) { /// Turn a chain of inserts that splats a value into an insert + shuffle: /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... -> -/// shufflevector(insertelt(X, %k, 0), undef, zero) +/// shufflevector(insertelt(X, %k, 0), poison, zero) static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { // We are interested in the last insert in a chain. So if this insert has a // single user and that user is an insert, bail. @@ -1102,16 +1162,16 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { // insert into every element. 
// TODO: If the base vector is not undef, it might be better to create a splat // and then a select-shuffle (blend) with the base vector. - if (!isa<UndefValue>(FirstIE->getOperand(0))) + if (!match(FirstIE->getOperand(0), m_Undef())) if (!ElementPresent.all()) return nullptr; // Create the insert + shuffle. Type *Int32Ty = Type::getInt32Ty(InsElt.getContext()); - UndefValue *UndefVec = UndefValue::get(VecTy); + PoisonValue *PoisonVec = PoisonValue::get(VecTy); Constant *Zero = ConstantInt::get(Int32Ty, 0); if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero()) - FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt); + FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt); // Splat from element 0, but replace absent elements with undef in the mask. SmallVector<int, 16> Mask(NumElements, 0); @@ -1119,7 +1179,7 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { if (!ElementPresent[i]) Mask[i] = -1; - return new ShuffleVectorInst(FirstIE, UndefVec, Mask); + return new ShuffleVectorInst(FirstIE, PoisonVec, Mask); } /// Try to fold an insert element into an existing splat shuffle by changing @@ -1164,7 +1224,7 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) { static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) { // Check if the vector operand of this insert is an identity shuffle. 
auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0)); - if (!Shuf || !isa<UndefValue>(Shuf->getOperand(1)) || + if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) || !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding())) return nullptr; @@ -1633,14 +1693,14 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); Type *EltTy = V->getType()->getScalarType(); Type *I32Ty = IntegerType::getInt32Ty(V->getContext()); - if (isa<UndefValue>(V)) + if (match(V, m_Undef())) return UndefValue::get(FixedVectorType::get(EltTy, Mask.size())); if (isa<ConstantAggregateZero>(V)) return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size())); if (Constant *C = dyn_cast<Constant>(V)) - return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), + return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()), Mask); Instruction *I = cast<Instruction>(V); @@ -1886,7 +1946,7 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf, // Canonicalize to choose from operand 0 first unless operand 1 is undefined. // Commuting undef to operand 0 conflicts with another canonicalization. unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); - if (!isa<UndefValue>(Shuf.getOperand(1)) && + if (!match(Shuf.getOperand(1), m_Undef()) && Shuf.getMaskValue(0) >= (int)NumElts) { // TODO: Can we assert that both operands of a shuffle-select are not undef // (otherwise, it would have been folded by instsimplify? @@ -2080,13 +2140,22 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf, return SelectInst::Create(NarrowCond, NarrowX, NarrowY); } -/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask. +/// Try to fold an extract subvector operation. 
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) { Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); - if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1)) + if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef())) return nullptr; - Value *X, *Y; + // Check if we are extracting all bits of an inserted scalar: + // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type + Value *X; + if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) && + X->getType()->getPrimitiveSizeInBits() == + Shuf.getType()->getPrimitiveSizeInBits()) + return new BitCastInst(X, Shuf.getType()); + + // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask. + Value *Y; ArrayRef<int> Mask; if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask)))) return nullptr; @@ -2231,10 +2300,10 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { !isPowerOf2_32( cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) || !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) || - isa<UndefValue>(X) || isa<UndefValue>(Y)) + match(X, m_Undef()) || match(Y, m_Undef())) return nullptr; - assert(isa<UndefValue>(Shuffle0->getOperand(1)) && - isa<UndefValue>(Shuffle1->getOperand(1)) && + assert(match(Shuffle0->getOperand(1), m_Undef()) && + match(Shuffle1->getOperand(1), m_Undef()) && "Unexpected operand for identity shuffle"); // This is a shuffle of 2 widening shuffles. 
We can shuffle the narrow source @@ -2287,9 +2356,27 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (isa<ScalableVectorType>(LHS->getType())) return nullptr; - // shuffle x, x, mask --> shuffle x, undef, mask' unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements(); unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements(); + + // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask) + // + // if X and Y are of the same (vector) type, and the element size is not + // changed by the bitcasts, we can distribute the bitcasts through the + // shuffle, hopefully reducing the number of instructions. We make sure that + // at least one bitcast only has one use, so we don't *increase* the number of + // instructions here. + Value *X, *Y; + if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) && + X->getType()->isVectorTy() && X->getType() == Y->getType() && + X->getType()->getScalarSizeInBits() == + SVI.getType()->getScalarSizeInBits() && + (LHS->hasOneUse() || RHS->hasOneUse())) { + Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(), + SVI.getName() + ".uncasted"); + return new BitCastInst(V, SVI.getType()); + } + ArrayRef<int> Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); @@ -2299,7 +2386,6 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // TODO: This could be extended to allow length-changing shuffles. // The transform might also be obsoleted if we allowed canonicalization // of bitcasted shuffles. - Value *X; if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) && X->getType()->isVectorTy() && VWidth == LHSWidth) { // Try to create a scaled mask constant. 
@@ -2323,8 +2409,10 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { } } + // shuffle x, x, mask --> shuffle x, undef, mask' if (LHS == RHS) { - assert(!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?"); + assert(!match(RHS, m_Undef()) && + "Shuffle with 2 undef ops not simplified?"); // Remap any references to RHS to use LHS. SmallVector<int, 16> Elts; for (unsigned i = 0; i != VWidth; ++i) { @@ -2338,7 +2426,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { } // shuffle undef, x, mask --> shuffle x, undef, mask' - if (isa<UndefValue>(LHS)) { + if (match(LHS, m_Undef())) { SVI.commute(); return &SVI; } @@ -2373,7 +2461,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (Instruction *I = foldIdentityPaddedShuffles(SVI)) return I; - if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) { + if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) { Value *V = evaluateInDifferentElementOrder(LHS, Mask); return replaceInstUsesWith(SVI, V); } @@ -2512,10 +2600,10 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); if (LHSShuffle) - if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) + if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef())) LHSShuffle = nullptr; if (RHSShuffle) - if (!isa<UndefValue>(RHSShuffle->getOperand(1))) + if (!match(RHSShuffle->getOperand(1), m_Undef())) RHSShuffle = nullptr; if (!LHSShuffle && !RHSShuffle) return MadeChange ? 
&SVI : nullptr; @@ -2538,7 +2626,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value* newRHS = RHS; if (LHSShuffle) { // case 1 - if (isa<UndefValue>(RHS)) { + if (match(RHS, m_Undef())) { newLHS = LHSOp0; newRHS = LHSOp1; } @@ -2596,7 +2684,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // // If the value selected is an undef value, explicitly specify it // with a -1 mask value. (case 1) - if (isa<UndefValue>(RHS)) + if (match(RHS, m_Undef())) eltMask = -1; // If RHS is going to be replaced (case 3 or 4), calculate the // new mask value for the element. @@ -2605,8 +2693,8 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // If the value selected is an undef value, explicitly specify it // with a -1 mask value. if (eltMask >= (int)RHSOp0Width) { - assert(isa<UndefValue>(RHSShuffle->getOperand(1)) - && "should have been check above"); + assert(match(RHSShuffle->getOperand(1), m_Undef()) && + "should have been check above"); eltMask = -1; } } else |
