Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp  |  270
 1 file changed, 211 insertions(+), 59 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 13464c9d3496..f18711ba30b7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -13,6 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/VectorCombine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -28,6 +30,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include <numeric>
+#include <queue>
#define DEBUG_TYPE "vector-combine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"
@@ -100,8 +103,9 @@ private:
Instruction &I);
bool foldExtractExtract(Instruction &I);
bool foldInsExtFNeg(Instruction &I);
- bool foldBitcastShuf(Instruction &I);
+ bool foldBitcastShuffle(Instruction &I);
bool scalarizeBinopOrCmp(Instruction &I);
+ bool scalarizeVPIntrinsic(Instruction &I);
bool foldExtractedCmps(Instruction &I);
bool foldSingleElementStore(Instruction &I);
bool scalarizeLoadExtract(Instruction &I);
@@ -258,8 +262,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// It is safe and potentially profitable to load a vector directly:
// inselt undef, load Scalar, 0 --> load VecPtr
IRBuilder<> Builder(Load);
- Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
- SrcPtr, MinVecTy->getPointerTo(AS));
+ Value *CastedPtr =
+ Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Builder.getPtrTy(AS));
Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
VecLd = Builder.CreateShuffleVector(VecLd, Mask);
@@ -321,7 +325,7 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I) {
IRBuilder<> Builder(Load);
Value *CastedPtr =
- Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Ty->getPointerTo(AS));
+ Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Builder.getPtrTy(AS));
Value *VecLd = Builder.CreateAlignedLoad(Ty, CastedPtr, Alignment);
replaceValue(I, *VecLd);
++NumVecLoad;
@@ -677,7 +681,7 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
/// destination type followed by shuffle. This can enable further transforms by
/// moving bitcasts or shuffles together.
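+/// An illustrative example of the wide-to-narrow case:
+///   %shuf = shufflevector <2 x i64> %v, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
+///   %res  = bitcast <2 x i64> %shuf to <4 x i32>
+/// -->
+///   %cast = bitcast <2 x i64> %v to <4 x i32>
+///   %res  = shufflevector <4 x i32> %cast, <4 x i32> poison,
+///                         <4 x i32> <i32 2, i32 3, i32 0, i32 1>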
-bool VectorCombine::foldBitcastShuf(Instruction &I) {
+bool VectorCombine::foldBitcastShuffle(Instruction &I) {
Value *V;
ArrayRef<int> Mask;
if (!match(&I, m_BitCast(
@@ -687,35 +691,43 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
// 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
// scalable type is unknown; Second, we cannot reason if the narrowed shuffle
// mask for scalable type is a splat or not.
- // 2) Disallow non-vector casts and length-changing shuffles.
+ // 2) Disallow non-vector casts.
// TODO: We could allow any shuffle.
+ auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
- if (!SrcTy || I.getOperand(0)->getType() != SrcTy)
+ if (!DestTy || !SrcTy)
+ return false;
+
+ unsigned DestEltSize = DestTy->getScalarSizeInBits();
+ unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
+ if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
return false;
- auto *DestTy = cast<FixedVectorType>(I.getType());
- unsigned DestNumElts = DestTy->getNumElements();
- unsigned SrcNumElts = SrcTy->getNumElements();
SmallVector<int, 16> NewMask;
- if (SrcNumElts <= DestNumElts) {
+ if (DestEltSize <= SrcEltSize) {
// The bitcast is from wide to narrow/equal elements. The shuffle mask can
// always be expanded to the equivalent form choosing narrower elements.
- assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
- unsigned ScaleFactor = DestNumElts / SrcNumElts;
+ assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
+ unsigned ScaleFactor = SrcEltSize / DestEltSize;
narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
} else {
// The bitcast is from narrow elements to wide elements. The shuffle mask
// must choose consecutive elements to allow casting first.
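+ // E.g. a mask of <0,1,2,3> over <4 x i32> widens by 2 to <0,1> over
+ // <2 x i64>, while a mask such as <1,0,3,2> cannot be widened and the
+ // fold is rejected.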
- assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
- unsigned ScaleFactor = SrcNumElts / DestNumElts;
+ assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
+ unsigned ScaleFactor = DestEltSize / SrcEltSize;
if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
return false;
}
+ // Bitcast the shuffle src - keep its original width but use the destination
+ // scalar type.
+ unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
+ auto *ShuffleTy = FixedVectorType::get(DestTy->getScalarType(), NumSrcElts);
+
// The new shuffle must not cost more than the old shuffle. The bitcast is
// moved ahead of the shuffle, so assume that it has the same cost as before.
InstructionCost DestCost = TTI.getShuffleCost(
- TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask);
+ TargetTransformInfo::SK_PermuteSingleSrc, ShuffleTy, NewMask);
InstructionCost SrcCost =
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask);
if (DestCost > SrcCost || !DestCost.isValid())
@@ -723,12 +735,131 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
// bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
++NumShufOfBitcast;
- Value *CastV = Builder.CreateBitCast(V, DestTy);
+ Value *CastV = Builder.CreateBitCast(V, ShuffleTy);
Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
replaceValue(I, *Shuf);
return true;
}
+/// VP Intrinsics whose vector operands are both splat values may be simplified
+/// into the scalar version of the operation and the result splatted. This
+/// can lead to scalarization down the line.
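+/// For example, with an all-true mask:
+///   %r = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> splat(%x), <4 x i32> splat(%y),
+///                                          <4 x i1> splat(true), i32 %evl)
+/// --> %s = add i32 %x, %y
+///     %r = splat(%s)
+/// (Pseudo-IR: splat(..) abbreviates an insertelement + shufflevector
+/// broadcast.)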
+bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
+ if (!isa<VPIntrinsic>(I))
+ return false;
+ VPIntrinsic &VPI = cast<VPIntrinsic>(I);
+ Value *Op0 = VPI.getArgOperand(0);
+ Value *Op1 = VPI.getArgOperand(1);
+
+ if (!isSplatValue(Op0) || !isSplatValue(Op1))
+ return false;
+
+ // Check getSplatValue early in this function, to avoid doing unnecessary
+ // work.
+ Value *ScalarOp0 = getSplatValue(Op0);
+ Value *ScalarOp1 = getSplatValue(Op1);
+ if (!ScalarOp0 || !ScalarOp1)
+ return false;
+
+ // For the binary VP intrinsics supported here, the result on disabled lanes
+ // is a poison value. For now, only do this simplification if all lanes
+ // are active.
+ // TODO: Relax the condition that all lanes are active by using insertelement
+ // on inactive lanes.
+ auto IsAllTrueMask = [](Value *MaskVal) {
+ if (Value *SplattedVal = getSplatValue(MaskVal))
+ if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
+ return ConstValue->isAllOnesValue();
+ return false;
+ };
+ if (!IsAllTrueMask(VPI.getArgOperand(2)))
+ return false;
+
+ // Check to make sure we support scalarization of the intrinsic
+ Intrinsic::ID IntrID = VPI.getIntrinsicID();
+ if (!VPBinOpIntrinsic::isVPBinOp(IntrID))
+ return false;
+
+ // Calculate cost of splatting both operands into vectors and the vector
+ // intrinsic
+ VectorType *VecTy = cast<VectorType>(VPI.getType());
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost SplatCost =
+ TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind, 0) +
+ TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
+
+ // Calculate the cost of the VP Intrinsic
+ SmallVector<Type *, 4> Args;
+ for (Value *V : VPI.args())
+ Args.push_back(V->getType());
+ IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);
+ InstructionCost VectorOpCost = TTI.getIntrinsicInstrCost(Attrs, CostKind);
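+ // The old cost: one splat per operand, plus the vector intrinsic itself.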
+ InstructionCost OldCost = 2 * SplatCost + VectorOpCost;
+
+ // Determine scalar opcode
+ std::optional<unsigned> FunctionalOpcode = VPI.getFunctionalOpcode();
+ std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
+ if (!FunctionalOpcode) {
+ ScalarIntrID = VPI.getFunctionalIntrinsicID();
+ if (!ScalarIntrID)
+ return false;
+ }
+
+ // Calculate cost of scalarizing
+ InstructionCost ScalarOpCost = 0;
+ if (ScalarIntrID) {
+ IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
+ ScalarOpCost = TTI.getIntrinsicInstrCost(Attrs, CostKind);
+ } else {
+ ScalarOpCost =
+ TTI.getArithmeticInstrCost(*FunctionalOpcode, VecTy->getScalarType());
+ }
+
+ // The existing splats may be kept around if other instructions use them.
+ InstructionCost CostToKeepSplats =
+ (SplatCost * !Op0->hasOneUse()) + (SplatCost * !Op1->hasOneUse());
+ InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
+
+ LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI << "\n");
+ LLVM_DEBUG(dbgs() << "Cost of Intrinsic: " << OldCost
+ << ", Cost of scalarizing: " << NewCost << "\n");
+
+ // We want to scalarize unless the vector variant actually has lower cost.
+ if (OldCost < NewCost || !NewCost.isValid())
+ return false;
+
+ // Scalarize the intrinsic
+ ElementCount EC = cast<VectorType>(Op0->getType())->getElementCount();
+ Value *EVL = VPI.getArgOperand(3);
+ const DataLayout &DL = VPI.getModule()->getDataLayout();
+
+ // If the VP op might introduce UB or poison, we can only scalarize it when
+ // we know the EVL > 0: if the EVL is zero, the original VP op is a no-op and
+ // thus cannot be UB, so we must not introduce UB by scalarizing it
+ // unconditionally.
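+ // E.g. a vp.udiv with EVL == 0 performs no division at all, whereas the
+ // scalarized udiv would execute unconditionally.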
+ bool SafeToSpeculate;
+ if (ScalarIntrID)
+ SafeToSpeculate = Intrinsic::getAttributes(I.getContext(), *ScalarIntrID)
+ .hasFnAttr(Attribute::AttrKind::Speculatable);
+ else
+ SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode(
+ *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
+ if (!SafeToSpeculate && !isKnownNonZero(EVL, DL, 0, &AC, &VPI, &DT))
+ return false;
+
+ Value *ScalarVal =
+ ScalarIntrID
+ ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
+ {ScalarOp0, ScalarOp1})
+ : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),
+ ScalarOp0, ScalarOp1);
+
+ replaceValue(VPI, *Builder.CreateVectorSplat(EC, ScalarVal));
+ return true;
+}
+
/// Match a vector binop or compare instruction with at least one inserted
/// scalar operand and convert to scalar binop/cmp followed by insertelement.
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
@@ -1013,19 +1144,24 @@ public:
/// Check if it is legal to scalarize a memory access to \p VecTy at index \p
/// Idx. \p Idx must access a valid vector element.
-static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
- Value *Idx, Instruction *CtxI,
+static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx,
+ Instruction *CtxI,
AssumptionCache &AC,
const DominatorTree &DT) {
+ // We do checks for both fixed vector types and scalable vector types.
+ // This is the exact number of elements for fixed vector types,
+ // or the minimum number of elements for scalable vector types.
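+ // E.g. for <vscale x 4 x i32> this is 4; since vscale >= 1, an index known
+ // to be smaller than 4 is in bounds for every runtime vector length.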
+ uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
+
if (auto *C = dyn_cast<ConstantInt>(Idx)) {
- if (C->getValue().ult(VecTy->getNumElements()))
+ if (C->getValue().ult(NumElements))
return ScalarizationResult::safe();
return ScalarizationResult::unsafe();
}
unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
APInt Zero(IntWidth, 0);
- APInt MaxElts(IntWidth, VecTy->getNumElements());
+ APInt MaxElts(IntWidth, NumElements);
ConstantRange ValidIndices(Zero, MaxElts);
ConstantRange IdxRange(IntWidth, true);
@@ -1074,8 +1210,7 @@ static Align computeAlignmentAfterScalarization(Align VectorAlignment,
// store i32 %b, i32* %1
bool VectorCombine::foldSingleElementStore(Instruction &I) {
auto *SI = cast<StoreInst>(&I);
- if (!SI->isSimple() ||
- !isa<FixedVectorType>(SI->getValueOperand()->getType()))
+ if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
return false;
// TODO: Combine more complicated patterns (multiple insert) by referencing
@@ -1089,13 +1224,13 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
return false;
if (auto *Load = dyn_cast<LoadInst>(Source)) {
- auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
+ auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
const DataLayout &DL = I.getModule()->getDataLayout();
Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
// Don't optimize for atomic/volatile load or store. Ensure memory is not
// modified between, vector type matches store size, and index is inbounds.
if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
- !DL.typeSizeEqualsStoreSize(Load->getType()) ||
+ !DL.typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
SrcAddr != SI->getPointerOperand()->stripPointerCasts())
return false;
@@ -1130,19 +1265,26 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
if (!match(&I, m_Load(m_Value(Ptr))))
return false;
- auto *FixedVT = cast<FixedVectorType>(I.getType());
+ auto *VecTy = cast<VectorType>(I.getType());
auto *LI = cast<LoadInst>(&I);
const DataLayout &DL = I.getModule()->getDataLayout();
- if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(FixedVT))
+ if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(VecTy->getScalarType()))
return false;
InstructionCost OriginalCost =
- TTI.getMemoryOpCost(Instruction::Load, FixedVT, LI->getAlign(),
+ TTI.getMemoryOpCost(Instruction::Load, VecTy, LI->getAlign(),
LI->getPointerAddressSpace());
InstructionCost ScalarizedCost = 0;
Instruction *LastCheckedInst = LI;
unsigned NumInstChecked = 0;
+ DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
+ auto FailureGuard = make_scope_exit([&]() {
+ // If the transform is aborted, discard the ScalarizationResults.
+ for (auto &Pair : NeedFreeze)
+ Pair.second.discard();
+ });
+
// Check if all users of the load are extracts with no memory modifications
// between the load and the extract. Compute the cost of both the original
// code and the scalarized version.
@@ -1151,9 +1293,6 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
if (!UI || UI->getParent() != LI->getParent())
return false;
- if (!isGuaranteedNotToBePoison(UI->getOperand(1), &AC, LI, &DT))
- return false;
-
// Check if any instruction between the load and the extract may modify
// memory.
if (LastCheckedInst->comesBefore(UI)) {
@@ -1168,22 +1307,23 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
LastCheckedInst = UI;
}
- auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC, DT);
- if (!ScalarIdx.isSafe()) {
- // TODO: Freeze index if it is safe to do so.
- ScalarIdx.discard();
+ auto ScalarIdx = canScalarizeAccess(VecTy, UI->getOperand(1), &I, AC, DT);
+ if (ScalarIdx.isUnsafe())
return false;
+ if (ScalarIdx.isSafeWithFreeze()) {
+ NeedFreeze.try_emplace(UI, ScalarIdx);
+ ScalarIdx.discard();
}
auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
OriginalCost +=
- TTI.getVectorInstrCost(Instruction::ExtractElement, FixedVT, CostKind,
+ TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
Index ? Index->getZExtValue() : -1);
ScalarizedCost +=
- TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
+ TTI.getMemoryOpCost(Instruction::Load, VecTy->getElementType(),
Align(1), LI->getPointerAddressSpace());
- ScalarizedCost += TTI.getAddressComputationCost(FixedVT->getElementType());
+ ScalarizedCost += TTI.getAddressComputationCost(VecTy->getElementType());
}
if (ScalarizedCost >= OriginalCost)
@@ -1192,21 +1332,27 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
// Replace extracts with narrow scalar loads.
for (User *U : LI->users()) {
auto *EI = cast<ExtractElementInst>(U);
- Builder.SetInsertPoint(EI);
-
Value *Idx = EI->getOperand(1);
+
+ // Insert 'freeze' for poison indexes.
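+ // (Freezing replaces a possibly-poison operand with an arbitrary but
+ // well-defined value, so the in-range facts proven by canScalarizeAccess
+ // hold for the scalar load emitted below.)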
+ auto It = NeedFreeze.find(EI);
+ if (It != NeedFreeze.end())
+ It->second.freeze(Builder, *cast<Instruction>(Idx));
+
+ Builder.SetInsertPoint(EI);
Value *GEP =
- Builder.CreateInBoundsGEP(FixedVT, Ptr, {Builder.getInt32(0), Idx});
+ Builder.CreateInBoundsGEP(VecTy, Ptr, {Builder.getInt32(0), Idx});
auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
- FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));
+ VecTy->getElementType(), GEP, EI->getName() + ".scalar"));
Align ScalarOpAlignment = computeAlignmentAfterScalarization(
- LI->getAlign(), FixedVT->getElementType(), Idx, DL);
+ LI->getAlign(), VecTy->getElementType(), Idx, DL);
NewLoad->setAlignment(ScalarOpAlignment);
replaceValue(*EI, *NewLoad);
}
+ FailureGuard.release();
return true;
}
@@ -1340,21 +1486,28 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
if (!ShuffleInputType)
return false;
- int NumInputElts = ShuffleInputType->getNumElements();
+ unsigned NumInputElts = ShuffleInputType->getNumElements();
// Find the mask from sorting the lanes into order. This is most likely to
// become an identity or concat mask. Undef elements are pushed to the end.
SmallVector<int> ConcatMask;
Shuffle->getShuffleMask(ConcatMask);
sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
+ // In the case of a truncating shuffle it's possible for the mask
+ // to have an index greater than the size of the resulting vector.
+ // This requires special handling.
+ bool IsTruncatingShuffle = VecType->getNumElements() < NumInputElts;
bool UsesSecondVec =
- any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });
+ any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
+
+ FixedVectorType *VecTyForCost =
+ (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
InstructionCost OldCost = TTI.getShuffleCost(
- UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
- Shuffle->getShuffleMask());
+ UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
+ VecTyForCost, Shuffle->getShuffleMask());
InstructionCost NewCost = TTI.getShuffleCost(
- UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
- ConcatMask);
+ UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
+ VecTyForCost, ConcatMask);
LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
<< "\n");
@@ -1657,16 +1810,16 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
return SSV->getOperand(Op);
return SV->getOperand(Op);
};
- Builder.SetInsertPoint(SVI0A->getInsertionPointAfterDef());
+ Builder.SetInsertPoint(*SVI0A->getInsertionPointAfterDef());
Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
GetShuffleOperand(SVI0A, 1), V1A);
- Builder.SetInsertPoint(SVI0B->getInsertionPointAfterDef());
+ Builder.SetInsertPoint(*SVI0B->getInsertionPointAfterDef());
Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
GetShuffleOperand(SVI0B, 1), V1B);
- Builder.SetInsertPoint(SVI1A->getInsertionPointAfterDef());
+ Builder.SetInsertPoint(*SVI1A->getInsertionPointAfterDef());
Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
GetShuffleOperand(SVI1A, 1), V2A);
- Builder.SetInsertPoint(SVI1B->getInsertionPointAfterDef());
+ Builder.SetInsertPoint(*SVI1B->getInsertionPointAfterDef());
Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
GetShuffleOperand(SVI1B, 1), V2B);
Builder.SetInsertPoint(Op0);
@@ -1723,9 +1876,6 @@ bool VectorCombine::run() {
case Instruction::ShuffleVector:
MadeChange |= widenSubvectorLoad(I);
break;
- case Instruction::Load:
- MadeChange |= scalarizeLoadExtract(I);
- break;
default:
break;
}
@@ -1733,13 +1883,15 @@ bool VectorCombine::run() {
// This transform works with scalable and fixed vectors
// TODO: Identify and allow other scalable transforms
- if (isa<VectorType>(I.getType()))
+ if (isa<VectorType>(I.getType())) {
MadeChange |= scalarizeBinopOrCmp(I);
+ MadeChange |= scalarizeLoadExtract(I);
+ MadeChange |= scalarizeVPIntrinsic(I);
+ }
if (Opcode == Instruction::Store)
MadeChange |= foldSingleElementStore(I);
-
// If this is an early pipeline invocation of this pass, we are done.
if (TryEarlyFoldsOnly)
return;
@@ -1758,7 +1910,7 @@ bool VectorCombine::run() {
MadeChange |= foldSelectShuffle(I);
break;
case Instruction::BitCast:
- MadeChange |= foldBitcastShuf(I);
+ MadeChange |= foldBitcastShuffle(I);
break;
}
} else {