Diffstat (limited to 'llvm/lib/Transforms/Scalar/Scalarizer.cpp')
-rw-r--r--  llvm/lib/Transforms/Scalar/Scalarizer.cpp | 774
1 file changed, 506 insertions(+), 268 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 4aab88b74f10..86b55dfd304a 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -6,8 +6,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass converts vector operations into scalar operations, in order
-// to expose optimization opportunities on the individual scalar operations.
+// This pass converts vector operations into scalar operations (or, optionally,
+// operations on smaller vector widths), in order to expose optimization
+// opportunities on the individual scalar operations.
// It is mainly intended for targets that do not have vector units, but it
// may also be useful for revectorizing code to different vector widths.
//
@@ -62,6 +63,16 @@ static cl::opt<bool> ClScalarizeLoadStore(
"scalarize-load-store", cl::init(false), cl::Hidden,
cl::desc("Allow the scalarizer pass to scalarize loads and stores"));
+// Split vectors larger than this size into fragments, where each fragment is
+// either a vector no larger than this size or a scalar.
+//
+// Instructions with operands or results of different sizes that would be split
+// into a different number of fragments are currently left as-is.
+static cl::opt<unsigned> ClScalarizeMinBits(
+ "scalarize-min-bits", cl::init(0), cl::Hidden,
+ cl::desc("Instruct the scalarizer pass to attempt to keep values of a "
+ "minimum number of bits"));
+
namespace {
BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
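A rough usage sketch (the invocation below is not part of this diff): the new knob is an ordinary hidden cl::opt, so it can be exercised straight from opt, e.g.

  opt -passes=scalarizer -scalarize-min-bits=32 -S in.ll -o out.ll

With that setting, an operation on <8 x i16> is split into four <2 x i16> fragments rather than eight scalars, while an element type whose doubled width already exceeds the limit (such as i32 here) keeps being fully scalarized.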
@@ -88,6 +99,29 @@ using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
// along with a pointer to their scattered forms.
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;
+struct VectorSplit {
+ // The type of the vector.
+ FixedVectorType *VecTy = nullptr;
+
+ // The number of elements packed in a fragment (other than the remainder).
+ unsigned NumPacked = 0;
+
+ // The number of fragments (scalars or smaller vectors) into which the vector
+ // shall be split.
+ unsigned NumFragments = 0;
+
+ // The type of each complete fragment.
+ Type *SplitTy = nullptr;
+
+ // The type of the remainder (last) fragment; null if all fragments are
+ // complete.
+ Type *RemainderTy = nullptr;
+
+ Type *getFragmentType(unsigned I) const {
+ return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
+ }
+};
+
// Provides a very limited vector-like interface for lazily accessing one
// component of a scattered vector or vector pointer.
class Scatterer {
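A worked example for the fields above (assuming -scalarize-min-bits=32; not taken from the patch): a <5 x i16> value gives NumPacked = 2, NumFragments = 3, SplitTy = <2 x i16> and RemainderTy = i16, so getFragmentType(0) and getFragmentType(1) return <2 x i16> while getFragmentType(2) returns the scalar remainder type.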
@@ -97,23 +131,23 @@ public:
// Scatter V into Size components. If new instructions are needed,
// insert them before BBI in BB. If Cache is nonnull, use it to cache
// the results.
- Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, Type *PtrElemTy,
- ValueVector *cachePtr = nullptr);
+ Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+ const VectorSplit &VS, ValueVector *cachePtr = nullptr);
// Return component I, creating a new Value for it if necessary.
Value *operator[](unsigned I);
// Return the number of components.
- unsigned size() const { return Size; }
+ unsigned size() const { return VS.NumFragments; }
private:
BasicBlock *BB;
BasicBlock::iterator BBI;
Value *V;
- Type *PtrElemTy;
+ VectorSplit VS;
+ bool IsPointer;
ValueVector *CachePtr;
ValueVector Tmp;
- unsigned Size;
};
// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
@@ -171,24 +205,74 @@ struct BinarySplitter {
struct VectorLayout {
VectorLayout() = default;
- // Return the alignment of element I.
- Align getElemAlign(unsigned I) {
- return commonAlignment(VecAlign, I * ElemSize);
+ // Return the alignment of fragment Frag.
+ Align getFragmentAlign(unsigned Frag) {
+ return commonAlignment(VecAlign, Frag * SplitSize);
}
- // The type of the vector.
- FixedVectorType *VecTy = nullptr;
-
- // The type of each element.
- Type *ElemTy = nullptr;
+ // The split of the underlying vector type.
+ VectorSplit VS;
// The alignment of the vector.
Align VecAlign;
- // The size of each element.
- uint64_t ElemSize = 0;
+ // The size of each (non-remainder) fragment in bytes.
+ uint64_t SplitSize = 0;
};
+/// Concatenate the given fragments to a single vector value of the type
+/// described in @p VS.
+static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
+ const VectorSplit &VS, Twine Name) {
+ unsigned NumElements = VS.VecTy->getNumElements();
+ SmallVector<int> ExtendMask;
+ SmallVector<int> InsertMask;
+
+ if (VS.NumPacked > 1) {
+ // Prepare the shufflevector masks once and re-use them for all
+ // fragments.
+ ExtendMask.resize(NumElements, -1);
+ for (unsigned I = 0; I < VS.NumPacked; ++I)
+ ExtendMask[I] = I;
+
+ InsertMask.resize(NumElements);
+ for (unsigned I = 0; I < NumElements; ++I)
+ InsertMask[I] = I;
+ }
+
+ Value *Res = PoisonValue::get(VS.VecTy);
+ for (unsigned I = 0; I < VS.NumFragments; ++I) {
+ Value *Fragment = Fragments[I];
+
+ unsigned NumPacked = VS.NumPacked;
+ if (I == VS.NumFragments - 1 && VS.RemainderTy) {
+ if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
+ NumPacked = RemVecTy->getNumElements();
+ else
+ NumPacked = 1;
+ }
+
+ if (NumPacked == 1) {
+ Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
+ Name + ".upto" + Twine(I));
+ } else {
+ Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
+ if (I == 0) {
+ Res = Fragment;
+ } else {
+ for (unsigned J = 0; J < NumPacked; ++J)
+ InsertMask[I * VS.NumPacked + J] = NumElements + J;
+ Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
+ Name + ".upto" + Twine(I));
+ for (unsigned J = 0; J < NumPacked; ++J)
+ InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
+ }
+ }
+ }
+
+ return Res;
+}
+
template <typename T>
T getWithDefaultOverride(const cl::opt<T> &ClOption,
const std::optional<T> &DefaultOverride) {
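The mask handling in concatenate above is compact, so here is a minimal standalone sketch (plain C++ with no IRBuilder; the sizes are illustrative, not from the patch) of the masks it would use to rebuild a <4 x float> from two <2 x float> fragments:

  #include <cstdio>
  #include <vector>

  int main() {
    const unsigned NumElements = 4; // reassembling a <4 x float> ...
    const unsigned NumPacked = 2;   // ... from two <2 x float> fragments

    // Widen one fragment to the full result width; the tail lanes stay undefined.
    std::vector<int> ExtendMask(NumElements, -1);
    for (unsigned I = 0; I < NumPacked; ++I)
      ExtendMask[I] = I;

    // Start from an identity mask and patch in the lanes of fragment 1, which
    // come from the second shuffle operand (offset by NumElements).
    std::vector<int> InsertMask(NumElements);
    for (unsigned I = 0; I < NumElements; ++I)
      InsertMask[I] = I;
    for (unsigned J = 0; J < NumPacked; ++J)
      InsertMask[1 * NumPacked + J] = NumElements + J;

    std::printf("ExtendMask:");
    for (int M : ExtendMask)
      std::printf(" %d", M);
    std::printf("\nInsertMask:");
    for (int M : InsertMask)
      std::printf(" %d", M);
    std::printf("\n"); // prints: ExtendMask: 0 1 -1 -1 / InsertMask: 0 1 4 5
    return 0;
  }

So the full value is rebuilt with one shufflevector per fragment after the first, instead of one insertelement per scalar lane as the old code did.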
@@ -205,8 +289,9 @@ public:
getWithDefaultOverride(ClScalarizeVariableInsertExtract,
Options.ScalarizeVariableInsertExtract)),
ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore,
- Options.ScalarizeLoadStore)) {
- }
+ Options.ScalarizeLoadStore)),
+ ScalarizeMinBits(getWithDefaultOverride(ClScalarizeMinBits,
+ Options.ScalarizeMinBits)) {}
bool visit(Function &F);
@@ -228,13 +313,15 @@ public:
bool visitLoadInst(LoadInst &LI);
bool visitStoreInst(StoreInst &SI);
bool visitCallInst(CallInst &ICI);
+ bool visitFreezeInst(FreezeInst &FI);
private:
- Scatterer scatter(Instruction *Point, Value *V, Type *PtrElemTy = nullptr);
- void gather(Instruction *Op, const ValueVector &CV);
+ Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
+ void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
void replaceUses(Instruction *Op, Value *CV);
bool canTransferMetadata(unsigned Kind);
void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
+ std::optional<VectorSplit> getVectorSplit(Type *Ty);
std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
const DataLayout &DL);
bool finish();
@@ -256,6 +343,7 @@ private:
const bool ScalarizeVariableInsertExtract;
const bool ScalarizeLoadStore;
+ const unsigned ScalarizeMinBits;
};
class ScalarizerLegacyPass : public FunctionPass {
@@ -284,42 +372,47 @@ INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
- Type *PtrElemTy, ValueVector *cachePtr)
- : BB(bb), BBI(bbi), V(v), PtrElemTy(PtrElemTy), CachePtr(cachePtr) {
- Type *Ty = V->getType();
- if (Ty->isPointerTy()) {
- assert(cast<PointerType>(Ty)->isOpaqueOrPointeeTypeMatches(PtrElemTy) &&
- "Pointer element type mismatch");
- Ty = PtrElemTy;
+ const VectorSplit &VS, ValueVector *cachePtr)
+ : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
+ IsPointer = V->getType()->isPointerTy();
+ if (!CachePtr) {
+ Tmp.resize(VS.NumFragments, nullptr);
+ } else {
+ assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
+ IsPointer) &&
+ "Inconsistent vector sizes");
+ if (VS.NumFragments > CachePtr->size())
+ CachePtr->resize(VS.NumFragments, nullptr);
}
- Size = cast<FixedVectorType>(Ty)->getNumElements();
- if (!CachePtr)
- Tmp.resize(Size, nullptr);
- else if (CachePtr->empty())
- CachePtr->resize(Size, nullptr);
- else
- assert(Size == CachePtr->size() && "Inconsistent vector sizes");
}
-// Return component I, creating a new Value for it if necessary.
-Value *Scatterer::operator[](unsigned I) {
- ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
+// Return fragment Frag, creating a new Value for it if necessary.
+Value *Scatterer::operator[](unsigned Frag) {
+ ValueVector &CV = CachePtr ? *CachePtr : Tmp;
// Try to reuse a previous value.
- if (CV[I])
- return CV[I];
+ if (CV[Frag])
+ return CV[Frag];
IRBuilder<> Builder(BB, BBI);
- if (PtrElemTy) {
- Type *VectorElemTy = cast<VectorType>(PtrElemTy)->getElementType();
- if (!CV[0]) {
- Type *NewPtrTy = PointerType::get(
- VectorElemTy, V->getType()->getPointerAddressSpace());
- CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
- }
- if (I != 0)
- CV[I] = Builder.CreateConstGEP1_32(VectorElemTy, CV[0], I,
- V->getName() + ".i" + Twine(I));
+ if (IsPointer) {
+ if (Frag == 0)
+ CV[Frag] = V;
+ else
+ CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
+ V->getName() + ".i" + Twine(Frag));
+ return CV[Frag];
+ }
+
+ Type *FragmentTy = VS.getFragmentType(Frag);
+
+ if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
+ SmallVector<int> Mask;
+ for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
+ Mask.push_back(Frag * VS.NumPacked + J);
+ CV[Frag] =
+ Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
+ V->getName() + ".i" + Twine(Frag));
} else {
- // Search through a chain of InsertElementInsts looking for element I.
+ // Search through a chain of InsertElementInsts looking for element Frag.
// Record other elements in the cache. The new V is still suitable
// for all uncached indices.
while (true) {
@@ -331,20 +424,23 @@ Value *Scatterer::operator[](unsigned I) {
break;
unsigned J = Idx->getZExtValue();
V = Insert->getOperand(0);
- if (I == J) {
- CV[J] = Insert->getOperand(1);
- return CV[J];
- } else if (!CV[J]) {
+ if (Frag * VS.NumPacked == J) {
+ CV[Frag] = Insert->getOperand(1);
+ return CV[Frag];
+ }
+
+ if (VS.NumPacked == 1 && !CV[J]) {
// Only cache the first entry we find for each index we're not actively
// searching for. This prevents us from going too far up the chain and
// caching incorrect entries.
CV[J] = Insert->getOperand(1);
}
}
- CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
- V->getName() + ".i" + Twine(I));
+ CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
+ V->getName() + ".i" + Twine(Frag));
}
- return CV[I];
+
+ return CV[Frag];
}
bool ScalarizerLegacyPass::runOnFunction(Function &F) {
@@ -386,13 +482,13 @@ bool ScalarizerVisitor::visit(Function &F) {
// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
- Type *PtrElemTy) {
+ const VectorSplit &VS) {
if (Argument *VArg = dyn_cast<Argument>(V)) {
// Put the scattered form of arguments in the entry block,
// so that it can be used everywhere.
Function *F = VArg->getParent();
BasicBlock *BB = &F->getEntryBlock();
- return Scatterer(BB, BB->begin(), V, PtrElemTy, &Scattered[{V, PtrElemTy}]);
+ return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
}
if (Instruction *VOp = dyn_cast<Instruction>(V)) {
// When scalarizing PHI nodes we might try to examine/rewrite InsertElement
@@ -403,29 +499,30 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
// need to analyse them further.
if (!DT->isReachableFromEntry(VOp->getParent()))
return Scatterer(Point->getParent(), Point->getIterator(),
- PoisonValue::get(V->getType()), PtrElemTy);
+ PoisonValue::get(V->getType()), VS);
// Put the scattered form of an instruction directly after the
// instruction, skipping over PHI nodes and debug intrinsics.
BasicBlock *BB = VOp->getParent();
return Scatterer(
- BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V,
- PtrElemTy, &Scattered[{V, PtrElemTy}]);
+ BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS,
+ &Scattered[{V, VS.SplitTy}]);
}
// In the fallback case, just put the scattered before Point and
// keep the result local to Point.
- return Scatterer(Point->getParent(), Point->getIterator(), V, PtrElemTy);
+ return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
}
// Replace Op with the gathered form of the components in CV. Defer the
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
-void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
+void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
+ const VectorSplit &VS) {
transferMetadataAndIRFlags(Op, CV);
// If we already have a scattered form of Op (created from ExtractElements
// of Op itself), replace them with the new form.
- ValueVector &SV = Scattered[{Op, nullptr}];
+ ValueVector &SV = Scattered[{Op, VS.SplitTy}];
if (!SV.empty()) {
for (unsigned I = 0, E = SV.size(); I != E; ++I) {
Value *V = SV[I];
@@ -483,23 +580,57 @@ void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
}
}
+// Determine how Ty is split, if at all.
+std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
+ VectorSplit Split;
+ Split.VecTy = dyn_cast<FixedVectorType>(Ty);
+ if (!Split.VecTy)
+ return {};
+
+ unsigned NumElems = Split.VecTy->getNumElements();
+ Type *ElemTy = Split.VecTy->getElementType();
+
+ if (NumElems == 1 || ElemTy->isPointerTy() ||
+ 2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
+ Split.NumPacked = 1;
+ Split.NumFragments = NumElems;
+ Split.SplitTy = ElemTy;
+ } else {
+ Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
+ if (Split.NumPacked >= NumElems)
+ return {};
+
+ Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
+ Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);
+
+ unsigned RemainderElems = NumElems % Split.NumPacked;
+ if (RemainderElems > 1)
+ Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
+ else if (RemainderElems == 1)
+ Split.RemainderTy = ElemTy;
+ }
+
+ return Split;
+}
+
// Try to fill in Layout from Ty, returning true on success. Alignment is
// the alignment of the vector, or std::nullopt if the ABI default should be
// used.
std::optional<VectorLayout>
ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
const DataLayout &DL) {
+ std::optional<VectorSplit> VS = getVectorSplit(Ty);
+ if (!VS)
+ return {};
+
VectorLayout Layout;
- // Make sure we're dealing with a vector.
- Layout.VecTy = dyn_cast<FixedVectorType>(Ty);
- if (!Layout.VecTy)
- return std::nullopt;
- // Check that we're dealing with full-byte elements.
- Layout.ElemTy = Layout.VecTy->getElementType();
- if (!DL.typeSizeEqualsStoreSize(Layout.ElemTy))
- return std::nullopt;
+ Layout.VS = *VS;
+ // Check that we're dealing with full-byte fragments.
+ if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
+ (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
+ return {};
Layout.VecAlign = Alignment;
- Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
+ Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
return Layout;
}
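The arithmetic above is easiest to follow with concrete numbers. The following is a minimal standalone sketch of the same computation (plain C++ without LLVM types; divideCeil and the sample vectors are stand-ins, and the pointer-element case, which is always fully scalarized, is left out):

  #include <cstdio>

  struct SplitInfo {
    unsigned NumPacked;      // elements per complete fragment
    unsigned NumFragments;   // total number of fragments
    unsigned RemainderElems; // 0 if every fragment is complete
  };

  static unsigned divideCeil(unsigned A, unsigned B) { return (A + B - 1) / B; }

  // Mirrors getVectorSplit for NumElems elements of ElemBits bits each, under
  // -scalarize-min-bits=MinBits. Returns false when the vector is left as-is.
  static bool computeSplit(unsigned NumElems, unsigned ElemBits,
                           unsigned MinBits, SplitInfo &Out) {
    if (NumElems == 1 || 2 * ElemBits > MinBits) {
      Out = {1, NumElems, 0}; // fully scalarized
      return true;
    }
    unsigned NumPacked = MinBits / ElemBits;
    if (NumPacked >= NumElems)
      return false; // the whole vector already fits in one fragment
    Out = {NumPacked, divideCeil(NumElems, NumPacked), NumElems % NumPacked};
    return true;
  }

  int main() {
    SplitInfo S;
    if (computeSplit(7, 8, 32, S)) // <7 x i8>: fragments <4 x i8> and <3 x i8>
      std::printf("<7 x i8>:  packed=%u fragments=%u remainder=%u\n",
                  S.NumPacked, S.NumFragments, S.RemainderElems);
    if (computeSplit(4, 32, 32, S)) // <4 x i32>: four scalar i32 fragments
      std::printf("<4 x i32>: packed=%u fragments=%u remainder=%u\n",
                  S.NumPacked, S.NumFragments, S.RemainderElems);
    return 0;
  }

With the default of -scalarize-min-bits=0, the first branch always triggers and the pass behaves exactly as before, scalarizing element by element.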
@@ -507,19 +638,27 @@ ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
// to create an instruction like I with operand X and name Name.
template<typename Splitter>
bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
- auto *VT = dyn_cast<FixedVectorType>(I.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(I.getType());
+ if (!VS)
return false;
- unsigned NumElems = VT->getNumElements();
+ std::optional<VectorSplit> OpVS;
+ if (I.getOperand(0)->getType() == I.getType()) {
+ OpVS = VS;
+ } else {
+ OpVS = getVectorSplit(I.getOperand(0)->getType());
+ if (!OpVS || VS->NumPacked != OpVS->NumPacked)
+ return false;
+ }
+
IRBuilder<> Builder(&I);
- Scatterer Op = scatter(&I, I.getOperand(0));
- assert(Op.size() == NumElems && "Mismatched unary operation");
+ Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
+ assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
ValueVector Res;
- Res.resize(NumElems);
- for (unsigned Elem = 0; Elem < NumElems; ++Elem)
- Res[Elem] = Split(Builder, Op[Elem], I.getName() + ".i" + Twine(Elem));
- gather(&I, Res);
+ Res.resize(VS->NumFragments);
+ for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
+ Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
+ gather(&I, Res, *VS);
return true;
}
@@ -527,24 +666,32 @@ bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
// to create an instruction like I with operands X and Y and name Name.
template<typename Splitter>
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
- auto *VT = dyn_cast<FixedVectorType>(I.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(I.getType());
+ if (!VS)
return false;
- unsigned NumElems = VT->getNumElements();
+ std::optional<VectorSplit> OpVS;
+ if (I.getOperand(0)->getType() == I.getType()) {
+ OpVS = VS;
+ } else {
+ OpVS = getVectorSplit(I.getOperand(0)->getType());
+ if (!OpVS || VS->NumPacked != OpVS->NumPacked)
+ return false;
+ }
+
IRBuilder<> Builder(&I);
- Scatterer VOp0 = scatter(&I, I.getOperand(0));
- Scatterer VOp1 = scatter(&I, I.getOperand(1));
- assert(VOp0.size() == NumElems && "Mismatched binary operation");
- assert(VOp1.size() == NumElems && "Mismatched binary operation");
+ Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
+ Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
+ assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
+ assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
ValueVector Res;
- Res.resize(NumElems);
- for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
- Value *Op0 = VOp0[Elem];
- Value *Op1 = VOp1[Elem];
- Res[Elem] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Elem));
+ Res.resize(VS->NumFragments);
+ for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
+ Value *Op0 = VOp0[Frag];
+ Value *Op1 = VOp1[Frag];
+ Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
}
- gather(&I, Res);
+ gather(&I, Res, *VS);
return true;
}
@@ -552,18 +699,11 @@ static bool isTriviallyScalariable(Intrinsic::ID ID) {
return isTriviallyVectorizable(ID);
}
-// All of the current scalarizable intrinsics only have one mangled type.
-static Function *getScalarIntrinsicDeclaration(Module *M,
- Intrinsic::ID ID,
- ArrayRef<Type*> Tys) {
- return Intrinsic::getDeclaration(M, ID, Tys);
-}
-
/// If a call to a vector typed intrinsic function, split into a scalar call per
/// element if possible for the intrinsic.
bool ScalarizerVisitor::splitCall(CallInst &CI) {
- auto *VT = dyn_cast<FixedVectorType>(CI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(CI.getType());
+ if (!VS)
return false;
Function *F = CI.getCalledFunction();
@@ -574,26 +714,41 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID))
return false;
- unsigned NumElems = VT->getNumElements();
+ // unsigned NumElems = VT->getNumElements();
unsigned NumArgs = CI.arg_size();
ValueVector ScalarOperands(NumArgs);
SmallVector<Scatterer, 8> Scattered(NumArgs);
-
- Scattered.resize(NumArgs);
+ SmallVector<int> OverloadIdx(NumArgs, -1);
SmallVector<llvm::Type *, 3> Tys;
- Tys.push_back(VT->getScalarType());
+ // Add return type if intrinsic is overloaded on it.
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
+ Tys.push_back(VS->SplitTy);
// Assumes that any vector type has the same number of elements as the return
// vector type, which is true for all current intrinsics.
for (unsigned I = 0; I != NumArgs; ++I) {
Value *OpI = CI.getOperand(I);
- if (OpI->getType()->isVectorTy()) {
- Scattered[I] = scatter(&CI, OpI);
- assert(Scattered[I].size() == NumElems && "mismatched call operands");
- if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
- Tys.push_back(OpI->getType()->getScalarType());
+ if (auto *OpVecTy = dyn_cast<FixedVectorType>(OpI->getType())) {
+ assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
+ std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
+ if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
+ // The natural split of the operand doesn't match the result. This could
+ // happen if the vector elements are different and the ScalarizeMinBits
+ // option is used.
+ //
+ // We could in principle handle this case as well, at the cost of
+ // complicating the scattering machinery to support multiple scattering
+ // granularities for a single value.
+ return false;
+ }
+
+ Scattered[I] = scatter(&CI, OpI, *OpVS);
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) {
+ OverloadIdx[I] = Tys.size();
+ Tys.push_back(OpVS->SplitTy);
+ }
} else {
ScalarOperands[I] = OpI;
if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
@@ -601,49 +756,67 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
}
}
- ValueVector Res(NumElems);
+ ValueVector Res(VS->NumFragments);
ValueVector ScalarCallOps(NumArgs);
- Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, Tys);
+ Function *NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
IRBuilder<> Builder(&CI);
// Perform actual scalarization, taking care to preserve any scalar operands.
- for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
+ bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
ScalarCallOps.clear();
+ if (IsRemainder)
+ Tys[0] = VS->RemainderTy;
+
for (unsigned J = 0; J != NumArgs; ++J) {
- if (isVectorIntrinsicWithScalarOpAtArg(ID, J))
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) {
ScalarCallOps.push_back(ScalarOperands[J]);
- else
- ScalarCallOps.push_back(Scattered[J][Elem]);
+ } else {
+ ScalarCallOps.push_back(Scattered[J][I]);
+ if (IsRemainder && OverloadIdx[J] >= 0)
+ Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
+ }
}
- Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
- CI.getName() + ".i" + Twine(Elem));
+ if (IsRemainder)
+ NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
+
+ Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
+ CI.getName() + ".i" + Twine(I));
}
- gather(&CI, Res);
+ gather(&CI, Res, *VS);
return true;
}
bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
- auto *VT = dyn_cast<FixedVectorType>(SI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
+ if (!VS)
return false;
- unsigned NumElems = VT->getNumElements();
+ std::optional<VectorSplit> CondVS;
+ if (isa<FixedVectorType>(SI.getCondition()->getType())) {
+ CondVS = getVectorSplit(SI.getCondition()->getType());
+ if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
+ // This happens when ScalarizeMinBits is used.
+ return false;
+ }
+ }
+
IRBuilder<> Builder(&SI);
- Scatterer VOp1 = scatter(&SI, SI.getOperand(1));
- Scatterer VOp2 = scatter(&SI, SI.getOperand(2));
- assert(VOp1.size() == NumElems && "Mismatched select");
- assert(VOp2.size() == NumElems && "Mismatched select");
+ Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
+ Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
+ assert(VOp1.size() == VS->NumFragments && "Mismatched select");
+ assert(VOp2.size() == VS->NumFragments && "Mismatched select");
ValueVector Res;
- Res.resize(NumElems);
+ Res.resize(VS->NumFragments);
- if (SI.getOperand(0)->getType()->isVectorTy()) {
- Scatterer VOp0 = scatter(&SI, SI.getOperand(0));
- assert(VOp0.size() == NumElems && "Mismatched select");
- for (unsigned I = 0; I < NumElems; ++I) {
+ if (CondVS) {
+ Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
+ assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
Value *Op0 = VOp0[I];
Value *Op1 = VOp1[I];
Value *Op2 = VOp2[I];
@@ -652,14 +825,14 @@ bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
}
} else {
Value *Op0 = SI.getOperand(0);
- for (unsigned I = 0; I < NumElems; ++I) {
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
Value *Op1 = VOp1[I];
Value *Op2 = VOp2[I];
Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
SI.getName() + ".i" + Twine(I));
}
}
- gather(&SI, Res);
+ gather(&SI, Res, *VS);
return true;
}
@@ -680,146 +853,194 @@ bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
}
bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
- auto *VT = dyn_cast<FixedVectorType>(GEPI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
+ if (!VS)
return false;
IRBuilder<> Builder(&GEPI);
- unsigned NumElems = VT->getNumElements();
unsigned NumIndices = GEPI.getNumIndices();
- // The base pointer might be scalar even if it's a vector GEP. In those cases,
- // splat the pointer into a vector value, and scatter that vector.
- Value *Op0 = GEPI.getOperand(0);
- if (!Op0->getType()->isVectorTy())
- Op0 = Builder.CreateVectorSplat(NumElems, Op0);
- Scatterer Base = scatter(&GEPI, Op0);
-
- SmallVector<Scatterer, 8> Ops;
- Ops.resize(NumIndices);
- for (unsigned I = 0; I < NumIndices; ++I) {
- Value *Op = GEPI.getOperand(I + 1);
-
- // The indices might be scalars even if it's a vector GEP. In those cases,
- // splat the scalar into a vector value, and scatter that vector.
- if (!Op->getType()->isVectorTy())
- Op = Builder.CreateVectorSplat(NumElems, Op);
-
- Ops[I] = scatter(&GEPI, Op);
+ // The base pointer and indices might be scalar even if it's a vector GEP.
+ SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
+ SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};
+
+ for (unsigned I = 0; I < 1 + NumIndices; ++I) {
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {
+ std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
+ if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
+ // This can happen when ScalarizeMinBits is used.
+ return false;
+ }
+ ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
+ } else {
+ ScalarOps[I] = GEPI.getOperand(I);
+ }
}
ValueVector Res;
- Res.resize(NumElems);
- for (unsigned I = 0; I < NumElems; ++I) {
- SmallVector<Value *, 8> Indices;
- Indices.resize(NumIndices);
- for (unsigned J = 0; J < NumIndices; ++J)
- Indices[J] = Ops[J][I];
- Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
+ Res.resize(VS->NumFragments);
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
+ SmallVector<Value *, 8> SplitOps;
+ SplitOps.resize(1 + NumIndices);
+ for (unsigned J = 0; J < 1 + NumIndices; ++J) {
+ if (ScalarOps[J])
+ SplitOps[J] = ScalarOps[J];
+ else
+ SplitOps[J] = ScatterOps[J][I];
+ }
+ Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
+ ArrayRef(SplitOps).drop_front(),
GEPI.getName() + ".i" + Twine(I));
if (GEPI.isInBounds())
if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
NewGEPI->setIsInBounds();
}
- gather(&GEPI, Res);
+ gather(&GEPI, Res, *VS);
return true;
}
bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
- auto *VT = dyn_cast<FixedVectorType>(CI.getDestTy());
- if (!VT)
+ std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
+ if (!DestVS)
+ return false;
+
+ std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
+ if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
return false;
- unsigned NumElems = VT->getNumElements();
IRBuilder<> Builder(&CI);
- Scatterer Op0 = scatter(&CI, CI.getOperand(0));
- assert(Op0.size() == NumElems && "Mismatched cast");
+ Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
+ assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
ValueVector Res;
- Res.resize(NumElems);
- for (unsigned I = 0; I < NumElems; ++I)
- Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
- CI.getName() + ".i" + Twine(I));
- gather(&CI, Res);
+ Res.resize(DestVS->NumFragments);
+ for (unsigned I = 0; I < DestVS->NumFragments; ++I)
+ Res[I] =
+ Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
+ CI.getName() + ".i" + Twine(I));
+ gather(&CI, Res, *DestVS);
return true;
}
bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
- auto *DstVT = dyn_cast<FixedVectorType>(BCI.getDestTy());
- auto *SrcVT = dyn_cast<FixedVectorType>(BCI.getSrcTy());
- if (!DstVT || !SrcVT)
+ std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
+ std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
+ if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
return false;
- unsigned DstNumElems = DstVT->getNumElements();
- unsigned SrcNumElems = SrcVT->getNumElements();
+ const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();
+
+ // Vectors of pointers are always fully scalarized.
+ assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));
+
IRBuilder<> Builder(&BCI);
- Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
+ Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
ValueVector Res;
- Res.resize(DstNumElems);
+ Res.resize(DstVS->NumFragments);
+
+ unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
+ unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();
- if (DstNumElems == SrcNumElems) {
- for (unsigned I = 0; I < DstNumElems; ++I)
- Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
+ if (isPointerTy || DstSplitBits == SrcSplitBits) {
+ assert(DstVS->NumFragments == SrcVS->NumFragments);
+ for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
+ Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
BCI.getName() + ".i" + Twine(I));
- } else if (DstNumElems > SrcNumElems) {
- // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
- // individual elements to the destination.
- unsigned FanOut = DstNumElems / SrcNumElems;
- auto *MidTy = FixedVectorType::get(DstVT->getElementType(), FanOut);
+ }
+ } else if (SrcSplitBits % DstSplitBits == 0) {
+ // Convert each source fragment to the same-sized destination vector and
+ // then scatter the result to the destination.
+ VectorSplit MidVS;
+ MidVS.NumPacked = DstVS->NumPacked;
+ MidVS.NumFragments = SrcSplitBits / DstSplitBits;
+ MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
+ MidVS.NumPacked * MidVS.NumFragments);
+ MidVS.SplitTy = DstVS->SplitTy;
+
unsigned ResI = 0;
- for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
- Value *V = Op0[Op0I];
- Instruction *VI;
+ for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
+ Value *V = Op0[I];
+
// Look through any existing bitcasts before converting to <N x t2>.
// In the best case, the resulting conversion might be a no-op.
+ Instruction *VI;
while ((VI = dyn_cast<Instruction>(V)) &&
VI->getOpcode() == Instruction::BitCast)
V = VI->getOperand(0);
- V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
- Scatterer Mid = scatter(&BCI, V);
- for (unsigned MidI = 0; MidI < FanOut; ++MidI)
- Res[ResI++] = Mid[MidI];
+
+ V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");
+
+ Scatterer Mid = scatter(&BCI, V, MidVS);
+ for (unsigned J = 0; J < MidVS.NumFragments; ++J)
+ Res[ResI++] = Mid[J];
}
- } else {
- // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
- unsigned FanIn = SrcNumElems / DstNumElems;
- auto *MidTy = FixedVectorType::get(SrcVT->getElementType(), FanIn);
- unsigned Op0I = 0;
- for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
- Value *V = PoisonValue::get(MidTy);
- for (unsigned MidI = 0; MidI < FanIn; ++MidI)
- V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
- BCI.getName() + ".i" + Twine(ResI)
- + ".upto" + Twine(MidI));
- Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
- BCI.getName() + ".i" + Twine(ResI));
+ } else if (DstSplitBits % SrcSplitBits == 0) {
+ // Gather enough source fragments to make up a destination fragment and
+ // then convert to the destination type.
+ VectorSplit MidVS;
+ MidVS.NumFragments = DstSplitBits / SrcSplitBits;
+ MidVS.NumPacked = SrcVS->NumPacked;
+ MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
+ MidVS.NumPacked * MidVS.NumFragments);
+ MidVS.SplitTy = SrcVS->SplitTy;
+
+ unsigned SrcI = 0;
+ SmallVector<Value *, 8> ConcatOps;
+ ConcatOps.resize(MidVS.NumFragments);
+ for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
+ for (unsigned J = 0; J < MidVS.NumFragments; ++J)
+ ConcatOps[J] = Op0[SrcI++];
+ Value *V = concatenate(Builder, ConcatOps, MidVS,
+ BCI.getName() + ".i" + Twine(I));
+ Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
+ BCI.getName() + ".i" + Twine(I));
}
+ } else {
+ return false;
}
- gather(&BCI, Res);
+
+ gather(&BCI, Res, *DstVS);
return true;
}
bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
- auto *VT = dyn_cast<FixedVectorType>(IEI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
+ if (!VS)
return false;
- unsigned NumElems = VT->getNumElements();
IRBuilder<> Builder(&IEI);
- Scatterer Op0 = scatter(&IEI, IEI.getOperand(0));
+ Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
Value *NewElt = IEI.getOperand(1);
Value *InsIdx = IEI.getOperand(2);
ValueVector Res;
- Res.resize(NumElems);
+ Res.resize(VS->NumFragments);
if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
- for (unsigned I = 0; I < NumElems; ++I)
- Res[I] = CI->getValue().getZExtValue() == I ? NewElt : Op0[I];
+ unsigned Idx = CI->getZExtValue();
+ unsigned Fragment = Idx / VS->NumPacked;
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
+ if (I == Fragment) {
+ bool IsPacked = VS->NumPacked > 1;
+ if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
+ !VS->RemainderTy->isVectorTy())
+ IsPacked = false;
+ if (IsPacked) {
+ Res[I] =
+ Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
+ } else {
+ Res[I] = NewElt;
+ }
+ } else {
+ Res[I] = Op0[I];
+ }
+ }
} else {
- if (!ScalarizeVariableInsertExtract)
+ // Never split a variable insertelement that isn't fully scalarized.
+ if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
return false;
- for (unsigned I = 0; I < NumElems; ++I) {
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
Value *ShouldReplace =
Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
InsIdx->getName() + ".is." + Twine(I));
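For the bitcast paths above, the relative fragment sizes pick the strategy (a worked example at -scalarize-min-bits=32, not from the patch): casting <2 x i64> to <8 x i16> gives SrcSplitBits = 64 and DstSplitBits = 32, so each i64 fragment is bitcast to a <4 x i16> middle vector and scattered into two <2 x i16> destination fragments; in the opposite direction, concatenate first glues two <2 x i16> source fragments into a <4 x i16>, which is then bitcast to the i64 fragment.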
@@ -829,31 +1050,39 @@ bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
}
}
- gather(&IEI, Res);
+ gather(&IEI, Res, *VS);
return true;
}
bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
- auto *VT = dyn_cast<FixedVectorType>(EEI.getOperand(0)->getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
+ if (!VS)
return false;
- unsigned NumSrcElems = VT->getNumElements();
IRBuilder<> Builder(&EEI);
- Scatterer Op0 = scatter(&EEI, EEI.getOperand(0));
+ Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
Value *ExtIdx = EEI.getOperand(1);
if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
- Value *Res = Op0[CI->getValue().getZExtValue()];
+ unsigned Idx = CI->getZExtValue();
+ unsigned Fragment = Idx / VS->NumPacked;
+ Value *Res = Op0[Fragment];
+ bool IsPacked = VS->NumPacked > 1;
+ if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
+ !VS->RemainderTy->isVectorTy())
+ IsPacked = false;
+ if (IsPacked)
+ Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
replaceUses(&EEI, Res);
return true;
}
- if (!ScalarizeVariableInsertExtract)
+ // Never split a variable extractelement that isn't fully scalarized.
+ if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
return false;
- Value *Res = PoisonValue::get(VT->getElementType());
- for (unsigned I = 0; I < NumSrcElems; ++I) {
+ Value *Res = PoisonValue::get(VS->VecTy->getElementType());
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
Value *ShouldExtract =
Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
ExtIdx->getName() + ".is." + Twine(I));
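The constant-index cases above boil down to fragment arithmetic (illustrative numbers, assuming a <7 x i8> value split with NumPacked = 4 into <4 x i8> plus a <3 x i8> remainder): index 5 addresses fragment 5 / 4 = 1 at lane 5 % 4 = 1, so the insert or extract happens inside that fragment; variable indices are only handled when the value is fully scalarized (NumPacked == 1).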
@@ -866,51 +1095,52 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
}
bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
- auto *VT = dyn_cast<FixedVectorType>(SVI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
+ std::optional<VectorSplit> VSOp =
+ getVectorSplit(SVI.getOperand(0)->getType());
+ if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
return false;
- unsigned NumElems = VT->getNumElements();
- Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
- Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
+ Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
+ Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
ValueVector Res;
- Res.resize(NumElems);
+ Res.resize(VS->NumFragments);
- for (unsigned I = 0; I < NumElems; ++I) {
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
int Selector = SVI.getMaskValue(I);
if (Selector < 0)
- Res[I] = UndefValue::get(VT->getElementType());
+ Res[I] = PoisonValue::get(VS->VecTy->getElementType());
else if (unsigned(Selector) < Op0.size())
Res[I] = Op0[Selector];
else
Res[I] = Op1[Selector - Op0.size()];
}
- gather(&SVI, Res);
+ gather(&SVI, Res, *VS);
return true;
}
bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
- auto *VT = dyn_cast<FixedVectorType>(PHI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
+ if (!VS)
return false;
- unsigned NumElems = cast<FixedVectorType>(VT)->getNumElements();
IRBuilder<> Builder(&PHI);
ValueVector Res;
- Res.resize(NumElems);
+ Res.resize(VS->NumFragments);
unsigned NumOps = PHI.getNumOperands();
- for (unsigned I = 0; I < NumElems; ++I)
- Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
+ Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
PHI.getName() + ".i" + Twine(I));
+ }
for (unsigned I = 0; I < NumOps; ++I) {
- Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
+ Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
- for (unsigned J = 0; J < NumElems; ++J)
+ for (unsigned J = 0; J < VS->NumFragments; ++J)
cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
}
- gather(&PHI, Res);
+ gather(&PHI, Res, *VS);
return true;
}
@@ -925,17 +1155,17 @@ bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
if (!Layout)
return false;
- unsigned NumElems = cast<FixedVectorType>(Layout->VecTy)->getNumElements();
IRBuilder<> Builder(&LI);
- Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), LI.getType());
+ Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
ValueVector Res;
- Res.resize(NumElems);
+ Res.resize(Layout->VS.NumFragments);
- for (unsigned I = 0; I < NumElems; ++I)
- Res[I] = Builder.CreateAlignedLoad(Layout->VecTy->getElementType(), Ptr[I],
- Align(Layout->getElemAlign(I)),
+ for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
+ Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
+ Align(Layout->getFragmentAlign(I)),
LI.getName() + ".i" + Twine(I));
- gather(&LI, Res);
+ }
+ gather(&LI, Res, Layout->VS);
return true;
}
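A worked example of the per-fragment alignment (illustrative, assuming -scalarize-min-bits=32): a load of <8 x i16> with align 16 becomes four <2 x i16> loads with SplitSize = 4 bytes, and getFragmentAlign(I) = commonAlignment(16, I * 4) yields align 16, 4, 8 and 4 for fragments 0 through 3; the store path below is handled the same way.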
@@ -951,17 +1181,17 @@ bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
if (!Layout)
return false;
- unsigned NumElems = cast<FixedVectorType>(Layout->VecTy)->getNumElements();
IRBuilder<> Builder(&SI);
- Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), FullValue->getType());
- Scatterer VVal = scatter(&SI, FullValue);
+ Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
+ Scatterer VVal = scatter(&SI, FullValue, Layout->VS);
ValueVector Stores;
- Stores.resize(NumElems);
- for (unsigned I = 0; I < NumElems; ++I) {
+ Stores.resize(Layout->VS.NumFragments);
+ for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
Value *Val = VVal[I];
Value *Ptr = VPtr[I];
- Stores[I] = Builder.CreateAlignedStore(Val, Ptr, Layout->getElemAlign(I));
+ Stores[I] =
+ Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
}
transferMetadataAndIRFlags(&SI, Stores);
return true;
@@ -971,6 +1201,12 @@ bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
return splitCall(CI);
}
+bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
+ return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
+ return Builder.CreateFreeze(Op, Name);
+ });
+}
+
// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
bool ScalarizerVisitor::finish() {
@@ -983,17 +1219,19 @@ bool ScalarizerVisitor::finish() {
ValueVector &CV = *GMI.second;
if (!Op->use_empty()) {
// The value is still needed, so recreate it using a series of
- // InsertElements.
- Value *Res = PoisonValue::get(Op->getType());
+ // insertelements and/or shufflevectors.
+ Value *Res;
if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
BasicBlock *BB = Op->getParent();
- unsigned Count = Ty->getNumElements();
IRBuilder<> Builder(Op);
if (isa<PHINode>(Op))
Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
- for (unsigned I = 0; I < Count; ++I)
- Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
- Op->getName() + ".upto" + Twine(I));
+
+ VectorSplit VS = *getVectorSplit(Ty);
+ assert(VS.NumFragments == CV.size());
+
+ Res = concatenate(Builder, CV, VS, Op->getName());
+
Res->takeName(Op);
} else {
assert(CV.size() == 1 && Op->getType() == CV[0]->getType());