path: root/llvm/lib/Analysis/VectorUtils.cpp
Diffstat (limited to 'llvm/lib/Analysis/VectorUtils.cpp')
 llvm/lib/Analysis/VectorUtils.cpp | 269
 1 file changed, 195 insertions(+), 74 deletions(-)
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index c45ab941a1428..23531b65ea32d 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -78,6 +78,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
+ case Intrinsic::roundeven:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
@@ -112,7 +113,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
/// its ID; in case it does not find one, it returns not_intrinsic.
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
const TargetLibraryInfo *TLI) {
- Intrinsic::ID ID = getIntrinsicForCallSite(CI, TLI);
+ Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
if (ID == Intrinsic::not_intrinsic)
return Intrinsic::not_intrinsic;
@@ -262,9 +263,12 @@ Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
assert(V->getType()->isVectorTy() && "Not looking at a vector?");
VectorType *VTy = cast<VectorType>(V->getType());
- unsigned Width = VTy->getNumElements();
- if (EltNo >= Width) // Out of range access.
- return UndefValue::get(VTy->getElementType());
+ // For a fixed-length vector, return undef for an out-of-range access.
+ if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
+ unsigned Width = FVTy->getNumElements();
+ if (EltNo >= Width)
+ return UndefValue::get(FVTy->getElementType());
+ }
if (Constant *C = dyn_cast<Constant>(V))
return C->getAggregateElement(EltNo);
@@ -285,8 +289,11 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
return findScalarElement(III->getOperand(0), EltNo);
}
- if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
- unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
+ ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
+ // Restrict the following transformation to fixed-length vectors.
+ if (SVI && isa<FixedVectorType>(SVI->getType())) {
+ unsigned LHSWidth =
+ cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
int InEl = SVI->getMaskValue(EltNo);
if (InEl < 0)
return UndefValue::get(VTy->getElementType());
@@ -307,6 +314,24 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
return nullptr;
}
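
A minimal usage sketch (not part of the patch) of how findScalarElement is typically called, e.g. to fold an extractelement of a known vector; the helper name simplifyExtract is hypothetical. With this change, out-of-range lanes fold to undef only for fixed-length vectors, while scalable vectors fall through to the later checks.

    #include "llvm/Analysis/VectorUtils.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Hypothetical caller: resolve an extractelement of a constant lane.
    static Value *simplifyExtract(ExtractElementInst *Ext) {
      auto *IdxC = dyn_cast<ConstantInt>(Ext->getIndexOperand());
      if (!IdxC)
        return nullptr; // Only constant lane indices can be resolved here.
      // Returns the scalar producing that lane, or nullptr if unknown.
      return findScalarElement(Ext->getVectorOperand(), IdxC->getZExtValue());
    }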
+int llvm::getSplatIndex(ArrayRef<int> Mask) {
+ int SplatIndex = -1;
+ for (int M : Mask) {
+ // Ignore invalid (undefined) mask elements.
+ if (M < 0)
+ continue;
+
+ // There can be only 1 non-negative mask element value if this is a splat.
+ if (SplatIndex != -1 && SplatIndex != M)
+ return -1;
+
+ // Initialize the splat index to the 1st non-negative mask element.
+ SplatIndex = M;
+ }
+ assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
+ return SplatIndex;
+}
+
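
A few worked inputs for the new getSplatIndex helper (a sketch, not part of the patch; values follow directly from the loop above). Note that an all-undef mask yields -1, just like a non-splat mask does.

    #include "llvm/Analysis/VectorUtils.h"
    #include <cassert>
    using namespace llvm;

    void splatIndexExamples() {
      int Splat[] = {2, -1, 2, 2};  // undef (-1) lanes are ignored
      int Mixed[] = {0, 1, 0, 0};   // two distinct defined lanes: not a splat
      int AllUndef[] = {-1, -1};    // nothing defined: also reported as -1
      assert(getSplatIndex(Splat) == 2);
      assert(getSplatIndex(Mixed) == -1);
      assert(getSplatIndex(AllUndef) == -1);
    }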
/// Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
@@ -318,9 +343,9 @@ const llvm::Value *llvm::getSplatValue(const Value *V) {
// shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
Value *Splat;
- if (match(V, m_ShuffleVector(m_InsertElement(m_Value(), m_Value(Splat),
- m_ZeroInt()),
- m_Value(), m_ZeroInt())))
+ if (match(V,
+ m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
+ m_Value(), m_ZeroMask())))
return Splat;
return nullptr;
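
For reference, a sketch of the renamed matchers in use (same PatternMatch calls as in the hunk above; the wrapper name is hypothetical). m_ZeroMask matches an all-zero shuffle mask directly, replacing the old approach of matching the mask as a zero constant operand.

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Matches: shuf (inselt ?, %s, 0), ?, <0, undef, 0, ...> and binds %s.
    static bool isBroadcastOfLaneZero(const Value *V, Value *&Splat) {
      return match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat),
                                            m_ZeroInt()),
                                m_Value(), m_ZeroMask()));
    }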
@@ -330,21 +355,32 @@ const llvm::Value *llvm::getSplatValue(const Value *V) {
// adjusted if needed.
const unsigned MaxDepth = 6;
-bool llvm::isSplatValue(const Value *V, unsigned Depth) {
+bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
assert(Depth <= MaxDepth && "Limit Search Depth");
if (isa<VectorType>(V->getType())) {
if (isa<UndefValue>(V))
return true;
- // FIXME: Constant splat analysis does not allow undef elements.
+ // FIXME: We can allow undefs, but if Index was specified, we may want to
+ // check that the constant is defined at that index.
if (auto *C = dyn_cast<Constant>(V))
return C->getSplatValue() != nullptr;
}
- // FIXME: Constant splat analysis does not allow undef elements.
- Constant *Mask;
- if (match(V, m_ShuffleVector(m_Value(), m_Value(), m_Constant(Mask))))
- return Mask->getSplatValue() != nullptr;
+ if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
+ // FIXME: We can safely allow undefs here. If Index was specified, we will
+ // check that the mask elt is defined at the required index.
+ if (!is_splat(Shuf->getShuffleMask()))
+ return false;
+
+ // Match any index.
+ if (Index == -1)
+ return true;
+
+ // Match a specific element. The mask should be defined at and match the
+ // specified index.
+ return Shuf->getMaskValue(Index) == Index;
+ }
// The remaining tests are all recursive, so bail out if we hit the limit.
if (Depth++ == MaxDepth)
@@ -353,18 +389,91 @@ bool llvm::isSplatValue(const Value *V, unsigned Depth) {
// If both operands of a binop are splats, the result is a splat.
Value *X, *Y, *Z;
if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
- return isSplatValue(X, Depth) && isSplatValue(Y, Depth);
+ return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
// If all operands of a select are splats, the result is a splat.
if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
- return isSplatValue(X, Depth) && isSplatValue(Y, Depth) &&
- isSplatValue(Z, Depth);
+ return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
+ isSplatValue(Z, Index, Depth);
// TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
return false;
}
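
A short sketch of the extended query (assuming Index and Depth take default arguments in the header, which this hunk does not show): Index == -1 keeps the old "splat of any lane" behaviour, while a non-negative Index additionally requires the shuffle mask to be defined at, and equal to, that lane.

    #include "llvm/Analysis/VectorUtils.h"
    using namespace llvm;

    void splatQueries(const Value *V) {
      bool AnyLane = isSplatValue(V, /*Index=*/-1); // previous behaviour
      bool Lane0 = isSplatValue(V, /*Index=*/0);    // splat specifically of lane 0
      (void)AnyLane;
      (void)Lane0;
    }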
+void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &ScaledMask) {
+ assert(Scale > 0 && "Unexpected scaling factor");
+
+ // Fast-path: if no scaling, then it is just a copy.
+ if (Scale == 1) {
+ ScaledMask.assign(Mask.begin(), Mask.end());
+ return;
+ }
+
+ ScaledMask.clear();
+ for (int MaskElt : Mask) {
+ if (MaskElt >= 0) {
+ assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <=
+ std::numeric_limits<int32_t>::max() &&
+ "Overflowed 32-bits");
+ }
+ for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
+ ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
+ }
+}
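
A worked example (values derived by hand from the loop above): with Scale == 2, each source lane i expands to lanes {2*i, 2*i+1}, and undef lanes are replicated unchanged.

    #include "llvm/Analysis/VectorUtils.h"
    using namespace llvm;

    void narrowExample() {
      int Wide[] = {1, -1, 0, 3};
      SmallVector<int, 8> Narrow;
      narrowShuffleMaskElts(/*Scale=*/2, Wide, Narrow);
      // Narrow == {2, 3, -1, -1, 0, 1, 6, 7}
    }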
+
+bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &ScaledMask) {
+ assert(Scale > 0 && "Unexpected scaling factor");
+
+ // Fast-path: if no scaling, then it is just a copy.
+ if (Scale == 1) {
+ ScaledMask.assign(Mask.begin(), Mask.end());
+ return true;
+ }
+
+ // We must map the original elements down evenly to a type with fewer elements.
+ int NumElts = Mask.size();
+ if (NumElts % Scale != 0)
+ return false;
+
+ ScaledMask.clear();
+ ScaledMask.reserve(NumElts / Scale);
+
+ // Step through the input mask by splitting into Scale-sized slices.
+ do {
+ ArrayRef<int> MaskSlice = Mask.take_front(Scale);
+ assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
+
+ // The first element of the slice determines how we evaluate this slice.
+ int SliceFront = MaskSlice.front();
+ if (SliceFront < 0) {
+ // Negative values (undef or other "sentinel" values) must be equal across
+ // the entire slice.
+ if (!is_splat(MaskSlice))
+ return false;
+ ScaledMask.push_back(SliceFront);
+ } else {
+ // A positive mask element must be cleanly divisible.
+ if (SliceFront % Scale != 0)
+ return false;
+ // Elements of the slice must be consecutive.
+ for (int i = 1; i < Scale; ++i)
+ if (MaskSlice[i] != SliceFront + i)
+ return false;
+ ScaledMask.push_back(SliceFront / Scale);
+ }
+ Mask = Mask.drop_front(Scale);
+ } while (!Mask.empty());
+
+ assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
+
+ // All elements of the original mask can be scaled down to map to the elements
+ // of a mask with wider elements.
+ return true;
+}
+
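
And the inverse direction (again a hand-derived sketch): widening succeeds only when every Scale-sized slice is either uniformly negative or a Scale-aligned consecutive run.

    #include "llvm/Analysis/VectorUtils.h"
    using namespace llvm;

    void widenExamples() {
      SmallVector<int, 4> Wide;
      int Good[] = {2, 3, -1, -1};
      bool OK = widenShuffleMaskElts(/*Scale=*/2, Good, Wide);
      // OK == true, Wide == {1, -1}
      int Bad[] = {1, 2, 4, 5}; // {1, 2} is consecutive but not 2-aligned
      OK = widenShuffleMaskElts(/*Scale=*/2, Bad, Wide);
      // OK == false; the output mask is not meaningful on failure
      (void)OK;
    }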
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
const TargetTransformInfo *TTI) {
@@ -636,7 +745,7 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
}
Constant *
-llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
+llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
const InterleaveGroup<Instruction> &Group) {
// All 1's means mask is not needed.
if (Group.getNumMembers() == Group.getFactor())
@@ -655,52 +764,52 @@ llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
return ConstantVector::get(Mask);
}
-Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
- unsigned ReplicationFactor, unsigned VF) {
- SmallVector<Constant *, 16> MaskVec;
+llvm::SmallVector<int, 16>
+llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
+ SmallVector<int, 16> MaskVec;
for (unsigned i = 0; i < VF; i++)
for (unsigned j = 0; j < ReplicationFactor; j++)
- MaskVec.push_back(Builder.getInt32(i));
+ MaskVec.push_back(i);
- return ConstantVector::get(MaskVec);
+ return MaskVec;
}
-Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
- unsigned NumVecs) {
- SmallVector<Constant *, 16> Mask;
+llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
+ unsigned NumVecs) {
+ SmallVector<int, 16> Mask;
for (unsigned i = 0; i < VF; i++)
for (unsigned j = 0; j < NumVecs; j++)
- Mask.push_back(Builder.getInt32(j * VF + i));
+ Mask.push_back(j * VF + i);
- return ConstantVector::get(Mask);
+ return Mask;
}
-Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
- unsigned Stride, unsigned VF) {
- SmallVector<Constant *, 16> Mask;
+llvm::SmallVector<int, 16>
+llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
+ SmallVector<int, 16> Mask;
for (unsigned i = 0; i < VF; i++)
- Mask.push_back(Builder.getInt32(Start + i * Stride));
+ Mask.push_back(Start + i * Stride);
- return ConstantVector::get(Mask);
+ return Mask;
}
-Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start,
- unsigned NumInts, unsigned NumUndefs) {
- SmallVector<Constant *, 16> Mask;
+llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
+ unsigned NumInts,
+ unsigned NumUndefs) {
+ SmallVector<int, 16> Mask;
for (unsigned i = 0; i < NumInts; i++)
- Mask.push_back(Builder.getInt32(Start + i));
+ Mask.push_back(Start + i);
- Constant *Undef = UndefValue::get(Builder.getInt32Ty());
for (unsigned i = 0; i < NumUndefs; i++)
- Mask.push_back(Undef);
+ Mask.push_back(-1);
- return ConstantVector::get(Mask);
+ return Mask;
}
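
Hand-computed outputs of the reworked helpers (a sketch; the concrete values follow from the loops above). The plain int masks feed directly into the ArrayRef<int> overload of CreateShuffleVector used elsewhere in this patch.

    #include "llvm/Analysis/VectorUtils.h"
    using namespace llvm;

    void maskHelperExamples() {
      SmallVector<int, 16> Rep = createReplicatedMask(/*ReplicationFactor=*/2, /*VF=*/3);
      // Rep == {0, 0, 1, 1, 2, 2}
      SmallVector<int, 16> Ilv = createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
      // Ilv == {0, 4, 1, 5, 2, 6, 3, 7}
      SmallVector<int, 16> Stride = createStrideMask(/*Start=*/1, /*Stride=*/2, /*VF=*/4);
      // Stride == {1, 3, 5, 7}
      SmallVector<int, 16> Seq = createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/2);
      // Seq == {0, 1, 2, 3, -1, -1}
    }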
/// A helper function for concatenating vectors. This function concatenates two
/// vectors having the same element type. If the second vector has fewer
/// elements than the first, it is padded with undefs.
-static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
+static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
Value *V2) {
VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
@@ -714,16 +823,17 @@ static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
if (NumElts1 > NumElts2) {
// Extend with UNDEFs.
- Constant *ExtMask =
- createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2);
- V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
+ V2 = Builder.CreateShuffleVector(
+ V2, UndefValue::get(VecTy2),
+ createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
}
- Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0);
- return Builder.CreateShuffleVector(V1, V2, Mask);
+ return Builder.CreateShuffleVector(
+ V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
}
-Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
+Value *llvm::concatenateVectors(IRBuilderBase &Builder,
+ ArrayRef<Value *> Vecs) {
unsigned NumVecs = Vecs.size();
assert(NumVecs > 1 && "Should be at least two vectors");
@@ -756,8 +866,9 @@ bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
return false;
if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
return true;
- for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
- ++I) {
+ for (unsigned I = 0,
+ E = cast<VectorType>(ConstMask->getType())->getNumElements();
+ I != E; ++I) {
if (auto *MaskElt = ConstMask->getAggregateElement(I))
if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
continue;
@@ -773,8 +884,9 @@ bool llvm::maskIsAllOneOrUndef(Value *Mask) {
return false;
if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
return true;
- for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
- ++I) {
+ for (unsigned I = 0,
+ E = cast<VectorType>(ConstMask->getType())->getNumElements();
+ I != E; ++I) {
if (auto *MaskElt = ConstMask->getAggregateElement(I))
if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
continue;
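
A sketch of constant masks these two predicates accept (assumes a live LLVMContext; the function name is illustrative):

    #include "llvm/Analysis/VectorUtils.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/LLVMContext.h"
    #include <cassert>
    using namespace llvm;

    void maskPredicateExamples(LLVMContext &Ctx) {
      Constant *Zero = ConstantInt::getFalse(Ctx);
      Constant *One = ConstantInt::getTrue(Ctx);
      Constant *Und = UndefValue::get(Type::getInt1Ty(Ctx));
      // Undef lanes are tolerated alongside the required value.
      assert(maskIsAllZeroOrUndef(ConstantVector::get({Zero, Und, Zero})));
      assert(maskIsAllOneOrUndef(ConstantVector::get({One, One, Und})));
    }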
@@ -835,13 +947,8 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
PointerType *PtrTy = cast<PointerType>(Ptr->getType());
uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
-
- // An alignment of 0 means target ABI alignment.
- MaybeAlign Alignment = MaybeAlign(getLoadStoreAlignment(&I));
- if (!Alignment)
- Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType()));
-
- AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment);
+ AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
+ getLoadStoreAlignment(&I));
}
}
@@ -922,7 +1029,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
// create a group for B, we continue with the bottom-up algorithm to ensure
// we don't break any of B's dependences.
InterleaveGroup<Instruction> *Group = nullptr;
- if (isStrided(DesB.Stride) &&
+ if (isStrided(DesB.Stride) &&
(!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
Group = getInterleaveGroup(B);
if (!Group) {
@@ -1023,8 +1130,8 @@ void InterleavedAccessInfo::analyzeInterleaving(
// All members of a predicated interleave-group must have the same predicate,
// and currently must reside in the same BB.
- BasicBlock *BlockA = A->getParent();
- BasicBlock *BlockB = B->getParent();
+ BasicBlock *BlockA = A->getParent();
+ BasicBlock *BlockB = B->getParent();
if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
(!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
continue;
@@ -1127,22 +1234,23 @@ void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
if (!requiresScalarEpilogue())
return;
- // Avoid releasing a Group twice.
- SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
- for (auto &I : InterleaveGroupMap) {
- InterleaveGroup<Instruction> *Group = I.second;
- if (Group->requiresScalarEpilogue())
- DelSet.insert(Group);
- }
- for (auto *Ptr : DelSet) {
+ bool ReleasedGroup = false;
+ // Release groups requiring scalar epilogues. Note that this also removes them
+ // from InterleaveGroups.
+ for (auto *Group : make_early_inc_range(InterleaveGroups)) {
+ if (!Group->requiresScalarEpilogue())
+ continue;
LLVM_DEBUG(
dbgs()
<< "LV: Invalidate candidate interleaved group due to gaps that "
"require a scalar epilogue (not allowed under optsize) and cannot "
"be masked (not enabled). \n");
- releaseGroup(Ptr);
+ releaseGroup(Group);
+ ReleasedGroup = true;
}
-
+ assert(ReleasedGroup && "At least one group must be invalidated, as a "
+ "scalar epilogue was required");
+ (void)ReleasedGroup;
RequiresScalarEpilogue = false;
}
@@ -1161,6 +1269,18 @@ void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
}
}
+std::string VFABI::mangleTLIVectorName(StringRef VectorName,
+ StringRef ScalarName, unsigned numArgs,
+ unsigned VF) {
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ Out << "_ZGV" << VFABI::_LLVM_ << "N" << VF;
+ for (unsigned I = 0; I < numArgs; ++I)
+ Out << "v";
+ Out << "_" << ScalarName << "(" << VectorName << ")";
+ return std::string(Out.str());
+}
+
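
Tracing the stream above, and assuming the VFABI::_LLVM_ ISA token prints as "_LLVM_" (not shown in this patch), a 2-lane variant of sin with one vector argument mangles as follows:

    #include "llvm/Analysis/VectorUtils.h"
    using namespace llvm;

    void mangleExample() {
      std::string S = VFABI::mangleTLIVectorName("vec_sin", "sin",
                                                 /*numArgs=*/1, /*VF=*/2);
      // S == "_ZGV_LLVM_N2v_sin(vec_sin)"
    }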
void VFABI::getVectorVariantNames(
const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
const StringRef S =
@@ -1174,12 +1294,13 @@ void VFABI::getVectorVariantNames(
for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
#ifndef NDEBUG
- Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S);
+ LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
+ Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
assert(Info.hasValue() && "Invalid name for a VFABI variant.");
assert(CI.getModule()->getFunction(Info.getValue().VectorName) &&
"Vector function is missing.");
#endif
- VariantMappings.push_back(S);
+ VariantMappings.push_back(std::string(S));
}
}
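
Finally, a sketch of the consumer side (assumption: the mappings come from the VFABI call-site attribute whose name is not shown in this hunk; the example string is illustrative). Each returned entry is a full VFABI mangled name that tryDemangleForVFABI can parse back.

    #include "llvm/Analysis/VectorUtils.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void collectVariants(const CallInst &CI, raw_ostream &OS) {
      SmallVector<std::string, 8> Variants;
      VFABI::getVectorVariantNames(CI, Variants);
      for (const std::string &Name : Variants)
        OS << Name << "\n"; // e.g. "_ZGV_LLVM_N2v_sin(vec_sin)"
    }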