summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp34
1 files changed, 33 insertions, 1 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index c9482c37ec805..beafebc1284a6 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -363,13 +363,22 @@ int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
unsigned Index) {
switch (Opcode) {
case Instruction::ExtractElement:
- case Instruction::InsertElement:
+ case Instruction::InsertElement: {
+ unsigned EltSize
+ = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
+ if (EltSize < 32) {
+ if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
+ return 0;
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+ }
+
// Extracts are just reads of a subregister, so are free. Inserts are
// considered free because we don't want to have any cost for scalarizing
// operations, and we don't have to copy into a different register class.
// Dynamic indexing isn't free and is best avoided.
return Index == ~0u ? 2 : 0;
+ }
default:
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
@@ -479,3 +488,26 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
return false;
}
+
+unsigned AMDGPUTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
+ if (ST->hasVOP3PInsts()) {
+ VectorType *VT = cast<VectorType>(Tp);
+ if (VT->getNumElements() == 2 &&
+ DL.getTypeSizeInBits(VT->getElementType()) == 16) {
+ // With op_sel VOP3P instructions freely can access the low half or high
+ // half of a register, so any swizzle is free.
+
+ switch (Kind) {
+ case TTI::SK_Broadcast:
+ case TTI::SK_Reverse:
+ case TTI::SK_PermuteSingleSrc:
+ return 0;
+ default:
+ break;
+ }
+ }
+ }
+
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+}