about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 12
1 files changed, 11 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 81d083c1c88a..cb877a4695f1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -650,6 +650,15 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
return LT.first * Cost * NElts;
}
+ if (SLT == MVT::f32 && ((CxtI && CxtI->hasApproxFunc()) ||
+ TLI->getTargetMachine().Options.UnsafeFPMath)) {
+ // Fast unsafe fdiv lowering:
+ // f32 rcp
+ // f32 fmul
+ int Cost = getQuarterRateInstrCost(CostKind) + getFullRateInstrCost();
+ return LT.first * Cost * NElts;
+ }
+
if (SLT == MVT::f32 || SLT == MVT::f16) {
// 4 more v_cvt_* insts without f16 insts support
int Cost = (SLT == MVT::f16 ? 14 : 10) * getFullRateInstrCost() +
@@ -1114,7 +1123,8 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
- Kind = improveShuffleKindFromMask(Kind, Mask);
+ Kind = improveShuffleKindFromMask(Kind, Mask, VT, Index, SubTp);
+
if (ST->hasVOP3PInsts()) {
if (cast<FixedVectorType>(VT)->getNumElements() == 2 &&
DL.getTypeSizeInBits(VT->getElementType()) == 16) {