aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SISchedule.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SISchedule.td')
-rw-r--r-- llvm/lib/Target/AMDGPU/SISchedule.td | 39
1 files changed, 27 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 824d1aeb0df9..932381c99e0b 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -1,4 +1,4 @@
-//===-- SISchedule.td - SI Scheduling definitons -------------------------===//
+//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -27,10 +27,14 @@ def WriteBarrier : SchedWrite;
def MIVGPRRead : SchedRead;
def MIMFMARead : SchedRead;
-// Vector ALU instructions
+// Normal 16-bit or 32-bit VALU instructions
def Write32Bit : SchedWrite;
+// Conversion to or from F32 (but not converting F64 to or from F32)
+def WriteFloatCvt : SchedWrite;
+// F16 or F32 transcendental instructions (these are quarter rate)
+def WriteTrans32 : SchedWrite;
+// Other quarter rate VALU instructions
def WriteQuarterRate32 : SchedWrite;
-def WriteFullOrQuarterRate32 : SchedWrite;
def WriteFloatFMA : SchedWrite;
@@ -43,6 +47,10 @@ def WriteDoubleAdd : SchedWrite;
// Conversion to or from f64 instruction
def WriteDoubleCvt : SchedWrite;
+// F64 "transcendental" (actually only reciprocal and/or square root)
+// instructions
+def WriteTrans64 : SchedWrite;
+
// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;
@@ -56,7 +64,7 @@ def Write16PassMAI : SchedWrite;
// instructions)
class SISchedMachineModel : SchedMachineModel {
- let CompleteModel = 0;
+ let CompleteModel = 1;
// MicroOpBufferSize = 1 means that instructions will always be added
// the ready queue when they become available. This exposes them
// to the register pressure analysis.
@@ -127,6 +135,8 @@ multiclass SICommonWriteRes {
def : HWVALUWriteRes<Write32Bit, 1>;
def : HWVALUWriteRes<Write64Bit, 2>;
+ def : HWVALUWriteRes<WriteFloatCvt, 4>;
+ def : HWVALUWriteRes<WriteTrans32, 4>;
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
def : HWVALUWriteRes<Write2PassMAI, 2>;
def : HWVALUWriteRes<Write8PassMAI, 8>;
@@ -135,9 +145,9 @@ multiclass SICommonWriteRes {
def : ReadAdvance<MIVGPRRead, -2>;
def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32$")>;
- // Technicaly mfma reads can be from 0 to 4 cycles but that does not make
+ // Technically mfma reads can be from 0 to 4 cycles but that does not make
// sense to model because its register setup is huge. In particular if we
- // properly model read advanice as -2 for a vgpr read it will result in a
+ // properly model read advance as -2 for a vgpr read it will result in a
// bad scheduling of acc writes before that mfma. To avoid it we would
// need to consume 2 or 4 more vgprs to be initialized before the acc
// write sequence. Just assume worst case here.
@@ -163,6 +173,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>;
def : HWVALUWriteRes<WriteDouble, 4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
+def : HWVALUWriteRes<WriteTrans64, 4>;
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -176,6 +187,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 16>;
def : HWVALUWriteRes<WriteDouble, 16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
+def : HWVALUWriteRes<WriteTrans64, 16>;
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -186,17 +198,20 @@ let SchedModel = GFX10SpeedModel in {
// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
-def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 9>;
-def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 17>;
+def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
+def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
+def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 10>;
+def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
-def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 17>;
-def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 17>;
-def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 17>;
+def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 22>;
+def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 22>;
+def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 22>;
+def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 24>;
def : HWWriteRes<WriteBranch, [HWBranch], 32>;
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
-def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 5>;
+def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;