1 files changed, 27 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 824d1aeb0df9..932381c99e0b 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -1,4 +1,4 @@
-//===-- SISchedule.td - SI Scheduling definitons -------------------------===//
+//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -27,10 +27,14 @@ def WriteBarrier : SchedWrite;
 def MIVGPRRead  : SchedRead;
 def MIMFMARead  : SchedRead;
 
-// Vector ALU instructions
+// Normal 16-bit or 32-bit VALU instructions
 def Write32Bit         : SchedWrite;
+// Conversion to or from F32 (but not converting F64 to or from F32)
+def WriteFloatCvt      : SchedWrite;
+// F16 or F32 transcendental instructions (these are quarter rate)
+def WriteTrans32       : SchedWrite;
+// Other quarter rate VALU instructions
 def WriteQuarterRate32 : SchedWrite;
-def WriteFullOrQuarterRate32 : SchedWrite;
 
 def WriteFloatFMA   : SchedWrite;
 
@@ -43,6 +47,10 @@ def WriteDoubleAdd  : SchedWrite;
 // Conversion to or from f64 instruction
 def WriteDoubleCvt  : SchedWrite;
 
+// F64 "transcendental" (actually only reciprocal and/or square root)
+// instructions
+def WriteTrans64    : SchedWrite;
+
 // Half rate 64-bit instructions.
 def Write64Bit : SchedWrite;
 
@@ -56,7 +64,7 @@ def Write16PassMAI : SchedWrite;
 // instructions)
 
 class SISchedMachineModel : SchedMachineModel {
-  let CompleteModel = 0;
+  let CompleteModel = 1;
   // MicroOpBufferSize = 1 means that instructions will always be added
   // the ready queue when they become available.  This exposes them
   // to the register pressure analysis.
@@ -127,6 +135,8 @@ multiclass SICommonWriteRes {
 
   def : HWVALUWriteRes<Write32Bit,         1>;
   def : HWVALUWriteRes<Write64Bit,         2>;
+  def : HWVALUWriteRes<WriteFloatCvt,      4>;
+  def : HWVALUWriteRes<WriteTrans32,       4>;
   def : HWVALUWriteRes<WriteQuarterRate32, 4>;
   def : HWVALUWriteRes<Write2PassMAI,      2>;
   def : HWVALUWriteRes<Write8PassMAI,      8>;
@@ -135,9 +145,9 @@ multiclass SICommonWriteRes {
   def : ReadAdvance<MIVGPRRead, -2>;
   def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32$")>;
 
-  // Technicaly mfma reads can be from 0 to 4 cycles but that does not make
+  // Technically mfma reads can be from 0 to 4 cycles but that does not make
   // sense to model because its register setup is huge. In particular if we
-  // properly model read advanice as -2 for a vgpr read it will result in a
+  // properly model read advance as -2 for a vgpr read it will result in a
   // bad scheduling of acc writes before that mfma. To avoid it we would
   // need to consume 2 or 4 more vgprs to be initialized before the acc
   // write sequence. Just assume worst case here.
@@ -163,6 +173,7 @@ def : HWVALUWriteRes<WriteFloatFMA,   1>;
 def : HWVALUWriteRes<WriteDouble,     4>;
 def : HWVALUWriteRes<WriteDoubleAdd,  2>;
 def : HWVALUWriteRes<WriteDoubleCvt,  4>;
+def : HWVALUWriteRes<WriteTrans64,    4>;
 
 def : InstRW<[WriteCopy], (instrs COPY)>;
 
@@ -176,6 +187,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 16>;
 def : HWVALUWriteRes<WriteDouble,   16>;
 def : HWVALUWriteRes<WriteDoubleAdd, 8>;
 def : HWVALUWriteRes<WriteDoubleCvt, 4>;
+def : HWVALUWriteRes<WriteTrans64,  16>;
 
 def : InstRW<[WriteCopy], (instrs COPY)>;
 
@@ -186,17 +198,20 @@ let SchedModel = GFX10SpeedModel in {
 // The latency values are 1 / (operations / cycle).
 // Add 1 stall cycle for VGPR read.
 def : HWWriteRes<Write32Bit,         [HWVALU, HWRC],   5>;
-def : HWWriteRes<Write64Bit,         [HWVALU, HWRC],   9>;
-def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC],   17>;
+def : HWWriteRes<WriteFloatCvt,      [HWVALU, HWRC],   5>;
+def : HWWriteRes<Write64Bit,         [HWVALU, HWRC],   6>;
+def : HWWriteRes<WriteTrans32,       [HWVALU, HWRC],   10>;
+def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC],   8>;
 def : HWWriteRes<WriteFloatFMA,      [HWVALU, HWRC],   5>;
-def : HWWriteRes<WriteDouble,        [HWVALU, HWRC],   17>;
-def : HWWriteRes<WriteDoubleAdd,     [HWVALU, HWRC],   17>;
-def : HWWriteRes<WriteDoubleCvt,     [HWVALU, HWRC],   17>;
+def : HWWriteRes<WriteDouble,        [HWVALU, HWRC],   22>;
+def : HWWriteRes<WriteDoubleAdd,     [HWVALU, HWRC],   22>;
+def : HWWriteRes<WriteDoubleCvt,     [HWVALU, HWRC],   22>;
+def : HWWriteRes<WriteTrans64,       [HWVALU, HWRC],   24>;
 
 def : HWWriteRes<WriteBranch,        [HWBranch],       32>;
 def : HWWriteRes<WriteExport,        [HWExport, HWRC], 16>;
 def : HWWriteRes<WriteLDS,           [HWLGKM,   HWRC], 20>;
-def : HWWriteRes<WriteSALU,          [HWSALU,   HWRC], 5>;
+def : HWWriteRes<WriteSALU,          [HWSALU,   HWRC], 2>;
 def : HWWriteRes<WriteSMEM,          [HWLGKM,   HWRC], 20>;
 def : HWWriteRes<WriteVMEM,          [HWVMEM,   HWRC], 320>;
 def : HWWriteRes<WriteBarrier,       [HWBranch],       2000>;