diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SISchedule.td')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SISchedule.td | 39 |
1 file changed, 27 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index 824d1aeb0df9..932381c99e0b 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -1,4 +1,4 @@ -//===-- SISchedule.td - SI Scheduling definitons -------------------------===// +//===-- SISchedule.td - SI Scheduling definitions -------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -27,10 +27,14 @@ def WriteBarrier : SchedWrite; def MIVGPRRead : SchedRead; def MIMFMARead : SchedRead; -// Vector ALU instructions +// Normal 16 or 32 bit VALU instructions def Write32Bit : SchedWrite; +// Conversion to or from F32 (but not converting F64 to or from F32) +def WriteFloatCvt : SchedWrite; +// F16 or F32 transcendental instructions (these are quarter rate) +def WriteTrans32 : SchedWrite; +// Other quarter rate VALU instructions def WriteQuarterRate32 : SchedWrite; -def WriteFullOrQuarterRate32 : SchedWrite; def WriteFloatFMA : SchedWrite; @@ -43,6 +47,10 @@ def WriteDoubleAdd : SchedWrite; // Conversion to or from f64 instruction def WriteDoubleCvt : SchedWrite; +// F64 "transcendental" (actually only reciprocal and/or square root) +// instructions +def WriteTrans64 : SchedWrite; + // Half rate 64-bit instructions. def Write64Bit : SchedWrite; @@ -56,7 +64,7 @@ def Write16PassMAI : SchedWrite; // instructions) class SISchedMachineModel : SchedMachineModel { - let CompleteModel = 0; + let CompleteModel = 1; // MicroOpBufferSize = 1 means that instructions will always be added // the ready queue when they become available. This exposes them // to the register pressure analysis. 
@@ -127,6 +135,8 @@ multiclass SICommonWriteRes { def : HWVALUWriteRes<Write32Bit, 1>; def : HWVALUWriteRes<Write64Bit, 2>; + def : HWVALUWriteRes<WriteFloatCvt, 4>; + def : HWVALUWriteRes<WriteTrans32, 4>; def : HWVALUWriteRes<WriteQuarterRate32, 4>; def : HWVALUWriteRes<Write2PassMAI, 2>; def : HWVALUWriteRes<Write8PassMAI, 8>; @@ -135,9 +145,9 @@ multiclass SICommonWriteRes { def : ReadAdvance<MIVGPRRead, -2>; def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32$")>; - // Technicaly mfma reads can be from 0 to 4 cycles but that does not make + // Technically mfma reads can be from 0 to 4 cycles but that does not make // sense to model because its register setup is huge. In particular if we - // properly model read advanice as -2 for a vgpr read it will result in a + // properly model read advance as -2 for a vgpr read it will result in a // bad scheduling of acc writes before that mfma. To avoid it we would // need to consume 2 or 4 more vgprs to be initialized before the acc // write sequence. Just assume worst case here. @@ -163,6 +173,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>; def : HWVALUWriteRes<WriteDouble, 4>; def : HWVALUWriteRes<WriteDoubleAdd, 2>; def : HWVALUWriteRes<WriteDoubleCvt, 4>; +def : HWVALUWriteRes<WriteTrans64, 4>; def : InstRW<[WriteCopy], (instrs COPY)>; @@ -176,6 +187,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 16>; def : HWVALUWriteRes<WriteDouble, 16>; def : HWVALUWriteRes<WriteDoubleAdd, 8>; def : HWVALUWriteRes<WriteDoubleCvt, 4>; +def : HWVALUWriteRes<WriteTrans64, 16>; def : InstRW<[WriteCopy], (instrs COPY)>; @@ -186,17 +198,20 @@ let SchedModel = GFX10SpeedModel in { // The latency values are 1 / (operations / cycle). // Add 1 stall cycle for VGPR read. 
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>; -def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 9>; -def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 17>; +def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>; +def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>; +def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 10>; +def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>; def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>; -def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 17>; -def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 17>; -def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 17>; +def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 22>; +def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 22>; +def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 22>; +def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 24>; def : HWWriteRes<WriteBranch, [HWBranch], 32>; def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>; def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>; -def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 5>; +def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>; def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>; def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>; def : HWWriteRes<WriteBarrier, [HWBranch], 2000>; |