vendor/llvm/llvm-trunk-r321017

author: Dimitry Andric <dim@FreeBSD.org> 2017-12-18 20:10:56 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-12-18 20:10:56 +0000
commit: 044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree: 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Target/PowerPC/P9InstrResources.td
parent: eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
download: src-test2-044eb2f6afba375a914ac9d8024f8f5142bb912e.tar.gz
src-test2-044eb2f6afba375a914ac9d8024f8f5142bb912e.zip
1 files changed, 435 insertions, 252 deletions
diff --git a/lib/Target/PowerPC/P9InstrResources.td b/lib/Target/PowerPC/P9InstrResources.td
index aea022f88766..dc6ed16e53ce 100644
--- a/lib/Target/PowerPC/P9InstrResources.td
+++ b/lib/Target/PowerPC/P9InstrResources.td
@@ -12,11 +12,29 @@
 // is listed here. Instructions in this file belong to itinerary classes that
 // have instructions with different resource requirements.
 //
+// The makeup of the P9 CPU is modeled as follows:
+//   - Each CPU is made up of two superslices.
+//   - Each superslice is made up of two slices. Therefore, there are 4 slices
+//      for each CPU.
+//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
+//   - Each CPU has:
+//     - One CY (Crypto) unit P9_CY_*
+//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
+//     - Two PM (Permute) units. One on each superslice. P9_PM_*
+//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
+//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
+//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
+//       This also includes fixed point multiply add.
+//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
+//     - Four Load/Store Queues. P9_LS_*
+//   - Each set of instructions will require a number of these resources.
 //===----------------------------------------------------------------------===//
 
-
+// Two cycle ALU vector operation that uses an entire superslice.
+//  Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
+//  (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
 def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C, DISP_1C],
       (instrs
     VADDCUW,
     VADDUBM,
@@ -26,47 +44,41 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
     VAND,
     VANDC,
     VCMPEQUB,
-    VCMPEQUBo,
     VCMPEQUD,
-    VCMPEQUDo,
     VCMPEQUH,
-    VCMPEQUHo,
     VCMPEQUW,
-    VCMPEQUWo,
-    VCMPGTSB,
-    VCMPGTSBo,
-    VCMPGTSD,
-    VCMPGTSDo,
-    VCMPGTSH,
-    VCMPGTSHo,
-    VCMPGTSW,
-    VCMPGTSWo,
-    VCMPGTUB,
-    VCMPGTUBo,
-    VCMPGTUD,
-    VCMPGTUDo,
-    VCMPGTUH,
-    VCMPGTUHo,
-    VCMPGTUW,
-    VCMPGTUWo,
     VCMPNEB,
-    VCMPNEBo,
     VCMPNEH,
-    VCMPNEHo,
     VCMPNEW,
-    VCMPNEWo,
     VCMPNEZB,
-    VCMPNEZBo,
     VCMPNEZH,
-    VCMPNEZHo,
     VCMPNEZW,
-    VCMPNEZWo,
     VEQV,
     VEXTSB2D,
     VEXTSB2W,
     VEXTSH2D,
     VEXTSH2W,
     VEXTSW2D,
+    VRLB,
+    VRLD,
+    VRLDMI,
+    VRLDNM,
+    VRLH,
+    VRLW,
+    VRLWMI,
+    VRLWNM,
+    VSRAB,
+    VSRAD,
+    VSRAH,
+    VSRAW,
+    VSRB,
+    VSRD,
+    VSRH,
+    VSRW,
+    VSLB,
+    VSLD,
+    VSLH,
+    VSLW,
     VMRGEW,
     VMRGOW,
     VNAND,
@@ -77,9 +89,7 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
     VORC,
     VPOPCNTB,
     VPOPCNTH,
-    VPOPCNTW,
     VSEL,
-    VSUBCUW,
     VSUBUBM,
     VSUBUDM,
     VSUBUHM,
@@ -98,6 +108,8 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
     XVNEGDP,
     XVNEGSP,
     XVXEXPDP,
+    XVIEXPSP,
+    XVXEXPSP,
     XXLAND,
     XXLANDC,
     XXLEQV,
@@ -107,28 +119,128 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
     XXLORf,
     XXLORC,
     XXLXOR,
-    XXSEL
-)>;
-
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
-      (instrs
+    XXSEL,
     XSABSQP,
     XSCPSGNQP,
     XSIEXPQP,
     XSNABSQP,
     XSNEGQP,
-    XSXEXPQP,
-    XSABSDP,
-    XSCPSGNDP,
-    XSIEXPDP,
+    XSXEXPQP
+)>;
+
+// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
+//  single slice. However, since it is Restricted it requires all 3 dispatches
+//  (DISP) for that superslice.
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    FCMPUS,
+    FCMPUD,
+    XSTSTDCDP,
+    XSTSTDCSP
+)>;
+
+// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
+      (instrs
+    XSMAXCDP,
+    XSMAXDP,
+    XSMAXJDP,
+    XSMINCDP,
+    XSMINDP,
+    XSMINJDP,
+    XSTDIVDP,
+    XSTSQRTDP,
+    XSCMPEQDP,
+    XSCMPEXPDP,
+    XSCMPGEDP,
+    XSCMPGTDP,
+    XSCMPODP,
+    XSCMPUDP,
+    XSXSIGDP,
+    XSCVSPDPN
+)>;
+
+// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+      (instrs
+    ADDIStocHA,
+    ADDItocL,
+    MCRF,
+    MCRXRX,
+    SLD,
+    SRD,
+    SRAD,
+    SRADI,
+    RLDIC,
     XSNABSDP,
+    XSXEXPDP,
+    XSABSDP,
     XSNEGDP,
-    XSXEXPDP
+    XSCPSGNDP
 )>;
 
-def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
+//  single slice. However, since it is Restricted it requires all 3 dispatches
+//  (DISP) for that superslice.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
+    RLDCL,
+    RLDCR,
+    RLDIMI,
+    RLDICL,
+    RLDICR,
+    RLDICL_32_64,
+    XSIEXPDP,
+    FMR,
+    FABSD,
+    FABSS,
+    FNABSD,
+    FNABSS,
+    FNEGD,
+    FNEGS,
+    FCPSGND,
+    FCPSGNS
+)>;
 
+// Three cycle ALU vector operation that uses an entire superslice.
+//  Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
+//  (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    VBPERMD,
+    VABSDUB,
+    VABSDUH,
+    VABSDUW,
+    VADDUBS,
+    VADDUHS,
+    VADDUWS,
+    VAVGSB,
+    VAVGSH,
+    VAVGSW,
+    VAVGUB,
+    VAVGUH,
+    VAVGUW,
+    VCMPEQFP,
+    VCMPEQFPo,
+    VCMPGEFP,
+    VCMPGEFPo,
+    VCMPBFP,
+    VCMPBFPo,
+    VCMPGTFP,
+    VCMPGTFPo,
+    VCLZB,
+    VCLZD,
+    VCLZH,
+    VCLZW,
+    VCTZB,
+    VCTZD,
+    VCTZH,
+    VCTZW,
+    VADDSBS,
+    VADDSHS,
+    VADDSWS,
+    VMINFP,
     VMINSB,
     VMINSD,
     VMINSH,
@@ -137,55 +249,54 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C
     VMINUD,
     VMINUH,
     VMINUW,
+    VMAXFP,
+    VMAXSB,
+    VMAXSD,
+    VMAXSH,
+    VMAXSW,
+    VMAXUB,
+    VMAXUD,
+    VMAXUH,
+    VMAXUW,
+    VPOPCNTW,
     VPOPCNTD,
     VPRTYBD,
     VPRTYBW,
-    VRLB,
-    VRLD,
-    VRLDMI,
-    VRLDNM,
-    VRLH,
-    VRLW,
-    VRLWMI,
-    VRLWNM,
     VSHASIGMAD,
     VSHASIGMAW,
-    VSLB,
-    VSLD,
-    VSLH,
-    VSLW,
-    VSRAB,
-    VSRAD,
-    VSRAH,
-    VSRAW,
-    VSRB,
-    VSRD,
-    VSRH,
-    VSRW,
     VSUBSBS,
     VSUBSHS,
     VSUBSWS,
     VSUBUBS,
     VSUBUHS,
     VSUBUWS,
-    XSCMPEQDP,
-    XSCMPEXPDP,
-    XSCMPGEDP,
-    XSCMPGTDP,
-    XSCMPODP,
-    XSCMPUDP,
-    XSCVSPDPN,
-    XSMAXCDP,
-    XSMAXDP,
-    XSMAXJDP,
-    XSMINCDP,
-    XSMINDP,
-    XSMINJDP,
-    XSTDIVDP,
-    XSTSQRTDP,
-    XSTSTDCDP,
-    XSTSTDCSP,
-    XSXSIGDP,
+    VSUBCUW,
+    VCMPGTSB,
+    VCMPGTSBo,
+    VCMPGTSD,
+    VCMPGTSDo,
+    VCMPGTSH,
+    VCMPGTSHo,
+    VCMPGTSW,
+    VCMPGTSWo,
+    VCMPGTUB,
+    VCMPGTUBo,
+    VCMPGTUD,
+    VCMPGTUDo,
+    VCMPGTUH,
+    VCMPGTUHo,
+    VCMPGTUW,
+    VCMPGTUWo,
+    VCMPNEBo,
+    VCMPNEHo,
+    VCMPNEWo,
+    VCMPNEZBo,
+    VCMPNEZHo,
+    VCMPNEZWo,
+    VCMPEQUBo,
+    VCMPEQUDo,
+    VCMPEQUHo,
+    VCMPEQUWo,
     XVCMPEQDP,
     XVCMPEQDPo,
     XVCMPEQSP,
@@ -198,7 +309,6 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C
     XVCMPGTDPo,
     XVCMPGTSP,
     XVCMPGTSPo,
-    XVIEXPSP,
     XVMAXDP,
     XVMAXSP,
     XVMINDP,
@@ -209,58 +319,15 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C
     XVTSQRTSP,
     XVTSTDCDP,
     XVTSTDCSP,
-    XVXEXPSP,
     XVXSIGDP,
     XVXSIGSP
 )>;
 
-def : InstRW<[P9_ALUE_4C, P9_ALUO_4C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
-      (instrs
-    VABSDUB,
-    VABSDUH,
-    VABSDUW,
-    VADDSBS,
-    VADDSHS,
-    VADDSWS,
-    VADDUBS,
-    VADDUHS,
-    VADDUWS,
-    VAVGSB,
-    VAVGSH,
-    VAVGSW,
-    VAVGUB,
-    VAVGUH,
-    VAVGUW,
-    VBPERMD,
-    VCLZB,
-    VCLZD,
-    VCLZH,
-    VCLZW,
-    VCMPBFP,
-    VCMPBFPo,
-    VCMPGTFP,
-    VCMPGTFPo,
-    VCTZB,
-    VCTZD,
-    VCTZH,
-    VCTZW,
-    VMAXFP,
-    VMAXSB,
-    VMAXSD,
-    VMAXSH,
-    VMAXSW,
-    VMAXUB,
-    VMAXUD,
-    VMAXUH,
-    VMAXUW,
-    VMINFP,
-    VCMPEQFP,
-    VCMPEQFPo,
-    VCMPGEFP,
-    VCMPGEFPo
-)>;
-
-def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 7 cycle DP vector operation that uses an entire superslice.
+//  Uses both DP units (the even DPE and odd DPO units), two pipelines
+//  (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
+def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
       (instrs
     VADDFP,
     VCTSXS,
@@ -367,8 +434,47 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
     VSUMSWS
 )>;
 
+// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
+//  dispatch units for the superslice.
 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
+    FRSP,
+    FRIND,
+    FRINS,
+    FRIPD,
+    FRIPS,
+    FRIZD,
+    FRIZS,
+    FRIMD,
+    FRIMS,
+    FRE,
+    FRES,
+    FRSQRTE,
+    FRSQRTES,
+    FMADDS,
+    FMADD,
+    FMSUBS,
+    FMSUB,
+    FNMADDS,
+    FNMADD,
+    FNMSUBS,
+    FNMSUB,
+    FSELD,
+    FSELS,
+    FADDS,
+    FMULS,
+    FMUL,
+    FSUBS,
+    FCFID,
+    FCTID,
+    FCTIDZ,
+    FCFIDU,
+    FCFIDS,
+    FCFIDUS,
+    FCTIDUZ,
+    FCTIWUZ,
+    FCTIW,
+    FCTIWZ,
     XSMADDADP,
     XSMADDASP,
     XSMADDMDP,
@@ -389,7 +495,19 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
     XSNMSUBMSP
 )>;
 
+// 7 cycle Restricted DP operation and one 2 cycle ALU operation.
+//  The DP is restricted so we need a full 5 dispatches.
+def : InstRW<[P9_DPOpAndALUOp_9C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    FMULo,
+    FMADDo,
+    FMSUBo,
+    FNMADDo,
+    FNMSUBo
+)>;
 
+// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     XSADDDP,
@@ -397,8 +515,10 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
     XSCVDPHP,
     XSCVDPSP,
     XSCVDPSXDS,
+    XSCVDPSXDSs,
     XSCVDPSXWS,
     XSCVDPUXDS,
+    XSCVDPUXDSs,
     XSCVDPUXWS,
     XSCVHPDP,
     XSCVSPDP,
@@ -421,7 +541,10 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
     XSCVDPSPN
 )>;
 
-def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C],
+// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     VBPERMQ,
     VCLZLSBB,
@@ -469,7 +592,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C],
     VSLO,
     VSLV,
     VSPLTB,
+    VSPLTBs,
     VSPLTH,
+    VSPLTHs,
     VSPLTISB,
     VSPLTISH,
     VSPLTISW,
@@ -498,6 +623,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C],
     XXSLDWI,
     XXSPLTIB,
     XXSPLTW,
+    XXSPLTWs,
+    XXPERMDI,
+    XXPERMDIs,
     VADDCUQ,
     VADDECUQ,
     VADDEUQM,
@@ -517,7 +645,10 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C],
     XSXSIGQP
 )>;
 
-def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     XSADDQP,
     XSADDQPO,
@@ -536,7 +667,10 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
     XSSUBQPO
 )>;
 
-def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     XSMADDQP,
     XSMADDQPO,
@@ -550,45 +684,57 @@ def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
     XSNMSUBQPO
 )>;
 
-def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     XSDIVQP,
     XSDIVQPO
 )>;
 
-def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     XSSQRTQP,
     XSSQRTQPO
 )>;
 
-// Load Operation in IIC_LdStLFD
-
+// 5 Cycle load uses a single slice.
 def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
       (instrs
     LXSDX,
     LXVD2X,
     LXSIWZX,
     LXV,
-    LXSD
+    LXVX,
+    LXSD,
+    DFLOADf64,
+    XFLOADf64
 )>;
 
-def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+// 4 Cycle load uses a single slice.
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
       (instrs
-    LFIWZX,
-    LFDX,
-    LFD
+    COPY
 )>;
 
-def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
+//  superslice.
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
-    LXSSPX,
-    LXSIWAX,
-    LXSSP
+    LFIWZX,
+    LFDX,
+    LFD
 )>;
 
-def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
+// Cracked Restricted Load instruction.
+// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
+//  operations cannot be done at the same time and so their latencies are added.
+// Full 6 dispatches are required as this is both cracked and restricted.
+def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     LFIWAX,
@@ -596,14 +742,38 @@ def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
     LFS
 )>;
 
-def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
+// Cracked Load instruction.
+// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
+//  operations cannot be done at the same time and so their latencies are added.
+// Full 4 dispatches are required as this is a cracked instruction.
+def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    LXSSPX,
+    LXSIWAX,
+    LXSSP,
+    DFLOADf32,
+    XFLOADf32,
+    LIWAX,
+    LIWZX
+)>;
+
+// Cracked Load that requires the PM resource.
+// Since the Load and the PM cannot be done at the same time the latencies are
+//  added. Requires 8 cycles.
+// Since the PM requires the full superslice we need both EXECE, EXECO pipelines
+//  as well as 3 dispatches for the PM. The Load requires the remaining 2
+//  dispatches.
+def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     LXVDSX,
+    LXVWSX,
     LXVW4X
 )>;
 
-// Store Operations in IIC_LdStSTFD.
-
+// Single slice Restricted store operation. The restricted operation requires
+//  all three dispatches for the superslice.
 def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     STFS,
@@ -613,74 +783,88 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
     STFDX,
     STXSDX,
     STXSSPX,
-    STXSIWX
+    STXSIWX,
+    DFSTOREf32,
+    DFSTOREf64,
+    XFSTOREf32,
+    XFSTOREf64,
+    STIWX
 )>;
 
-def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
+// Store operation that requires the whole superslice.
+def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
+              DISP_1C, DISP_1C, DISP_1C],
       (instrs
     STXVD2X,
     STXVW4X
 )>;
 
 
-// Divide Operations in IIC_IntDivW, IIC_IntDivD.
-
-def : InstRW<[P9_DIV_16C_8, IP_EXECE_1C, DISP_1C, DISP_1C],
+// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
+              DISP_1C, DISP_1C, DISP_1C],
       (instrs
     DIVW,
-    DIVWU
+    DIVWU,
+    MODSW
 )>;
 
-def : InstRW<[P9_DIV_24C_8, IP_EXECE_1C, DISP_1C, DISP_1C],
+// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
+              DISP_1C, DISP_1C, DISP_1C],
       (instrs
     DIVWE,
     DIVD,
     DIVWEU,
-    DIVDU
+    DIVDU,
+    MODSD,
+    MODUD,
+    MODUW
 )>;
 
-def : InstRW<[P9_DIV_40C_8, IP_EXECE_1C, DISP_1C, DISP_1C],
+// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
+              DISP_1C, DISP_1C, DISP_1C],
       (instrs
     DIVDE,
     DIVDEU
 )>;
 
-def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
+//  and one full superslice for the DIV operation since there is only one DIV
+//  per superslice. Latency of DIV plus ALU is 26.
+def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
+    DIVDo,
+    DIVDUo,
     DIVWEo,
     DIVWEUo
 )>;
 
-def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
+//  and one full superslice for the DIV operation since there is only one DIV
+//  per superslice. Latency of DIV plus ALU is 42.
+def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     DIVDEo,
     DIVDEUo
 )>;
 
-// Rotate Operations in IIC_IntRotateD, IIC_IntRotateDI
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
-      (instrs
-    SLD,
-    SRD,
-    SRAD,
-    SRADI,
-    RLDIC
-)>;
-
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
-      (instrs
-    RLDCL,
-    RLDCR,
-    RLDIMI,
-    RLDICL,
-    RLDICR,
-    RLDICL_32_64
-)>;
-
 // CR access instructions in _BrMCR, IIC_BrMCRX.
 
+// Cracked, restricted, ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+//  latencies are not added together. Otherwise this is like having two
+//  instructions running together on two pipelines and 6 dispatches.
+// ALU ops are 2 cycles each.
 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
@@ -690,13 +874,12 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
     MTCRF8
 )>;
 
-def : InstRW<[P9_ALU_5C, IP_EXEC_1C, DISP_1C, DISP_1C],
-      (instrs
-    MCRF,
-    MCRXRX
-)>;
-
-def : InstRW<[P9_ALU_5C, P9_ALU_5C, IP_EXEC_1C, IP_EXEC_1C,
+// Cracked, restricted, ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+//  latencies are not added together. Otherwise this is like having two
+//  instructions running together on two pipelines and 6 dispatches.
+// ALU ops are 3 cycles each.
+def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     MCRFS
@@ -704,93 +887,71 @@ def : InstRW<[P9_ALU_5C, P9_ALU_5C, IP_EXEC_1C, IP_EXEC_1C,
 
 // FP Div instructions in IIC_FPDivD and IIC_FPDivS.
 
+// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
 def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
-    FDIV,
-    XSDIVDP
+    FDIV
 )>;
 
-def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// 33 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
+def : InstRW<[P9_DPOpAndALUOp_35C_8, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
-    FDIVS,
-    XSDIVSP
+    FDIVo
 )>;
 
-def : InstRW<[P9_DP_24C_8, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
-    XVDIVSP
+    XSDIVDP
 )>;
 
-def : InstRW<[P9_DP_33C_8, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
-    XVDIVDP
+    FDIVS
 )>;
 
-// FP Instructions in IIC_FPGeneral, IIC_FPFused
+// 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
+def : InstRW<[P9_DPOpAndALUOp_24C_5, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    FDIVSo
+)>;
 
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
-    FRSP,
-    FRIND,
-    FRINS,
-    FRIPD,
-    FRIPS,
-    FRIZD,
-    FRIZS,
-    FRIMD,
-    FRIMS,
-    FRE,
-    FRES,
-    FRSQRTE,
-    FRSQRTES,
-    FMADDS,
-    FMADD,
-    FMSUBS,
-    FMSUB,
-    FNMADDS,
-    FNMADD,
-    FNMSUBS,
-    FNMSUB,
-    FSELD,
-    FSELS,
-    FADDS,
-    FMULS,
-    FMUL,
-    FSUBS,
-    FCFID,
-    FCTID,
-    FCTIDZ,
-    FCFIDU,
-    FCFIDS,
-    FCFIDUS,
-    FCTIDUZ,
-    FCTIWUZ,
-    FCTIW,
-    FCTIWZ
+    XSDIVSP
 )>;
 
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// 24 Cycle DP Vector Instruction. Takes one full superslice.
+// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
+//  superslice.
+def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
       (instrs
-    FMR,
-    FABSD,
-    FABSS,
-    FNABSD,
-    FNABSS,
-    FNEGD,
-    FNEGS,
-    FCPSGND,
-    FCPSGNS
+    XVDIVSP
 )>;
 
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// 33 Cycle DP Vector Instruction. Takes one full superslice.
+// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
+//  superslice.
+def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
       (instrs
-    FCMPUS,
-    FCMPUD
+    XVDIVDP
 )>;
 
 // Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX.
 
-def : InstRW<[P9_LoadAndALUOp_7C, P9_ALU_2C,
+// Instruction cracked into three pieces. One Load and two ALU operations.
+// The Load and one of the ALU ops cannot be run at the same time and so the
+//  latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
+// Both the load and the ALU that depends on it are restricted and so they take
+//  a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
+// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
+def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C,
               IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C],
@@ -799,10 +960,32 @@ def : InstRW<[P9_LoadAndALUOp_7C, P9_ALU_2C,
     LFSUX
 )>;
 
-def : InstRW<[P9_LS_5C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
+// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
+//  the load and so it can be run at the same time as the load. The load is also
+//  restricted. 3 dispatches are from the restricted load while the other two
+//  are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
+//  is required for the ALU.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     LFDU,
     LFDUX
 )>;
 
+// Crypto Instructions
+
+// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  dispatches.
+def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+  VPMSUMB,
+  VPMSUMD,
+  VPMSUMH,
+  VPMSUMW,
+  VCIPHER,
+  VCIPHERLAST,
+  VNCIPHER,
+  VNCIPHERLAST,
+  VSBOX
+)>;
author	Dimitry Andric <dim@FreeBSD.org>	2017-12-18 20:10:56 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-12-18 20:10:56 +0000
commit	044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree	1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Target/PowerPC/P9InstrResources.td
parent	eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
download	src-test2-044eb2f6afba375a914ac9d8024f8f5142bb912e.tar.gz src-test2-044eb2f6afba375a914ac9d8024f8f5142bb912e.zip