diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2021-12-02 21:49:08 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2022-06-04 11:59:04 +0000 |
| commit | 574b7079b96703a748f89ef5adb7dc3e26b8f7fc (patch) | |
| tree | 195000196b1e0cc13dea43258fa240e006f48184 /contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp | |
| parent | 1f6fd64fe9c996b4795ee4a6c66b8f9216747560 (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index fc984d2dda64..1479933a2850 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/Target/TargetMachine.h"
 
 #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
@@ -58,6 +59,9 @@ public:
   bool matchUCharToFloat(MachineInstr &MI);
   void applyUCharToFloat(MachineInstr &MI);
 
+  bool matchRcpSqrtToRsq(MachineInstr &MI,
+                         std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   // FIXME: Should be able to have 2 separate matchdatas rather than custom
   // struct boilerplate.
   struct CvtF32UByteMatchInfo {
@@ -203,6 +207,48 @@ void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
   MI.eraseFromParent();
 }
 
+bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+
+  auto getRcpSrc = [=](const MachineInstr &MI) {
+    MachineInstr *ResMI = nullptr;
+    if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
+        MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
+      ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());
+
+    return ResMI;
+  };
+
+  auto getSqrtSrc = [=](const MachineInstr &MI) {
+    MachineInstr *SqrtSrcMI = nullptr;
+    mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
+    return SqrtSrcMI;
+  };
+
+  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
+  // rcp(sqrt(x))
+  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
+    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
+      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
+          .addUse(SqrtSrcMI->getOperand(0).getReg())
+          .setMIFlags(MI.getFlags());
+    };
+    return true;
+  }
+
+  // sqrt(rcp(x))
+  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
+    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
+      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
+          .addUse(RcpSrcMI->getOperand(0).getReg())
+          .setMIFlags(MI.getFlags());
+    };
+    return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
     MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
   Register SrcReg = MI.getOperand(1).getReg();
