aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp 46
1 files changed, 46 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index fc984d2dda64..1479933a2850 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
@@ -58,6 +59,9 @@ public:
bool matchUCharToFloat(MachineInstr &MI);
void applyUCharToFloat(MachineInstr &MI);
+ bool matchRcpSqrtToRsq(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+
// FIXME: Should be able to have 2 separate matchdatas rather than custom
// struct boilerplate.
struct CvtF32UByteMatchInfo {
@@ -203,6 +207,48 @@ void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
MI.eraseFromParent();
}
+/// Match rcp(sqrt(x)) or sqrt(rcp(x)) rooted at \p MI so that it can be
+/// replaced by a single amdgcn.rsq of x.
+///
+/// \param MI        Root instruction. It is tested both as a G_INTRINSIC
+///                  calling amdgcn.rcp and as a G_FSQRT.
+/// \param MatchInfo On success, set to a callback that builds the amdgcn.rsq
+///                  into MI's operand 0 and copies MI's flags onto it. The
+///                  callback keeps a reference to \p MI, so it has to run
+///                  while \p MI is still alive.
+/// \returns true if either pattern matched and \p MatchInfo was set.
+bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  // Source register of \p I when it is an amdgcn.rcp call, otherwise an
+  // invalid register.
+  auto getRcpSrc = [](const MachineInstr &I) -> Register {
+    if (I.getOpcode() == TargetOpcode::G_INTRINSIC &&
+        I.getIntrinsicID() == Intrinsic::amdgcn_rcp)
+      return I.getOperand(2).getReg();
+    return Register();
+  };
+
+  // Source register of \p I when it is a G_FSQRT, otherwise an invalid
+  // register.
+  auto getSqrtSrc = [](const MachineInstr &I) -> Register {
+    if (I.getOpcode() == TargetOpcode::G_FSQRT)
+      return I.getOperand(1).getReg();
+    return Register();
+  };
+
+  // Applies \p GetInnerSrc to the instruction defining \p OuterSrc. Yields an
+  // invalid register when the outer op did not match or has no defining
+  // instruction.
+  auto getSrcOfDef = [&](Register OuterSrc, auto GetInnerSrc) -> Register {
+    if (!OuterSrc.isValid())
+      return Register();
+    const MachineInstr *Def = MRI.getVRegDef(OuterSrc);
+    return Def ? GetInnerSrc(*Def) : Register();
+  };
+
+  // Try rcp(sqrt(x)) first, then sqrt(rcp(x)).
+  //
+  // X is the exact register consumed by the inner op. Reading operand 0 of
+  // x's defining instruction instead would pick the wrong value whenever that
+  // instruction has several results (e.g. G_UNMERGE_VALUES).
+  Register X = getSrcOfDef(getRcpSrc(MI), getSqrtSrc);
+  if (!X.isValid())
+    X = getSrcOfDef(getSqrtSrc(MI), getRcpSrc);
+  if (!X.isValid())
+    return false;
+
+  MatchInfo = [X, &MI](MachineIRBuilder &B) {
+    B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
+        .addUse(X)
+        .setMIFlags(MI.getFlags());
+  };
+  return true;
+}
+
bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
Register SrcReg = MI.getOperand(1).getReg();