path: root/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
author     Dimitry Andric <dim@FreeBSD.org>  2021-12-02 21:49:08 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2022-06-04 11:59:04 +0000
commit     574b7079b96703a748f89ef5adb7dc3e26b8f7fc (patch)
tree       195000196b1e0cc13dea43258fa240e006f48184 /contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
parent     1f6fd64fe9c996b4795ee4a6c66b8f9216747560 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp  619
1 file changed, 601 insertions(+), 18 deletions(-)
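
For reference, the bulk of this change adds new GlobalISel combines to CombinerHelper: a fold of (or (shl x, amt), (lshr y, sub(bw, amt))) into G_FSHL/G_FSHR, and a family of G_FADD/G_FSUB plus G_FMUL contraction patterns producing G_FMA/G_FMAD, mirroring the SelectionDAG combiner. The standalone C++ sketch below is illustrative only and not part of the patch; fshl32_ref is a hypothetical helper name. It checks the scalar identity the funnel-shift fold relies on for non-zero shift amounts.

#include <cassert>
#include <cstdint>

// Reference semantics of a 32-bit funnel shift left: concatenate X:Y into a
// 64-bit value, shift left by Amt (mod 32), and keep the high 32 bits.
static uint32_t fshl32_ref(uint32_t X, uint32_t Y, uint32_t Amt) {
  Amt %= 32;
  uint64_t Concat = (uint64_t(X) << 32) | Y;
  return uint32_t((Concat << Amt) >> 32);
}

int main() {
  const uint32_t X = 0x12345678u, Y = 0x9ABCDEF0u;
  // The matched pattern uses sub(bw, amt) as the lshr amount, so amt == 0
  // (which would shift by the full bit width) is excluded here.
  for (uint32_t Amt = 1; Amt < 32; ++Amt) {
    uint32_t Or = (X << Amt) | (Y >> (32 - Amt));
    assert(Or == fshl32_ref(X, Y, Amt));
  }
  return 0;
}
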
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3a52959d54bf..755b3b844570 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
@@ -3732,8 +3733,7 @@ void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
Builder.setInstrAndDebugLoc(MI);
auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
NewPhi.addDef(DstReg);
- for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
- auto &MO = MI.getOperand(SrcIdx);
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
if (!MO.isReg()) {
NewPhi.addMBB(MO.getMBB());
continue;
@@ -3825,8 +3825,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
unsigned NumElts = DstTy.getNumElements();
SmallBitVector ExtractedElts(NumElts);
- for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
- MRI.use_instr_nodbg_end())) {
+ for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
return false;
auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
@@ -3868,6 +3867,51 @@ void CombinerHelper::applyBuildFnNoErase(
MatchInfo(Builder);
}
+bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+
+ Register ShlSrc, ShlAmt, LShrSrc, LShrAmt;
+ unsigned FshOpc = 0;
+
+ // Match (or (shl x, amt), (lshr y, sub(bw, amt))).
+ if (mi_match(
+ Dst, MRI,
+ // m_GOr() handles the commuted version as well.
+ m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
+ m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth),
+ m_Reg(LShrAmt)))))) {
+ FshOpc = TargetOpcode::G_FSHL;
+
+ // Match (or (shl x, sub(bw, amt)), (lshr y, amt)).
+ } else if (mi_match(Dst, MRI,
+ m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)),
+ m_GShl(m_Reg(ShlSrc),
+ m_GSub(m_SpecificICstOrSplat(BitWidth),
+ m_Reg(ShlAmt)))))) {
+ FshOpc = TargetOpcode::G_FSHR;
+
+ } else {
+ return false;
+ }
+
+ if (ShlAmt != LShrAmt)
+ return false;
+
+ LLT AmtTy = MRI.getType(ShlAmt);
+ if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt});
+ };
+ return true;
+}
+
/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
@@ -4499,20 +4543,9 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
unsigned Opc = MI.getOpcode();
assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
- // Check for a constant 2 or a splat of 2 on the RHS.
- auto RHS = MI.getOperand(3).getReg();
- bool IsVector = MRI.getType(RHS).isVector();
- if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2)))
- return false;
- if (IsVector) {
- // FIXME: There's no mi_match pattern for this yet.
- auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
- if (!RHSDef)
- return false;
- auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
- if (!Splat || *Splat != 2)
- return false;
- }
+
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
+ return false;
MatchInfo = [=, &MI](MachineIRBuilder &B) {
Observer.changingInstr(MI);
@@ -4760,6 +4793,556 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
return true;
}
+/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
+/// due to global flags or MachineInstr flags.
+static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
+ if (MI.getOpcode() != TargetOpcode::G_FMUL)
+ return false;
+ return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
+}
+
+static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
+ const MachineRegisterInfo &MRI) {
+ return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end()) >
+ std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end());
+}
+
+bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
+ bool &AllowFusionGlobally,
+ bool &HasFMAD, bool &Aggressive,
+ bool CanReassociate) {
+
+ auto *MF = MI.getMF();
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ const TargetOptions &Options = MF->getTarget().Options;
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ if (CanReassociate &&
+ !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
+ return false;
+
+ // Floating-point multiply-add with intermediate rounding.
+ HasFMAD = (LI && TLI.isFMADLegal(MI, DstType));
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return false;
+
+ AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD;
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
+ return false;
+
+ Aggressive = TLI.enableAggressiveFMAFusion(DstType);
+ return true;
+}
+
+bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ if (isContractableFMul(*RHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {RHS->getOperand(1).getReg(), RHS->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ MachineInstr *FpExtSrc;
+ if (mi_match(LHS->getOperand(0).getReg(), MRI,
+ m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), RHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
+ // Note: Commutes FADD operands.
+ if (mi_match(RHS->getOperand(0).getReg(), MRI,
+ m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ MachineInstr *FMA = nullptr;
+ Register Z;
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ if (LHS->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(LHS->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(LHS->getOperand(3).getReg())) {
+ FMA = LHS;
+ Z = RHS->getOperand(0).getReg();
+ }
+ // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
+ else if (RHS->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(RHS->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(RHS->getOperand(3).getReg())) {
+ Z = LHS->getOperand(0).getReg();
+ FMA = RHS;
+ }
+
+ if (FMA) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
+ Register X = FMA->getOperand(1).getReg();
+ Register Y = FMA->getOperand(2).getReg();
+ Register U = FMulMI->getOperand(1).getReg();
+ Register V = FMulMI->getOperand(2).getReg();
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
+ B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ if (!Aggressive)
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
+ auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
+ Register Y, MachineIRBuilder &B) {
+ Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
+ Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
+ Register InnerFMA =
+ B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
+ .getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+
+ MachineInstr *FMulMI, *FMAMI;
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (LHS->getOpcode() == PreferredFusedOpcode &&
+ mi_match(LHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg(), LHS->getOperand(1).getReg(),
+ LHS->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(LHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg(), X, Y, B);
+ };
+
+ return true;
+ }
+ }
+
+ // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (RHS->getOpcode() == PreferredFusedOpcode &&
+ mi_match(RHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg(), RHS->getOperand(1).getReg(),
+ RHS->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+ // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(RHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg(), X, Y, B);
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ int FirstMulHasFewerUses = true;
+ if (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally) &&
+ hasMoreUses(*LHS, *RHS, MRI))
+ FirstMulHasFewerUses = false;
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
+ if (FirstMulHasFewerUses &&
+ (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg())))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegZ = B.buildFNeg(DstTy, RHS->getOperand(0).getReg()).getReg(0);
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+ // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
+ else if ((isContractableFMul(*RHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg())))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegY = B.buildFNeg(DstTy, RHS->getOperand(1).getReg()).getReg(0);
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, RHS->getOperand(2).getReg(), LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegX =
+ B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegX, FMulMI->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
+ if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtX =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX, FpExtY, NegZ});
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
+ if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
+ Register FpExtZ =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, FpExtZ, LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
+ MachineIRBuilder &B) {
+ Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
+ Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
+ };
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fneg (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ // fold (fsub (fneg (fpext (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
+ buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), RHSReg, B);
+ B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg, B);
+ };
+ return true;
+ }
+
+ return false;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
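
As context for the G_FADD/G_FSUB contraction combines above: folding (fadd (fmul x, y), z) into (fma x, y, z) removes the intermediate rounding of the multiply, which is why canCombineFMadOrFMA gates the combine on fast-math options or the FmContract flag. The standalone sketch below is illustrative only and not part of the patch; the chosen constants are arbitrary. It shows one case where the separate and fused results differ.

#include <cmath>
#include <cstdio>

int main() {
  const double X = 1.0 + 0x1p-27;    // 1 + 2^-27
  const double Z = -(1.0 + 0x1p-26); // -(1 + 2^-26)

  // X*X is exactly 1 + 2^-26 + 2^-54; rounding the product to double drops
  // the 2^-54 term, so the separate multiply-add cancels to 0.0.
  double Separate = X * X + Z;

  // std::fma rounds only once, so the 2^-54 term survives.
  double Fused = std::fma(X, X, Z);

  std::printf("separate = %a\nfused    = %a\n", Separate, Fused);
  return 0;
}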