path: root/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
author     Dimitry Andric <dim@FreeBSD.org>  2021-12-02 21:49:08 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2022-06-04 11:59:04 +0000
commit     574b7079b96703a748f89ef5adb7dc3e26b8f7fc (patch)
tree       195000196b1e0cc13dea43258fa240e006f48184 /contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
parent     1f6fd64fe9c996b4795ee4a6c66b8f9216747560 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp  619
1 file changed, 601 insertions(+), 18 deletions(-)
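
For reference, the bulk of this change adds new GlobalISel combines to CombinerHelper: a fold of (or (shl x, amt), (lshr y, sub(bw, amt))) into G_FSHL/G_FSHR, and a family of G_FADD/G_FSUB plus G_FMUL contraction patterns producing G_FMA/G_FMAD, mirroring the SelectionDAG combiner. The standalone C++ sketch below is illustrative only and not part of the patch; fshl32_ref is a hypothetical helper name. It checks the scalar identity the funnel-shift fold relies on for non-zero shift amounts.

#include <cassert>
#include <cstdint>

// Reference semantics of a 32-bit funnel shift left: concatenate X:Y into a
// 64-bit value, shift left by Amt (mod 32), and keep the high 32 bits.
static uint32_t fshl32_ref(uint32_t X, uint32_t Y, uint32_t Amt) {
  Amt %= 32;
  uint64_t Concat = (uint64_t(X) << 32) | Y;
  return uint32_t((Concat << Amt) >> 32);
}

int main() {
  const uint32_t X = 0x12345678u, Y = 0x9ABCDEF0u;
  // The matched pattern uses sub(bw, amt) as the lshr amount, so amt == 0
  // (which would shift by the full bit width) is excluded here.
  for (uint32_t Amt = 1; Amt < 32; ++Amt) {
    uint32_t Or = (X << Amt) | (Y >> (32 - Amt));
    assert(Or == fshl32_ref(X, Y, Amt));
  }
  return 0;
}
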
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3a52959d54bf..755b3b844570 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
@@ -3732,8 +3733,7 @@ void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
Builder.setInstrAndDebugLoc(MI);
auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
NewPhi.addDef(DstReg);
- for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
- auto &MO = MI.getOperand(SrcIdx);
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
if (!MO.isReg()) {
NewPhi.addMBB(MO.getMBB());
continue;
@@ -3825,8 +3825,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
unsigned NumElts = DstTy.getNumElements();
SmallBitVector ExtractedElts(NumElts);
- for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
- MRI.use_instr_nodbg_end())) {
+ for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
return false;
auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
@@ -3868,6 +3867,51 @@ void CombinerHelper::applyBuildFnNoErase(
MatchInfo(Builder);
}
+bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+
+ Register ShlSrc, ShlAmt, LShrSrc, LShrAmt;
+ unsigned FshOpc = 0;
+
+ // Match (or (shl x, amt), (lshr y, sub(bw, amt))).
+ if (mi_match(
+ Dst, MRI,
+ // m_GOr() handles the commuted version as well.
+ m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
+ m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth),
+ m_Reg(LShrAmt)))))) {
+ FshOpc = TargetOpcode::G_FSHL;
+
+ // Match (or (shl x, sub(bw, amt)), (lshr y, amt)).
+ } else if (mi_match(Dst, MRI,
+ m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)),
+ m_GShl(m_Reg(ShlSrc),
+ m_GSub(m_SpecificICstOrSplat(BitWidth),
+ m_Reg(ShlAmt)))))) {
+ FshOpc = TargetOpcode::G_FSHR;
+
+ } else {
+ return false;
+ }
+
+ if (ShlAmt != LShrAmt)
+ return false;
+
+ LLT AmtTy = MRI.getType(ShlAmt);
+ if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt});
+ };
+ return true;
+}
+
/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
@@ -4499,20 +4543,9 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
unsigned Opc = MI.getOpcode();
assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
- // Check for a constant 2 or a splat of 2 on the RHS.
- auto RHS = MI.getOperand(3).getReg();
- bool IsVector = MRI.getType(RHS).isVector();
- if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2)))
- return false;
- if (IsVector) {
- // FIXME: There's no mi_match pattern for this yet.
- auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
- if (!RHSDef)
- return false;
- auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
- if (!Splat || *Splat != 2)
- return false;
- }
+
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
+ return false;
MatchInfo = [=, &MI](MachineIRBuilder &B) {
Observer.changingInstr(MI);
@@ -4760,6 +4793,556 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
return true;
}
+/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
+/// due to global flags or MachineInstr flags.
+static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
+ if (MI.getOpcode() != TargetOpcode::G_FMUL)
+ return false;
+ return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
+}
+
+static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
+ const MachineRegisterInfo &MRI) {
+ return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end()) >
+ std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end());
+}
+
+bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
+ bool &AllowFusionGlobally,
+ bool &HasFMAD, bool &Aggressive,
+ bool CanReassociate) {
+
+ auto *MF = MI.getMF();
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ const TargetOptions &Options = MF->getTarget().Options;
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ if (CanReassociate &&
+ !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
+ return false;
+
+ // Floating-point multiply-add with intermediate rounding.
+ HasFMAD = (LI && TLI.isFMADLegal(MI, DstType));
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return false;
+
+ AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD;
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
+ return false;
+
+ Aggressive = TLI.enableAggressiveFMAFusion(DstType);
+ return true;
+}
+
+bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ if (isContractableFMul(*RHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {RHS->getOperand(1).getReg(), RHS->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ MachineInstr *FpExtSrc;
+ if (mi_match(LHS->getOperand(0).getReg(), MRI,
+ m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), RHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
+ // Note: Commutes FADD operands.
+ if (mi_match(RHS->getOperand(0).getReg(), MRI,
+ m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ MachineInstr *FMA = nullptr;
+ Register Z;
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ if (LHS->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(LHS->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(LHS->getOperand(3).getReg())) {
+ FMA = LHS;
+ Z = RHS->getOperand(0).getReg();
+ }
+ // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
+ else if (RHS->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(RHS->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(RHS->getOperand(3).getReg())) {
+ Z = LHS->getOperand(0).getReg();
+ FMA = RHS;
+ }
+
+ if (FMA) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
+ Register X = FMA->getOperand(1).getReg();
+ Register Y = FMA->getOperand(2).getReg();
+ Register U = FMulMI->getOperand(1).getReg();
+ Register V = FMulMI->getOperand(2).getReg();
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
+ B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ if (!Aggressive)
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
+ auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
+ Register Y, MachineIRBuilder &B) {
+ Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
+ Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
+ Register InnerFMA =
+ B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
+ .getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+
+ MachineInstr *FMulMI, *FMAMI;
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (LHS->getOpcode() == PreferredFusedOpcode &&
+ mi_match(LHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg(), LHS->getOperand(1).getReg(),
+ LHS->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(LHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg(), X, Y, B);
+ };
+
+ return true;
+ }
+ }
+
+ // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (RHS->getOpcode() == PreferredFusedOpcode &&
+ mi_match(RHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg(), RHS->getOperand(1).getReg(),
+ RHS->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+ // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(RHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg(), X, Y, B);
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ int FirstMulHasFewerUses = true;
+ if (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally) &&
+ hasMoreUses(*LHS, *RHS, MRI))
+ FirstMulHasFewerUses = false;
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
+ if (FirstMulHasFewerUses &&
+ (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg())))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegZ = B.buildFNeg(DstTy, RHS->getOperand(0).getReg()).getReg(0);
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+ // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
+ else if ((isContractableFMul(*RHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg())))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegY = B.buildFNeg(DstTy, RHS->getOperand(1).getReg()).getReg(0);
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, RHS->getOperand(2).getReg(), LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegX =
+ B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegX, FMulMI->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
+ if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtX =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX, FpExtY, NegZ});
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
+ if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
+ Register FpExtZ =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, FpExtZ, LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
+ MachineIRBuilder &B) {
+ Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
+ Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
+ };
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fneg (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ // fold (fsub (fneg (fpext (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
+ buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), RHSReg, B);
+ B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg, B);
+ };
+ return true;
+ }
+
+ return false;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
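
As context for the G_FADD/G_FSUB contraction combines above: folding (fadd (fmul x, y), z) into (fma x, y, z) removes the intermediate rounding of the multiply, which is why canCombineFMadOrFMA gates the combine on fast-math options or the FmContract flag. The standalone sketch below is illustrative only and not part of the patch; the chosen constants are arbitrary. It shows one case where the separate and fused results differ.

#include <cmath>
#include <cstdio>

int main() {
  const double X = 1.0 + 0x1p-27;    // 1 + 2^-27
  const double Z = -(1.0 + 0x1p-26); // -(1 + 2^-26)

  // X*X is exactly 1 + 2^-26 + 2^-54; rounding the product to double drops
  // the 2^-54 term, so the separate multiply-add cancels to 0.0.
  double Separate = X * X + Z;

  // std::fma rounds only once, so the 2^-54 term survives.
  double Fused = std::fma(X, X, Z);

  std::printf("separate = %a\nfused    = %a\n", Separate, Fused);
  return 0;
}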