Diffstat (limited to 'lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r--  lib/Target/AMDGPU/SIFoldOperands.cpp | 114
1 file changed, 83 insertions, 31 deletions
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index 74d77d328019..4eac03168760 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -142,16 +142,20 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
switch (Opc) {
case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_MAC_F16_e64:
- case AMDGPU::V_FMAC_F32_e64: {
+ case AMDGPU::V_FMAC_F32_e64:
+ case AMDGPU::V_FMAC_F16_e64: {
// Special case for mac. Since this is replaced with mad when folded into
// src2, we need to check the legality for the final instruction.
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (static_cast<int>(OpNo) == Src2Idx) {
- bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
- bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
+ bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_F16_e64;
+ bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_F32_e64;
unsigned Opc = IsFMA ?
- AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
+ (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) :
+ (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
const MCInstrDesc &MadDesc = TII->get(Opc);
return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
}
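
The same mac/fmac to mad/fma opcode selection recurs below in tryAddToFoldList. A minimal sketch of that shared mapping, factored into a standalone helper for illustration (the name getThreeAddressOpcode is hypothetical and not part of the patch; the values are taken directly from the two hunks):

static unsigned getThreeAddressOpcode(unsigned Opc) {
  // V_MAC/V_FMAC only accept a fold into src2 as their three-address forms.
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64;
  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
  return IsFMA ? (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9)
               : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
}
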
@@ -235,9 +239,11 @@ static bool updateOperand(FoldCandidate &Fold,
if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
MachineBasicBlock *MBB = MI->getParent();
- auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
- if (Liveness != MachineBasicBlock::LQR_Dead)
+ auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI, 16);
+ if (Liveness != MachineBasicBlock::LQR_Dead) {
+ LLVM_DEBUG(dbgs() << "Not shrinking " << MI << " due to vcc liveness\n");
return false;
+ }
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
int Op32 = Fold.getShrinkOpcode();
@@ -248,7 +254,7 @@ static bool updateOperand(FoldCandidate &Fold,
bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
- unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
+ Register NewReg0 = MRI.createVirtualRegister(Dst0RC);
MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
@@ -314,12 +320,15 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
// Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
unsigned Opc = MI->getOpcode();
if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
- Opc == AMDGPU::V_FMAC_F32_e64) &&
+ Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
(int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
- bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
- bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
+ bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_F16_e64;
+ bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_F32_e64;
unsigned NewOpc = IsFMA ?
- AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
+ (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) :
+ (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
// Check if changing this to a v_mad_{f16, f32} instruction will allow us
// to fold the operand.
@@ -435,7 +444,8 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST)
return false;
- if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy)) {
+ if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
+ TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
return true;
}
@@ -443,8 +453,8 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
if (!OpToFold.isReg())
return false;
- unsigned UseReg = OpToFold.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(UseReg))
+ Register UseReg = OpToFold.getReg();
+ if (!Register::isVirtualRegister(UseReg))
return false;
if (llvm::find_if(FoldList, [UseMI](const FoldCandidate &FC) {
@@ -481,6 +491,9 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
return false; // Can only fold splat constants
}
+ if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op))
+ return false;
+
FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
return true;
}
@@ -518,7 +531,7 @@ void SIFoldOperands::foldOperand(
// REG_SEQUENCE instructions, so we have to fold them into the
// uses of REG_SEQUENCE.
if (UseMI->isRegSequence()) {
- unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
+ Register RegSeqDstReg = UseMI->getOperand(0).getReg();
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
MachineRegisterInfo::use_iterator Next;
@@ -569,15 +582,18 @@ void SIFoldOperands::foldOperand(
OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
if (FoldingImmLike && UseMI->isCopy()) {
- unsigned DestReg = UseMI->getOperand(0).getReg();
- const TargetRegisterClass *DestRC
- = TargetRegisterInfo::isVirtualRegister(DestReg) ?
- MRI->getRegClass(DestReg) :
- TRI->getPhysRegClass(DestReg);
-
- unsigned SrcReg = UseMI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
- TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ Register DestReg = UseMI->getOperand(0).getReg();
+
+ // Don't fold into a copy to a physical register. Doing so would interfere
+ // with the register coalescer's logic which would avoid redundant
+ // initializations.
+ if (DestReg.isPhysical())
+ return;
+
+ const TargetRegisterClass *DestRC = MRI->getRegClass(DestReg);
+
+ Register SrcReg = UseMI->getOperand(1).getReg();
+ if (SrcReg.isVirtual()) { // XXX - This can be an assert?
const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
MachineRegisterInfo::use_iterator NextUse;
@@ -613,10 +629,17 @@ void SIFoldOperands::foldOperand(
return;
UseMI->setDesc(TII->get(MovOp));
+ MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
+ MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
+ while (ImpOpI != ImpOpE) {
+ MachineInstr::mop_iterator Tmp = ImpOpI;
+ ImpOpI++;
+ UseMI->RemoveOperand(UseMI->getOperandNo(Tmp));
+ }
CopiesToReplace.push_back(UseMI);
} else {
if (UseMI->isCopy() && OpToFold.isReg() &&
- TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
+ Register::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) &&
TRI->isVectorRegister(*MRI, UseMI->getOperand(1).getReg()) &&
!UseMI->getOperand(1).getSubReg()) {
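
A minimal sketch of the implicit-operand stripping the new while loop in this hunk performs after the opcode swap; walking the operand list from the tail avoids invalidating earlier indices when RemoveOperand shifts operands down (the helper name stripImplicitOperands is hypothetical, not part of the patch):

static void stripImplicitOperands(MachineInstr &MI) {
  // Implicit operands are appended after the explicit ones.
  for (unsigned I = MI.getNumOperands(); I-- > MI.getNumExplicitOperands(); )
    if (MI.getOperand(I).isImplicit())
      MI.RemoveOperand(I);
}
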
@@ -677,6 +700,9 @@ void SIFoldOperands::foldOperand(
// =>
// %sgpr1 = COPY %sgpr0
UseMI->setDesc(TII->get(AMDGPU::COPY));
+ UseMI->getOperand(1).setReg(OpToFold.getReg());
+ UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
+ UseMI->getOperand(1).setIsKill(false);
UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
return;
}
@@ -708,7 +734,7 @@ void SIFoldOperands::foldOperand(
// Split 64-bit constants into 32-bits for folding.
if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
- unsigned UseReg = UseOp.getReg();
+ Register UseReg = UseOp.getReg();
const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
@@ -810,7 +836,7 @@ static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
if (Op.isReg()) {
// If this has a subregister, it obviously is a register source.
if (Op.getSubReg() != AMDGPU::NoSubRegister ||
- !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
+ !Register::isVirtualRegister(Op.getReg()))
return &Op;
MachineInstr *Def = MRI.getVRegDef(Op.getReg());
@@ -1073,6 +1099,13 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
Copy->addImplicitDefUseOperands(*MF);
for (FoldCandidate &Fold : FoldList) {
+ if (Fold.isReg() && Register::isVirtualRegister(Fold.OpToFold->getReg())) {
+ Register Reg = Fold.OpToFold->getReg();
+ MachineInstr *DefMI = Fold.OpToFold->getParent();
+ if (DefMI->readsRegister(AMDGPU::EXEC, TRI) &&
+ execMayBeModifiedBeforeUse(*MRI, Reg, *DefMI, *Fold.UseMI))
+ continue;
+ }
if (updateOperand(Fold, *TII, *TRI, *ST)) {
// Clear kill flags.
if (Fold.isReg()) {
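
An illustration of the case the new guard in this hunk skips, in the comment style this file already uses (the instruction shapes are assumed, not taken from a test). A VALU def only writes the lanes active in exec at the def, so propagating its source past a possible exec change would alter inactive-lane values at the use:

//   %copy = V_MOV_B32_e32 %src, implicit %exec   ; copies only the active lanes
//   ...                                          ; exec may be rewritten here
//   ... = op %copy                               ; replacing %copy with %src at
//                                                ; this point is not safe
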
@@ -1316,6 +1349,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineBasicBlock::iterator I, Next;
+
+ MachineOperand *CurrentKnownM0Val = nullptr;
for (I = MBB->begin(); I != MBB->end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
@@ -1328,6 +1363,25 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
!tryFoldOMod(MI))
tryFoldClamp(MI);
+
+ // Saw an unknown clobber of m0, so we no longer know what it is.
+ if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
+ CurrentKnownM0Val = nullptr;
+ continue;
+ }
+
+ // Specially track simple redefs of m0 to the same value in a block, so we
+ // can erase the later ones.
+ if (MI.getOperand(0).getReg() == AMDGPU::M0) {
+ MachineOperand &NewM0Val = MI.getOperand(1);
+ if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
+ MI.eraseFromParent();
+ continue;
+ }
+
+ // We aren't tracking other physical registers
+ CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ?
+ nullptr : &NewM0Val;
continue;
}
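
A minimal sketch of the check the m0 tracking above enables, pulled out as a standalone predicate for illustration (the helper name is hypothetical; the pass keeps the logic inline and only reaches it for foldable copies, so a dst/src operand pair is assumed). Two writes of the same value to m0 in one block, with no clobber in between, let the second one be erased:

//   %m0 = S_MOV_B32 -1
//   ...                        ; nothing clobbers m0
//   %m0 = S_MOV_B32 -1         ; identical redef, erased

static bool isRedundantM0Redef(const MachineOperand *KnownM0Val,
                               const MachineInstr &MI) {
  // MI is assumed to be a copy-like (dst, src) instruction, as the pass
  // has already checked with isFoldableCopy.
  const MachineOperand &Dst = MI.getOperand(0);
  if (!KnownM0Val || !Dst.isReg() || Dst.getReg() != AMDGPU::M0)
    return false;
  // Same value as the last tracked write to m0 in this block.
  return KnownM0Val->isIdenticalTo(MI.getOperand(1));
}
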
@@ -1339,8 +1393,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (!FoldingImm && !OpToFold.isReg())
continue;
- if (OpToFold.isReg() &&
- !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
+ if (OpToFold.isReg() && !Register::isVirtualRegister(OpToFold.getReg()))
continue;
// Prevent folding operands backwards in the function. For example,
@@ -1350,8 +1403,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// ...
// %vgpr0 = V_MOV_B32_e32 1, implicit %exec
MachineOperand &Dst = MI.getOperand(0);
- if (Dst.isReg() &&
- !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
+ if (Dst.isReg() && !Register::isVirtualRegister(Dst.getReg()))
continue;
foldInstOperand(MI, OpToFold);