summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIFoldOperands.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp73
1 files changed, 41 insertions, 32 deletions
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index f4e866958369..d679abd107d2 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -201,49 +201,55 @@ static bool updateOperand(FoldCandidate &Fold,
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
}
}
+ }
- if (Fold.needsShrink()) {
- MachineBasicBlock *MBB = MI->getParent();
- auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
- if (Liveness != MachineBasicBlock::LQR_Dead)
- return false;
-
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- int Op32 = Fold.getShrinkOpcode();
- MachineOperand &Dst0 = MI->getOperand(0);
- MachineOperand &Dst1 = MI->getOperand(1);
- assert(Dst0.isDef() && Dst1.isDef());
-
- bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
+ if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) {
+ MachineBasicBlock *MBB = MI->getParent();
+ auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+ if (Liveness != MachineBasicBlock::LQR_Dead)
+ return false;
- const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
- unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
- const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
- unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ int Op32 = Fold.getShrinkOpcode();
+ MachineOperand &Dst0 = MI->getOperand(0);
+ MachineOperand &Dst1 = MI->getOperand(1);
+ assert(Dst0.isDef() && Dst1.isDef());
- MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+ bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
- if (HaveNonDbgCarryUse) {
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
- .addReg(AMDGPU::VCC, RegState::Kill);
- }
+ const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+ unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
- // Keep the old instruction around to avoid breaking iterators, but
- // replace the outputs with dummy registers.
- Dst0.setReg(NewReg0);
- Dst1.setReg(NewReg1);
+ MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
- if (Fold.isCommuted())
- TII.commuteInstruction(*Inst32, false);
- return true;
+ if (HaveNonDbgCarryUse) {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
+ .addReg(AMDGPU::VCC, RegState::Kill);
}
- Old.ChangeToImmediate(Fold.ImmToFold);
+ // Keep the old instruction around to avoid breaking iterators, but
+ // replace it with a dummy instruction to remove uses.
+ //
+ // FIXME: We should not invert how this pass looks at operands to avoid
+ // this. Should track set of foldable movs instead of looking for uses
+ // when looking at a use.
+ Dst0.setReg(NewReg0);
+ for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
+ MI->RemoveOperand(I);
+ MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
+
+ if (Fold.isCommuted())
+ TII.commuteInstruction(*Inst32, false);
return true;
}
assert(!Fold.needsShrink() && "not handled");
+ if (Fold.isImm()) {
+ Old.ChangeToImmediate(Fold.ImmToFold);
+ return true;
+ }
+
if (Fold.isFI()) {
Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
return true;
@@ -344,7 +350,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if ((Opc == AMDGPU::V_ADD_I32_e64 ||
Opc == AMDGPU::V_SUB_I32_e64 ||
Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
- OpToFold->isImm()) {
+ (OpToFold->isImm() || OpToFold->isFI())) {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
// Verify the other operand is a VGPR, otherwise we would violate the
@@ -357,7 +363,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
assert(MI->getOperand(1).isDef());
- int Op32 = AMDGPU::getVOPe32(Opc);
+ // Make sure to get the 32-bit version of the commuted opcode.
+ unsigned MaybeCommutedOpc = MI->getOpcode();
+ int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
+
FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
Op32));
return true;