diff options
Diffstat (limited to 'lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r-- | lib/Target/AMDGPU/SIFoldOperands.cpp | 73 |
1 files changed, 41 insertions, 32 deletions
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index f4e866958369..d679abd107d2 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -201,49 +201,55 @@ static bool updateOperand(FoldCandidate &Fold, Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); } } + } - if (Fold.needsShrink()) { - MachineBasicBlock *MBB = MI->getParent(); - auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); - if (Liveness != MachineBasicBlock::LQR_Dead) - return false; - - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - int Op32 = Fold.getShrinkOpcode(); - MachineOperand &Dst0 = MI->getOperand(0); - MachineOperand &Dst1 = MI->getOperand(1); - assert(Dst0.isDef() && Dst1.isDef()); - - bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); + if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) { + MachineBasicBlock *MBB = MI->getParent(); + auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); + if (Liveness != MachineBasicBlock::LQR_Dead) + return false; - const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); - unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); - const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg()); - unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + int Op32 = Fold.getShrinkOpcode(); + MachineOperand &Dst0 = MI->getOperand(0); + MachineOperand &Dst1 = MI->getOperand(1); + assert(Dst0.isDef() && Dst1.isDef()); - MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); + bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); - if (HaveNonDbgCarryUse) { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) - .addReg(AMDGPU::VCC, RegState::Kill); - } + const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); + unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); - // Keep the old instruction around to avoid breaking iterators, but - // replace the outputs with dummy registers. - Dst0.setReg(NewReg0); - Dst1.setReg(NewReg1); + MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); - if (Fold.isCommuted()) - TII.commuteInstruction(*Inst32, false); - return true; + if (HaveNonDbgCarryUse) { + BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) + .addReg(AMDGPU::VCC, RegState::Kill); } - Old.ChangeToImmediate(Fold.ImmToFold); + // Keep the old instruction around to avoid breaking iterators, but + // replace it with a dummy instruction to remove uses. + // + // FIXME: We should not invert how this pass looks at operands to avoid + // this. Should track set of foldable movs instead of looking for uses + // when looking at a use. + Dst0.setReg(NewReg0); + for (unsigned I = MI->getNumOperands() - 1; I > 0; --I) + MI->RemoveOperand(I); + MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF)); + + if (Fold.isCommuted()) + TII.commuteInstruction(*Inst32, false); return true; } assert(!Fold.needsShrink() && "not handled"); + if (Fold.isImm()) { + Old.ChangeToImmediate(Fold.ImmToFold); + return true; + } + if (Fold.isFI()) { Old.ChangeToFrameIndex(Fold.FrameIndexToFold); return true; @@ -344,7 +350,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList, if ((Opc == AMDGPU::V_ADD_I32_e64 || Opc == AMDGPU::V_SUB_I32_e64 || Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME - OpToFold->isImm()) { + (OpToFold->isImm() || OpToFold->isFI())) { MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); // Verify the other operand is a VGPR, otherwise we would violate the @@ -357,7 +363,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList, assert(MI->getOperand(1).isDef()); - int Op32 = AMDGPU::getVOPe32(Opc); + // Make sure to get the 32-bit version of the commuted opcode. + unsigned MaybeCommutedOpc = MI->getOpcode(); + int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc); + FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true, Op32)); return true; |