diff options
Diffstat (limited to 'lib/Target/AMDGPU')
-rw-r--r--  lib/Target/AMDGPU/SIFoldOperands.cpp   | 73
-rw-r--r--  lib/Target/AMDGPU/VOP2Instructions.td  | 12
2 files changed, 44 insertions, 41 deletions
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index f4e866958369..d679abd107d2 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -201,49 +201,55 @@ static bool updateOperand(FoldCandidate &Fold,          Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);        }      } +  } -    if (Fold.needsShrink()) { -      MachineBasicBlock *MBB = MI->getParent(); -      auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); -      if (Liveness != MachineBasicBlock::LQR_Dead) -        return false; - -      MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); -      int Op32 = Fold.getShrinkOpcode(); -      MachineOperand &Dst0 = MI->getOperand(0); -      MachineOperand &Dst1 = MI->getOperand(1); -      assert(Dst0.isDef() && Dst1.isDef()); - -      bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); +  if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) { +    MachineBasicBlock *MBB = MI->getParent(); +    auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); +    if (Liveness != MachineBasicBlock::LQR_Dead) +      return false; -      const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); -      unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); -      const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg()); -      unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC); +    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); +    int Op32 = Fold.getShrinkOpcode(); +    MachineOperand &Dst0 = MI->getOperand(0); +    MachineOperand &Dst1 = MI->getOperand(1); +    assert(Dst0.isDef() && Dst1.isDef()); -      MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); +    bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); -      if (HaveNonDbgCarryUse) { -        BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) -          .addReg(AMDGPU::VCC, RegState::Kill); -      } +    
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); +    unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); -      // Keep the old instruction around to avoid breaking iterators, but -      // replace the outputs with dummy registers. -      Dst0.setReg(NewReg0); -      Dst1.setReg(NewReg1); +    MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); -      if (Fold.isCommuted()) -        TII.commuteInstruction(*Inst32, false); -      return true; +    if (HaveNonDbgCarryUse) { +      BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) +        .addReg(AMDGPU::VCC, RegState::Kill);      } -    Old.ChangeToImmediate(Fold.ImmToFold); +    // Keep the old instruction around to avoid breaking iterators, but +    // replace it with a dummy instruction to remove uses. +    // +    // FIXME: We should not invert how this pass looks at operands to avoid +    // this. Should track set of foldable movs instead of looking for uses +    // when looking at a use. 
+    Dst0.setReg(NewReg0); +    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I) +      MI->RemoveOperand(I); +    MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF)); + +    if (Fold.isCommuted()) +      TII.commuteInstruction(*Inst32, false);      return true;    }    assert(!Fold.needsShrink() && "not handled"); +  if (Fold.isImm()) { +    Old.ChangeToImmediate(Fold.ImmToFold); +    return true; +  } +    if (Fold.isFI()) {      Old.ChangeToFrameIndex(Fold.FrameIndexToFold);      return true; @@ -344,7 +350,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,        if ((Opc == AMDGPU::V_ADD_I32_e64 ||             Opc == AMDGPU::V_SUB_I32_e64 ||             Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME -          OpToFold->isImm()) { +          (OpToFold->isImm() || OpToFold->isFI())) {          MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();          // Verify the other operand is a VGPR, otherwise we would violate the @@ -357,7 +363,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,          assert(MI->getOperand(1).isDef()); -        int Op32 =  AMDGPU::getVOPe32(Opc); +        // Make sure to get the 32-bit version of the commuted opcode. 
+        unsigned MaybeCommutedOpc = MI->getOpcode(); +        int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc); +          FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,                                           Op32));          return true; diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index e3fd7b5f9fad..8cf524a5128d 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -515,18 +515,12 @@ let AddedComplexity = 1 in {  }  let SubtargetPredicate = HasAddNoCarryInsts in { -  def : DivergentBinOp<add, V_ADD_U32_e32>; -  def : DivergentBinOp<sub, V_SUB_U32_e32>; -  def : DivergentBinOp<sub, V_SUBREV_U32_e32>; +  def : DivergentBinOp<add, V_ADD_U32_e64>; +  def : DivergentBinOp<sub, V_SUB_U32_e64>;  } - -def : DivergentBinOp<add, V_ADD_I32_e32>; -  def : DivergentBinOp<add, V_ADD_I32_e64>; -def : DivergentBinOp<sub, V_SUB_I32_e32>; - -def : DivergentBinOp<sub, V_SUBREV_I32_e32>; +def : DivergentBinOp<sub, V_SUB_I32_e64>;  def : DivergentBinOp<srl, V_LSHRREV_B32_e32>;  def : DivergentBinOp<sra, V_ASHRREV_I32_e32>;  | 
