| author | Dimitry Andric <dim@FreeBSD.org> | 2017-05-30 17:37:31 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-05-30 17:37:31 +0000 |
| commit | ee2f195dd3e40f49698ca4dc2666ec09c770e80d (patch) | |
| tree | 66fa9a69e5789356dfe844991e64bac9222f3a35 /lib/Target/AMDGPU | |
| parent | ab44ce3d598882e51a25eb82eb7ae6308de85ae6 (diff) | |
Diffstat (limited to 'lib/Target/AMDGPU')
| -rw-r--r-- | lib/Target/AMDGPU/AMDGPUSubtarget.h | 2 |
| -rw-r--r-- | lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 |
| -rw-r--r-- | lib/Target/AMDGPU/SIFoldOperands.cpp | 7 |
| -rw-r--r-- | lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 49 |
4 files changed, 43 insertions, 18 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 660879426810f..0582ce95693a8 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -730,7 +730,7 @@ public:
   /// \returns True if waitcnt instruction is needed before barrier instruction,
   /// false otherwise.
   bool needWaitcntBeforeBarrier() const {
-    return getGeneration() < GFX9;
+    return true;
   }
 
   /// \returns true if the flat_scratch register should be initialized with the
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a9d3a31a72407..48827f4639974 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -736,6 +736,9 @@ void GCNPassConfig::addMachineSSAOptimization() {
   addPass(createSIShrinkInstructionsPass());
   if (EnableSDWAPeephole) {
     addPass(&SIPeepholeSDWAID);
+    addPass(&MachineLICMID);
+    addPass(&MachineCSEID);
+    addPass(&SIFoldOperandsID);
     addPass(&DeadMachineInstructionElimID);
   }
 }
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index d63414735b95a..f13629a3185f3 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -247,9 +247,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
 
 // If the use operand doesn't care about the value, this may be an operand only
 // used for register indexing, in which case it is unsafe to fold.
-static bool isUseSafeToFold(const MachineInstr &MI,
+static bool isUseSafeToFold(const SIInstrInfo *TII,
+                            const MachineInstr &MI,
                             const MachineOperand &UseMO) {
-  return !UseMO.isUndef();
+  return !UseMO.isUndef() && !TII->isSDWA(MI);
 
   //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
 }
@@ -261,7 +262,7 @@ void SIFoldOperands::foldOperand(
     SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
   const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
 
-  if (!isUseSafeToFold(*UseMI, UseOp))
+  if (!isUseSafeToFold(TII, *UseMI, UseOp))
     return;
 
   // FIXME: Fold operands with subregs.
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 4dc090d9b7edc..fae249b04492a 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -55,6 +55,7 @@ private:
 
   std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
   std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
+  SmallVector<MachineInstr *, 8> ConvertedInstructions;
 
   Optional<int64_t> foldToImm(const MachineOperand &Op) const;
 
@@ -69,6 +70,7 @@ public:
   void matchSDWAOperands(MachineFunction &MF);
   bool isConvertibleToSDWA(const MachineInstr &MI) const;
   bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
+  void legalizeScalarOperands(MachineInstr &MI) const;
 
   StringRef getPassName() const override { return "SI Peephole SDWA"; }
 
@@ -289,7 +291,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
   MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
   MachineOperand *SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
-  assert(Src && Src->isReg());
+  assert(Src && (Src->isReg() || Src->isImm()));
   if (!isSameReg(*Src, *getReplacedOperand())) {
     // If this is not src0 then it should be src1
     Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
@@ -580,18 +582,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
 }
 
 bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
-  // Check if this instruction can be converted to SDWA:
-  // 1. Does this opcode support SDWA
-  if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1)
-    return false;
-
-  // 2. Are all operands - VGPRs
-  for (const MachineOperand &Operand : MI.explicit_operands()) {
-    if (!Operand.isReg() || !TRI->isVGPR(*MRI, Operand.getReg()))
-      return false;
-  }
-
-  return true;
+  // Check if this instruction has opcode that supports SDWA
+  return AMDGPU::getSDWAOp(MI.getOpcode()) != -1;
 }
 
 bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
@@ -685,7 +677,9 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     if (PotentialMatches.count(Operand->getParentInst()) == 0)
       Converted |= Operand->convertToSDWA(*SDWAInst, TII);
   }
-  if (!Converted) {
+  if (Converted) {
+    ConvertedInstructions.push_back(SDWAInst);
+  } else {
     SDWAInst->eraseFromParent();
     return false;
   }
@@ -698,6 +692,29 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
   return true;
 }
 
+// If an instruction was converted to SDWA it should not have immediates or SGPR
+// operands. Copy its scalar operands into VGPRs.
+void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI) const {
+  const MCInstrDesc &Desc = TII->get(MI.getOpcode());
+  for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
+    MachineOperand &Op = MI.getOperand(I);
+    if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
+      continue;
+    if (Desc.OpInfo[I].RegClass == -1 ||
+        !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
+      continue;
+    unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+                        TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
+    if (Op.isImm())
+      Copy.addImm(Op.getImm());
+    else if (Op.isReg())
+      Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
+                  Op.getSubReg());
+    Op.ChangeToRegister(VGPR, false);
+  }
+}
+
 bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
 
@@ -728,5 +745,9 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
 
   PotentialMatches.clear();
   SDWAOperands.clear();
+
+  while (!ConvertedInstructions.empty())
+    legalizeScalarOperands(*ConvertedInstructions.pop_back_val());
+
   return false;
 }
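The net effect of the SIPeepholeSDWA changes is easier to see outside the LLVM API: isConvertibleToSDWA() no longer insists that every explicit operand already be a VGPR, and the new legalizeScalarOperands() step afterwards materializes any remaining immediate or SGPR operand into a fresh VGPR by inserting a v_mov_b32 in front of the converted instruction. The sketch below is a minimal stand-alone model of that legalization loop, not LLVM code; the Inst/Operand types, register numbering, and mnemonics are invented for the illustration.

```cpp
// Toy model of the legalization strategy: scan a converted SDWA instruction's
// operands, leave VGPRs alone, and rewrite each immediate or SGPR operand to a
// fresh VGPR that is loaded by a v_mov_b32 emitted just before the instruction.
#include <cstdio>
#include <string>
#include <vector>

enum class Kind { VGPR, SGPR, Imm };

struct Operand {
  Kind kind;
  long value; // register number for VGPR/SGPR, literal for Imm
};

struct Inst {
  std::string opcode;
  std::vector<Operand> operands;
};

static long NextVGPR = 10; // pretend v0..v9 are already in use

// Append the v_mov_b32 copies and then the rewritten instruction to Out.
static void legalizeScalarOperands(Inst &MI, std::vector<Inst> &Out) {
  for (Operand &Op : MI.operands) {
    if (Op.kind == Kind::VGPR)
      continue; // already legal for an SDWA source
    long VGPR = NextVGPR++;
    Out.push_back({"v_mov_b32", {{Kind::VGPR, VGPR}, Op}}); // copy scalar to VGPR
    Op = {Kind::VGPR, VGPR}; // the SDWA instruction now reads the copy
  }
  Out.push_back(MI);
}

int main() {
  // An SDWA-converted add with an immediate and an SGPR source, as the relaxed
  // isConvertibleToSDWA() would now allow (mnemonic is illustrative only).
  Inst Add{"v_add_u16_sdwa",
           {{Kind::VGPR, 0}, {Kind::Imm, 42}, {Kind::SGPR, 3}}};
  std::vector<Inst> Program;
  legalizeScalarOperands(Add, Program);

  for (const Inst &I : Program) {
    std::printf("%s", I.opcode.c_str());
    for (const Operand &Op : I.operands) {
      switch (Op.kind) {
      case Kind::VGPR: std::printf(" v%ld", Op.value); break;
      case Kind::SGPR: std::printf(" s%ld", Op.value); break;
      case Kind::Imm:  std::printf(" %ld", Op.value); break;
      }
    }
    std::printf("\n");
  }
  return 0;
}
```

Running the sketch prints the two v_mov_b32 copies followed by the rewritten instruction, which mirrors the shape of MIR the pass produces; in the real pipeline the MachineLICM, MachineCSE, and SIFoldOperands runs added in AMDGPUTargetMachine.cpp above then hoist or clean up redundant copies introduced this way.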