Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp  147
1 file changed, 78 insertions(+), 69 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 3986ca6dfa81..9c6833a7dab6 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -185,6 +185,11 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
if (!MI.getOperand(0).isReg())
TII->commuteInstruction(MI, false, 0, 1);
+ // cmpk requires src0 to be a register
+ const MachineOperand &Src0 = MI.getOperand(0);
+ if (!Src0.isReg())
+ return;
+
const MachineOperand &Src1 = MI.getOperand(1);
if (!Src1.isImm())
return;
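
The guard added above matters because the s_cmpk_* forms this function shrinks to encode only "register vs. 16-bit literal": the immediate must sit in src1, so the pass first tries to commute it out of src0 and now gives up cleanly when the commute fails. A minimal standalone sketch of the range check behind the shrink; fitsSCmpK is an illustrative name, not LLVM API:

  #include <cstdint>

  // s_cmpk_* carries a SIMM16 field: unsigned compares need the value to
  // round-trip through uint16_t, signed compares through int16_t.
  static bool fitsSCmpK(int64_t Imm, bool IsUnsignedCompare) {
    return IsUnsignedCompare
               ? Imm == static_cast<int64_t>(static_cast<uint16_t>(Imm))
               : Imm == static_cast<int64_t>(static_cast<int16_t>(Imm));
  }
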
@@ -220,7 +225,7 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
- if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+ if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
return;
MachineFunction *MF = MI.getParent()->getParent();
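
The null check covers opcodes for which AMDGPU::getMIMGInfo has no entry. As rough intuition for the comment above: the NSA ("non-sequential address") encoding lets each address operand live in an arbitrary VGPR, while the compact form needs one consecutive run. A toy model of that condition, with made-up names and no LLVM types:

  #include <cstddef>
  #include <vector>

  // True when the address registers form one consecutive run, e.g.
  // {8, 9, 10}; only then can the NSA form drop to the compact encoding.
  static bool isContiguousRun(const std::vector<unsigned> &VgprIdx) {
    for (std::size_t I = 1; I < VgprIdx.size(); ++I)
      if (VgprIdx[I] != VgprIdx[0] + I)
        return false;
    return true;
  }
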
@@ -323,60 +328,61 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
MachineOperand *SrcReg = Src0;
MachineOperand *SrcImm = Src1;
- if (SrcImm->isImm() &&
- !AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm())) {
- uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
- uint32_t NewImm = 0;
-
- if (Opc == AMDGPU::S_AND_B32) {
- if (isPowerOf2_32(~Imm)) {
- NewImm = countTrailingOnes(Imm);
- Opc = AMDGPU::S_BITSET0_B32;
- } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
- NewImm = ~Imm;
- Opc = AMDGPU::S_ANDN2_B32;
- }
- } else if (Opc == AMDGPU::S_OR_B32) {
- if (isPowerOf2_32(Imm)) {
- NewImm = countTrailingZeros(Imm);
- Opc = AMDGPU::S_BITSET1_B32;
- } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
- NewImm = ~Imm;
- Opc = AMDGPU::S_ORN2_B32;
- }
- } else if (Opc == AMDGPU::S_XOR_B32) {
- if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
- NewImm = ~Imm;
- Opc = AMDGPU::S_XNOR_B32;
- }
- } else {
- llvm_unreachable("unexpected opcode");
- }
+ if (!SrcImm->isImm() ||
+ AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm()))
+ return false;
+
+ uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
+ uint32_t NewImm = 0;
- if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
- SrcImm == Src0) {
- if (!TII->commuteInstruction(MI, false, 1, 2))
- NewImm = 0;
+ if (Opc == AMDGPU::S_AND_B32) {
+ if (isPowerOf2_32(~Imm)) {
+ NewImm = countTrailingOnes(Imm);
+ Opc = AMDGPU::S_BITSET0_B32;
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_ANDN2_B32;
+ }
+ } else if (Opc == AMDGPU::S_OR_B32) {
+ if (isPowerOf2_32(Imm)) {
+ NewImm = countTrailingZeros(Imm);
+ Opc = AMDGPU::S_BITSET1_B32;
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_ORN2_B32;
+ }
+ } else if (Opc == AMDGPU::S_XOR_B32) {
+ if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_XNOR_B32;
}
+ } else {
+ llvm_unreachable("unexpected opcode");
+ }
- if (NewImm != 0) {
- if (Register::isVirtualRegister(Dest->getReg()) && SrcReg->isReg()) {
- MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
- MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
- return true;
- }
+ if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
+ SrcImm == Src0) {
+ if (!TII->commuteInstruction(MI, false, 1, 2))
+ NewImm = 0;
+ }
- if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
- MI.setDesc(TII->get(Opc));
- if (Opc == AMDGPU::S_BITSET0_B32 ||
- Opc == AMDGPU::S_BITSET1_B32) {
- Src0->ChangeToImmediate(NewImm);
- // Remove the immediate and add the tied input.
- MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
- MI.tieOperands(0, 2);
- } else {
- SrcImm->setImm(NewImm);
- }
+ if (NewImm != 0) {
+ if (Register::isVirtualRegister(Dest->getReg()) && SrcReg->isReg()) {
+ MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
+ MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
+ return true;
+ }
+
+ if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
+ MI.setDesc(TII->get(Opc));
+ if (Opc == AMDGPU::S_BITSET0_B32 ||
+ Opc == AMDGPU::S_BITSET1_B32) {
+ Src0->ChangeToImmediate(NewImm);
+ // Remove the immediate and add the tied input.
+ MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
+ MI.tieOperands(0, 2);
+ } else {
+ SrcImm->setImm(NewImm);
}
}
}
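
The hunk above is a pure restructuring of shrinkScalarLogicOp into early-return form; the folds themselves are unchanged: an AND whose mask clears exactly one bit becomes s_bitset0_b32 of that bit index, an OR with a power of two becomes s_bitset1_b32, and otherwise an immediate whose complement is inline-encodable flips to the *N2/XNOR opcode. A self-contained model of the immediate arithmetic; the helper names are illustrative, not LLVM's:

  #include <cstdint>

  static bool isPow2(uint32_t V) { return V && (V & (V - 1)) == 0; }

  static unsigned trailingOnes(uint32_t V) {
    unsigned N = 0;
    for (; V & 1; V >>= 1)
      ++N;
    return N;
  }

  // Example: s_and_b32 dst, dst, 0xFFFFFFFB clears only bit 2; ~Imm == 4
  // is a power of two and trailingOnes(Imm) == 2 recovers the bit index
  // for s_bitset0_b32 dst, 2.
  static bool foldAndToBitset0(uint32_t Imm, uint32_t &Bit) {
    if (!isPow2(~Imm))
      return false;
    Bit = trailingOnes(Imm);
    return true;
  }
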
@@ -426,8 +432,7 @@ getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
if (Register::isPhysicalRegister(Reg)) {
Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
} else {
- LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
- Sub = TRI.getSubRegFromChannel(I + countTrailingZeros(LM.getAsInteger()));
+ Sub = TRI.getSubRegFromChannel(I + TRI.getChannelFromSubReg(Sub));
}
}
return TargetInstrInfo::RegSubRegPair(Reg, Sub);
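
The replaced lines recovered the starting channel of a sub-register from its lane mask, which bakes in the assumption of one mask bit per 32-bit channel; TRI.getChannelFromSubReg asks the target directly instead. Roughly what the old computation did, as a standalone sketch:

  #include <cstdint>

  // Old-style channel recovery: index of the lowest set lane-mask bit.
  // Only valid while every 32-bit channel owns exactly one mask bit.
  static unsigned channelFromLaneMask(uint64_t LaneMask) {
    unsigned Ch = 0;
    for (; LaneMask && !(LaneMask & 1); LaneMask >>= 1)
      ++Ch;
    return Ch;
  }
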
@@ -472,26 +477,30 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
if (!TRI.isVGPR(MRI, X))
return nullptr;
- for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
- if (YTop.getSubReg() != Tsub)
- continue;
-
- MachineInstr &MovY = *YTop.getParent();
- if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
- MovY.getOpcode() != AMDGPU::COPY) ||
- MovY.getOperand(1).getSubReg() != Tsub)
+ const unsigned SearchLimit = 16;
+ unsigned Count = 0;
+ for (auto Iter = std::next(MovT.getIterator()),
+ E = MovT.getParent()->instr_end();
+ Iter != E && Count < SearchLimit; ++Iter, ++Count) {
+
+ MachineInstr *MovY = &*Iter;
+ if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+ MovY->getOpcode() != AMDGPU::COPY) ||
+ !MovY->getOperand(1).isReg() ||
+ MovY->getOperand(1).getReg() != T ||
+ MovY->getOperand(1).getSubReg() != Tsub)
continue;
- Register Y = MovY.getOperand(0).getReg();
- unsigned Ysub = MovY.getOperand(0).getSubReg();
+ Register Y = MovY->getOperand(0).getReg();
+ unsigned Ysub = MovY->getOperand(0).getSubReg();
- if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
+ if (!TRI.isVGPR(MRI, Y))
continue;
MachineInstr *MovX = nullptr;
- auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
- for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
- if (instReadsReg(&*I, X, Xsub, TRI) ||
+ for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
+ I != IY; ++I) {
+ if (instReadsReg(&*I, X, Xsub, TRI) ||
instModifiesReg(&*I, Y, Ysub, TRI) ||
instModifiesReg(&*I, T, Tsub, TRI) ||
(MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
@@ -516,7 +525,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
MovX = &*I;
}
- if (!MovX || I == E)
+ if (!MovX)
continue;
LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
@@ -533,7 +542,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
.addReg(X1.Reg, 0, X1.SubReg).getInstr();
}
MovX->eraseFromParent();
- MovY.eraseFromParent();
+ MovY->eraseFromParent();
MachineInstr *Next = &*std::next(MovT.getIterator());
if (MRI.use_nodbg_empty(T))
MovT.eraseFromParent();
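
For reference, the triangular copy pattern matchSwap is folding, written as pseudo machine IR with illustrative register names:

  // %t = V_MOV_B32 %x   ; MovT
  // %x = V_MOV_B32 %y   ; MovX, found between MovT and MovY
  // %y = V_MOV_B32 %t   ; MovY, now located by a bounded forward scan
  // =>  %x, %y = V_SWAP_B32 %y, %x

The rewritten loop walks at most SearchLimit (16) instructions forward from MovT instead of iterating T's use list, which is also why the old same-basic-block and I == E checks could be dropped. A toy model of that bounded scan, with made-up types:

  #include <cstddef>
  #include <vector>

  struct Mov { int Dst, Src; };  // toy "mov Dst, Src" instruction

  // Return the index of the first mov reading T within Limit instructions
  // after position MovT, or -1 if none is found in the window.
  static int findMovY(const std::vector<Mov> &Block, std::size_t MovT,
                      int T, unsigned Limit = 16) {
    for (std::size_t I = MovT + 1, N = 0; I < Block.size() && N < Limit;
         ++I, ++N)
      if (Block[I].Src == T)
        return static_cast<int>(I);
    return -1;
  }
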