Diffstat (limited to 'lib/Target/AMDGPU/SIInstrInfo.cpp')
 lib/Target/AMDGPU/SIInstrInfo.cpp | 558
 1 file changed, 379 insertions(+), 179 deletions(-)
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index ba8ed6993a56..d97e6a62971b 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -318,8 +318,25 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
- if (SOffset && SOffset->isReg())
- return false;
+ if (SOffset && SOffset->isReg()) {
+ // We can only handle this if it's a stack access, as any other resource
+ // would require reporting multiple base registers.
+ const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ if (AddrReg && !AddrReg->isFI())
+ return false;
+
+ const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
+ const SIMachineFunctionInfo *MFI
+ = LdSt.getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
+ if (RSrc->getReg() != MFI->getScratchRSrcReg())
+ return false;
+
+ const MachineOperand *OffsetImm =
+ getNamedOperand(LdSt, AMDGPU::OpName::offset);
+ BaseOp = SOffset;
+ Offset = OffsetImm->getImm();
+ return true;
+ }
const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (!AddrReg)
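
A hedged caller-side sketch of how the base/offset reported here is consumed
(TII, TRI, and LdSt are assumed in scope; the names are illustrative, not part
of this patch):

  const MachineOperand *BaseOp;
  int64_t Offset;
  if (TII->getMemOperandWithOffset(LdSt, BaseOp, Offset, TRI) &&
      BaseOp->isReg()) {
    // For a scratch MUBUF access with a register soffset, BaseOp is now the
    // soffset operand and Offset its immediate part, so two stack accesses
    // can be keyed by (BaseOp->getReg(), Offset) for clustering/aliasing.
  }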
@@ -458,9 +475,9 @@ bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
const MachineRegisterInfo &MRI =
FirstLdSt.getParent()->getParent()->getRegInfo();
- const unsigned Reg = FirstDst->getReg();
+ const Register Reg = FirstDst->getReg();
- const TargetRegisterClass *DstRC = TargetRegisterInfo::isVirtualRegister(Reg)
+ const TargetRegisterClass *DstRC = Register::isVirtualRegister(Reg)
? MRI.getRegClass(Reg)
: RI.getPhysRegClass(Reg);
@@ -807,7 +824,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
"Not a VGPR32 reg");
if (Cond.size() == 1) {
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(Cond[0]);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
@@ -820,7 +837,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
assert(Cond[0].isImm() && "Cond[0] is not an immediate");
switch (Cond[0].getImm()) {
case SIInstrInfo::SCC_TRUE: {
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(-1)
@@ -834,7 +851,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
break;
}
case SIInstrInfo::SCC_FALSE: {
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
@@ -850,7 +867,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
case SIInstrInfo::VCCNZ: {
MachineOperand RegOp = Cond[1];
RegOp.setImplicit(false);
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
@@ -864,7 +881,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
case SIInstrInfo::VCCZ: {
MachineOperand RegOp = Cond[1];
RegOp.setImplicit(false);
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
@@ -876,8 +893,8 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
break;
}
case SIInstrInfo::EXECNZ: {
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
- unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
: AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
.addImm(0);
@@ -894,8 +911,8 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
break;
}
case SIInstrInfo::EXECZ: {
- unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
- unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
+ Register SReg = MRI.createVirtualRegister(BoolXExecRC);
+ Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
: AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
.addImm(0);
@@ -925,7 +942,7 @@ unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
const DebugLoc &DL,
unsigned SrcReg, int Value) const {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
.addImm(Value)
.addReg(SrcReg);
@@ -938,7 +955,7 @@ unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
const DebugLoc &DL,
unsigned SrcReg, int Value) const {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
.addImm(Value)
.addReg(SrcReg);
@@ -1052,12 +1069,12 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
// The SGPR spill/restore instructions only work on numbered SGPRs, so we need
// to make sure we are using the correct register class.
- if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
+ if (Register::isVirtualRegister(SrcReg) && SpillSize == 4) {
MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
}
- MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
+ BuildMI(MBB, MI, DL, OpDesc)
.addReg(SrcReg, getKillRegState(isKill)) // data
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
@@ -1068,11 +1085,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
// correctly handled.
if (RI.spillSGPRToVGPR())
FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
- if (ST.hasScalarStores()) {
- // m0 is used for offset to scalar stores if used to spill.
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
- }
-
return;
}
@@ -1083,7 +1095,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
auto MIB = BuildMI(MBB, MI, DL, get(Opcode));
if (RI.hasAGPRs(RC)) {
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MIB.addReg(Tmp, RegState::Define);
}
MIB.addReg(SrcReg, getKillRegState(isKill)) // data
@@ -1182,24 +1194,18 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
- if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
+ if (Register::isVirtualRegister(DestReg) && SpillSize == 4) {
MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
}
if (RI.spillSGPRToVGPR())
FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
- MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
+ BuildMI(MBB, MI, DL, OpDesc, DestReg)
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
.addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
.addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
-
- if (ST.hasScalarStores()) {
- // m0 is used for offset to scalar stores if used to spill.
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
- }
-
return;
}
@@ -1208,7 +1214,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
auto MIB = BuildMI(MBB, MI, DL, get(Opcode), DestReg);
if (RI.hasAGPRs(RC)) {
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MIB.addReg(Tmp, RegState::Define);
}
MIB.addFrameIndex(FrameIndex) // vaddr
@@ -1242,13 +1248,13 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) &&
WorkGroupSize > WavefrontSize) {
- unsigned TIDIGXReg
- = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
- unsigned TIDIGYReg
- = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
- unsigned TIDIGZReg
- = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
- unsigned InputPtrReg =
+ Register TIDIGXReg =
+ MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+ Register TIDIGYReg =
+ MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+ Register TIDIGZReg =
+ MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ Register InputPtrReg =
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
@@ -1410,9 +1416,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
break;
case AMDGPU::V_MOV_B64_PSEUDO: {
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
- unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+ Register Dst = MI.getOperand(0).getReg();
+ Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+ Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
const MachineOperand &SrcOp = MI.getOperand(1);
// FIXME: Will this work for 64-bit floating point immediates?
@@ -1437,6 +1443,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
+ case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
+ expandMovDPP64(MI);
+ break;
+ }
case AMDGPU::V_SET_INACTIVE_B32: {
unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
@@ -1469,7 +1479,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::V_MOVRELD_B32_V8:
case AMDGPU::V_MOVRELD_B32_V16: {
const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
- unsigned VecReg = MI.getOperand(0).getReg();
+ Register VecReg = MI.getOperand(0).getReg();
bool IsUndef = MI.getOperand(1).isUndef();
unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
assert(VecReg == MI.getOperand(1).getReg());
@@ -1492,9 +1502,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case AMDGPU::SI_PC_ADD_REL_OFFSET: {
MachineFunction &MF = *MBB.getParent();
- unsigned Reg = MI.getOperand(0).getReg();
- unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
- unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
+ Register Reg = MI.getOperand(0).getReg();
+ Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
+ Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
// Create a bundle so these instructions won't be re-ordered by the
// post-RA scheduler.
@@ -1531,7 +1541,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
break;
}
case TargetOpcode::BUNDLE: {
- if (!MI.mayLoad())
+ if (!MI.mayLoad() || MI.hasUnmodeledSideEffects())
return false;
// If it is a load it must be a memory clause
@@ -1550,6 +1560,64 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
+std::pair<MachineInstr*, MachineInstr*>
+SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
+ assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ Register Dst = MI.getOperand(0).getReg();
+ unsigned Part = 0;
+ MachineInstr *Split[2];
+
+ for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
+ auto MovDPP = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_dpp));
+ if (Dst.isPhysical()) {
+ MovDPP.addDef(RI.getSubReg(Dst, Sub));
+ } else {
+ assert(MRI.isSSA());
+ auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MovDPP.addDef(Tmp);
+ }
+
+ for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
+ const MachineOperand &SrcOp = MI.getOperand(I);
+ assert(!SrcOp.isFPImm());
+ if (SrcOp.isImm()) {
+ APInt Imm(64, SrcOp.getImm());
+ Imm.ashrInPlace(Part * 32);
+ MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
+ } else {
+ assert(SrcOp.isReg());
+ Register Src = SrcOp.getReg();
+ if (Src.isPhysical())
+ MovDPP.addReg(RI.getSubReg(Src, Sub));
+ else
+ MovDPP.addReg(Src, SrcOp.isUndef() ? RegState::Undef : 0, Sub);
+ }
+ }
+
+ for (unsigned I = 3; I < MI.getNumExplicitOperands(); ++I)
+ MovDPP.addImm(MI.getOperand(I).getImm());
+
+ Split[Part] = MovDPP;
+ ++Part;
+ }
+
+ if (Dst.isVirtual())
+ BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), Dst)
+ .addReg(Split[0]->getOperand(0).getReg())
+ .addImm(AMDGPU::sub0)
+ .addReg(Split[1]->getOperand(0).getReg())
+ .addImm(AMDGPU::sub1);
+
+ MI.eraseFromParent();
+ return std::make_pair(Split[0], Split[1]);
+}
+
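A hedged illustration of what expandMovDPP64 above produces for a virtual
destination (MIR-like; register names are invented):

  // %dst:vreg_64 = V_MOV_B64_DPP_PSEUDO %old:vreg_64, %src:vreg_64, <dpp args>
  // expands into
  //   %lo:vgpr_32  = V_MOV_B32_dpp %old.sub0, %src.sub0, <dpp args>
  //   %hi:vgpr_32  = V_MOV_B32_dpp %old.sub1, %src.sub1, <dpp args>
  //   %dst:vreg_64 = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1
  // A physical %dst has its sub0/sub1 written directly, with no REG_SEQUENCE;
  // a 64-bit immediate source is split into two 32-bit literals by the
  // shift-and-truncate in the loop above.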
bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
MachineOperand &Src0,
unsigned Src0OpName,
@@ -1574,7 +1642,7 @@ bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
MachineOperand &RegOp,
MachineOperand &NonRegOp) {
- unsigned Reg = RegOp.getReg();
+ Register Reg = RegOp.getReg();
unsigned SubReg = RegOp.getSubReg();
bool IsKill = RegOp.isKill();
bool IsDead = RegOp.isDead();
@@ -1646,7 +1714,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// This needs to be implemented because the source modifiers may be inserted
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
-bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
+bool SIInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx0,
unsigned &SrcOpIdx1) const {
return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1);
}
@@ -1710,7 +1779,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// FIXME: Virtual register workaround for RegScavenger not working with empty
// blocks.
- unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
auto I = MBB.end();
@@ -2163,7 +2232,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
SmallVector<unsigned, 8> Regs;
for (int Idx = 0; Idx != NElts; ++Idx) {
- unsigned DstElt = MRI.createVirtualRegister(EltRC);
+ Register DstElt = MRI.createVirtualRegister(EltRC);
Regs.push_back(DstElt);
unsigned SubIdx = SubIndices[Idx];
@@ -2327,7 +2396,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
UseMI.RemoveOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
- unsigned Src1Reg = Src1->getReg();
+ Register Src1Reg = Src1->getReg();
unsigned Src1SubReg = Src1->getSubReg();
Src0->setReg(Src1Reg);
Src0->setSubReg(Src1SubReg);
@@ -2367,12 +2436,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
MRI->hasOneUse(Src0->getReg())) {
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
Src0Inlined = true;
- } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
- (ST.getConstantBusLimit(Opc) <= 1 &&
- RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
- (RI.isVirtualRegister(Src0->getReg()) &&
- (ST.getConstantBusLimit(Opc) <= 1 &&
- RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
+ } else if ((Register::isPhysicalRegister(Src0->getReg()) &&
+ (ST.getConstantBusLimit(Opc) <= 1 &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
+ (Register::isVirtualRegister(Src0->getReg()) &&
+ (ST.getConstantBusLimit(Opc) <= 1 &&
+ RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
return false;
// VGPR is okay as Src0 - fallthrough
}
@@ -2385,10 +2454,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
MRI->hasOneUse(Src1->getReg()) &&
commuteInstruction(UseMI)) {
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
- } else if ((RI.isPhysicalRegister(Src1->getReg()) &&
- RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
- (RI.isVirtualRegister(Src1->getReg()) &&
- RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+ } else if ((Register::isPhysicalRegister(Src1->getReg()) &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
+ (Register::isVirtualRegister(Src1->getReg()) &&
+ RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
return false;
// VGPR is okay as Src1 - fallthrough
}
@@ -2472,8 +2541,7 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
}
bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA) const {
+ const MachineInstr &MIb) const {
assert((MIa.mayLoad() || MIa.mayStore()) &&
"MIa must load from or modify a memory location");
assert((MIb.mayLoad() || MIb.mayStore()) &&
@@ -2664,6 +2732,7 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
+ MI.getOpcode() == AMDGPU::S_DENORM_MODE ||
changesVGPRIndexingMode(MI);
}
@@ -2865,8 +2934,16 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
if (OpInfo.RegClass < 0)
return false;
- if (MO.isImm() && isInlineConstant(MO, OpInfo))
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+
+ if (MO.isImm() && isInlineConstant(MO, OpInfo)) {
+ if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() &&
+ OpNo == (unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::src2))
+ return false;
return RI.opCanUseInlineConstant(OpInfo.OperandType);
+ }
if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
return false;
@@ -2874,8 +2951,6 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
return true;
- const MachineFunction *MF = MI.getParent()->getParent();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
return ST.hasVOP3Literal();
}
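
A hedged example of the MAI restriction added above (MIR-like, operands
invented; exact MFMA flavors differ): on subtargets with
hasMFMAInlineLiteralBug(),

  // %d:areg_128 = V_MFMA_F32_4X4X1F32 %a:vgpr_32, %b:vgpr_32, 0
  // the inline constant 0 in src2 is rejected by the check above, so operand
  // legalization has to place the value in registers rather than encode it
  // inline.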
@@ -3036,7 +3111,7 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
if (!MO.isUse())
return false;
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (Register::isVirtualRegister(MO.getReg()))
return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
// Null is free
@@ -3093,7 +3168,8 @@ static bool shouldReadExec(const MachineInstr &MI) {
return true;
}
- if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
+ if (MI.isPreISelOpcode() ||
+ SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
SIInstrInfo::isSALU(MI) ||
SIInstrInfo::isSMRD(MI))
return false;
@@ -3104,7 +3180,7 @@ static bool shouldReadExec(const MachineInstr &MI) {
static bool isSubRegOf(const SIRegisterInfo &TRI,
const MachineOperand &SuperVec,
const MachineOperand &SubReg) {
- if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
+ if (Register::isPhysicalRegister(SubReg.getReg()))
return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
@@ -3144,8 +3220,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (!Op.isReg())
continue;
- unsigned Reg = Op.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
+ Register Reg = Op.getReg();
+ if (!Register::isVirtualRegister(Reg) && !RC->contains(Reg)) {
ErrInfo = "inlineasm operand has incorrect register class.";
return false;
}
@@ -3209,9 +3285,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
continue;
if (RegClass != -1) {
- unsigned Reg = MI.getOperand(i).getReg();
- if (Reg == AMDGPU::NoRegister ||
- TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MI.getOperand(i).getReg();
+ if (Reg == AMDGPU::NoRegister || Register::isVirtualRegister(Reg))
continue;
const TargetRegisterClass *RC = RI.getRegClass(RegClass);
@@ -3304,7 +3379,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo =
"Dst register should be tied to implicit use of preserved register";
return false;
- } else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) &&
+ } else if (Register::isPhysicalRegister(TiedMO.getReg()) &&
Dst.getReg() != TiedMO.getReg()) {
ErrInfo = "Dst register should use same physical register as preserved";
return false;
@@ -3409,6 +3484,32 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ // Special case for writelane: this can break the multiple constant bus rule,
+ // but it still can't use more than one SGPR register.
+ if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
+ unsigned SGPRCount = 0;
+ Register SGPRUsed = AMDGPU::NoRegister;
+
+ for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) {
+ if (OpIdx == -1)
+ break;
+
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+
+ if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
+ if (MO.isReg() && MO.getReg() != AMDGPU::M0) {
+ if (MO.getReg() != SGPRUsed)
+ ++SGPRCount;
+ SGPRUsed = MO.getReg();
+ }
+ }
+ if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
+ ErrInfo = "WRITELANE instruction violates constant bus restriction";
+ return false;
+ }
+ }
+ }
+
// Verify misc. restrictions on specific instructions.
if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
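
A hedged illustration of what the V_WRITELANE_B32 verification accepts and
rejects (MIR-like, invented registers; a pre-GFX10 constant bus limit of 1 is
assumed):

  // $vgpr0 = V_WRITELANE_B32 $sgpr0, $sgpr1, $vgpr0  ; rejected: two SGPRs
  // $vgpr0 = V_WRITELANE_B32 $sgpr0, $m0, $vgpr0     ; ok: M0 is exempted
  // $vgpr0 = V_WRITELANE_B32 $sgpr0, 7, $vgpr0       ; ok: inline constants
  //                                                  ; don't occupy the bus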
@@ -3609,7 +3710,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
ErrInfo = "Invalid dpp_ctrl value: "
- "broadcats are not supported on GFX10+";
+ "broadcasts are not supported on GFX10+";
return false;
}
if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
@@ -3631,6 +3732,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::PHI: return AMDGPU::PHI;
case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
case AMDGPU::WQM: return AMDGPU::WQM;
+ case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
case AMDGPU::WWM: return AMDGPU::WWM;
case AMDGPU::S_MOV_B32: {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
@@ -3708,9 +3810,9 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
const MCInstrDesc &Desc = get(MI.getOpcode());
if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
Desc.OpInfo[OpNo].RegClass == -1) {
- unsigned Reg = MI.getOperand(OpNo).getReg();
+ Register Reg = MI.getOperand(OpNo).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return MRI.getRegClass(Reg);
return RI.getPhysRegClass(Reg);
}
@@ -3741,7 +3843,7 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
else
VRC = &AMDGPU::VGPR_32RegClass;
- unsigned Reg = MRI.createVirtualRegister(VRC);
+ Register Reg = MRI.createVirtualRegister(VRC);
DebugLoc DL = MBB->findDebugLoc(I);
BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
MO.ChangeToRegister(Reg, false);
@@ -3756,7 +3858,7 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
const {
MachineBasicBlock *MBB = MI->getParent();
DebugLoc DL = MI->getDebugLoc();
- unsigned SubReg = MRI.createVirtualRegister(SubRC);
+ Register SubReg = MRI.createVirtualRegister(SubRC);
if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
@@ -3768,7 +3870,7 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
// value so we don't need to worry about merging its subreg index with the
// SubIdx passed to this function. The register coalescer should be able to
// eliminate this extra copy.
- unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+ Register NewSuperReg = MRI.createVirtualRegister(SuperRC);
BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
.addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
@@ -3814,11 +3916,10 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
if (!MO.isReg())
return false;
- unsigned Reg = MO.getReg();
- const TargetRegisterClass *RC =
- TargetRegisterInfo::isVirtualRegister(Reg) ?
- MRI.getRegClass(Reg) :
- RI.getPhysRegClass(Reg);
+ Register Reg = MO.getReg();
+ const TargetRegisterClass *RC = Register::isVirtualRegister(Reg)
+ ? MRI.getRegClass(Reg)
+ : RI.getPhysRegClass(Reg);
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
@@ -3935,13 +4036,13 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
if (Opc == AMDGPU::V_WRITELANE_B32) {
const DebugLoc &DL = MI.getDebugLoc();
if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) {
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
.add(Src0);
Src0.ChangeToRegister(Reg, false);
}
if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) {
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
const DebugLoc &DL = MI.getDebugLoc();
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
.add(Src1);
@@ -3967,7 +4068,7 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
// select is uniform.
if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
RI.isVGPR(MRI, Src1.getReg())) {
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
const DebugLoc &DL = MI.getDebugLoc();
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
.add(Src1);
@@ -4003,7 +4104,7 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
MI.setDesc(get(CommutedOpc));
- unsigned Src0Reg = Src0.getReg();
+ Register Src0Reg = Src0.getReg();
unsigned Src0SubReg = Src0.getSubReg();
bool Src0Kill = Src0.isKill();
@@ -4039,13 +4140,13 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]);
const DebugLoc &DL = MI.getDebugLoc();
if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
.add(Src1);
Src1.ChangeToRegister(Reg, false);
}
if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
.add(Src2);
Src2.ChangeToRegister(Reg, false);
@@ -4113,12 +4214,12 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
MachineRegisterInfo &MRI) const {
const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
- unsigned DstReg = MRI.createVirtualRegister(SRC);
+ Register DstReg = MRI.createVirtualRegister(SRC);
unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
if (RI.hasAGPRs(VRC)) {
VRC = RI.getEquivalentVGPRClass(VRC);
- unsigned NewSrcReg = MRI.createVirtualRegister(VRC);
+ Register NewSrcReg = MRI.createVirtualRegister(VRC);
BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
get(TargetOpcode::COPY), NewSrcReg)
.addReg(SrcReg);
@@ -4134,7 +4235,7 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
SmallVector<unsigned, 8> SRegs;
for (unsigned i = 0; i < SubRegs; ++i) {
- unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
.addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
@@ -4176,7 +4277,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
MachineOperand &Op,
MachineRegisterInfo &MRI,
const DebugLoc &DL) const {
- unsigned OpReg = Op.getReg();
+ Register OpReg = Op.getReg();
unsigned OpSubReg = Op.getSubReg();
const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
@@ -4186,7 +4287,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
if (DstRC == OpRC)
return;
- unsigned DstReg = MRI.createVirtualRegister(DstRC);
+ Register DstReg = MRI.createVirtualRegister(DstRC);
MachineInstr *Copy =
BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
@@ -4198,8 +4299,19 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
return;
// Try to eliminate the copy if it is copying an immediate value.
- if (Def->isMoveImmediate())
+ if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
FoldImmediate(*Copy, *Def, OpReg, &MRI);
+
+ bool ImpDef = Def->isImplicitDef();
+ while (!ImpDef && Def && Def->isCopy()) {
+ if (Def->getOperand(1).getReg().isPhysical())
+ break;
+ Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
+ ImpDef = Def && Def->isImplicitDef();
+ }
+ if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
+ !ImpDef)
+ Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
}
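
A hedged sketch of the effect of the EXEC bookkeeping above (MIR-like): a copy
into a VGPR class now carries an implicit EXEC read,

  // %v:vgpr_32 = COPY %s:sgpr_32, implicit $exec
  // VGPR writes only happen in active lanes, so modelling the EXEC read keeps
  // later passes from moving such copies across EXEC-mask updates; the operand
  // is skipped when the source traces back to an IMPLICIT_DEF, where the
  // copied value is a don't-care.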
// Emit the actual waterfall loop, executing the wrapped instruction for each
@@ -4223,18 +4335,18 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
MachineBasicBlock::iterator I = LoopBB.begin();
- unsigned VRsrc = Rsrc.getReg();
+ Register VRsrc = Rsrc.getReg();
unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
- unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
- unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
- unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
- unsigned AndCond = MRI.createVirtualRegister(BoolXExecRC);
- unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+ Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
+ Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
+ Register AndCond = MRI.createVirtualRegister(BoolXExecRC);
+ Register SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
// Beginning of the loop, read the next Rsrc variant.
BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
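
For orientation, a hedged pseudocode of the waterfall idiom this helper emits
(structure taken from the surrounding code; exact opcodes depend on wave size):

  // loop:
  //   SRsrcSub0..3 = V_READFIRSTLANE_B32 VRsrc.sub0..3  ; one lane's rsrc
  //   CondReg0 = V_CMP_EQ_U64 (SRsrc[0:1], VRsrc[0:1])
  //   CondReg1 = V_CMP_EQ_U64 (SRsrc[2:3], VRsrc[2:3])
  //   AndCond  = S_AND (CondReg0, CondReg1)
  //   SaveExec = S_AND_SAVEEXEC AndCond   ; restrict EXEC to matching lanes
  //   ... the wrapped instruction runs with the now-uniform SRsrc ...
  //   EXEC = EXEC ^ SaveExec              ; retire the lanes just handled
  //   S_CBRANCH_EXECNZ loop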
@@ -4302,7 +4414,7 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
- unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+ Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
// Save the EXEC mask
BuildMI(MBB, I, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec);
@@ -4370,10 +4482,10 @@ extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
// Create an empty resource descriptor
- unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
// Zero64 = 0
@@ -4430,7 +4542,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
if (!MI.getOperand(i).isReg() ||
- !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
+ !Register::isVirtualRegister(MI.getOperand(i).getReg()))
continue;
const TargetRegisterClass *OpRC =
MRI.getRegClass(MI.getOperand(i).getReg());
@@ -4447,8 +4559,16 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
if (!VRC) {
assert(SRC);
- VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) ? RI.getEquivalentAGPRClass(SRC)
- : RI.getEquivalentVGPRClass(SRC);
+ if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
+ VRC = &AMDGPU::VReg_1RegClass;
+ } else
+ VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+ ? RI.getEquivalentAGPRClass(SRC)
+ : RI.getEquivalentVGPRClass(SRC);
+ } else {
+ VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+ ? RI.getEquivalentAGPRClass(VRC)
+ : RI.getEquivalentVGPRClass(VRC);
}
RC = VRC;
} else {
@@ -4458,7 +4578,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
// Update all the operands so they have the same type.
for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
MachineOperand &Op = MI.getOperand(I);
- if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
+ if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg()))
continue;
// MI is a PHI instruction.
@@ -4483,7 +4603,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
// subregister index types e.g. sub0_sub1 + sub2 + sub3
for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
MachineOperand &Op = MI.getOperand(I);
- if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
+ if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg()))
continue;
const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
@@ -4502,8 +4622,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
// Legalize INSERT_SUBREG
// src0 must have the same register class as dst
if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Src0 = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
if (DstRC != Src0RC) {
@@ -4577,13 +4697,13 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) {
// This is already an ADDR64 instruction so we need to add the pointer
// extracted from the resource descriptor to the current value of VAddr.
- unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
- unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
- unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
+ Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
+ Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
unsigned RsrcPtr, NewSRsrc;
std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
@@ -4623,7 +4743,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
unsigned RsrcPtr, NewSRsrc;
std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
- unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
@@ -4661,6 +4781,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
MIB.addImm(TFE->getImm());
}
+ MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::swz));
+
MIB.cloneMemRefs(MI);
Addr64 = MIB;
} else {
@@ -4933,8 +5055,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
unsigned NewDstReg = AMDGPU::NoRegister;
if (HasDst) {
- unsigned DstReg = Inst.getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ Register DstReg = Inst.getOperand(0).getReg();
+ if (Register::isPhysicalRegister(DstReg))
continue;
// Update the destination register class.
@@ -4943,7 +5065,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
continue;
if (Inst.isCopy() &&
- TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
+ Register::isVirtualRegister(Inst.getOperand(1).getReg()) &&
NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
// Instead of creating a copy where src and dst are the same register
// class, we just replace all uses of dst with src. These kinds of
@@ -4988,8 +5110,8 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- unsigned OldDstReg = Inst.getOperand(0).getReg();
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register OldDstReg = Inst.getOperand(0).getReg();
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned Opc = Inst.getOpcode();
assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
@@ -5022,8 +5144,8 @@ void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
MachineOperand &Dest = Inst.getOperand(0);
MachineOperand &Src = Inst.getOperand(1);
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned SubOp = ST.hasAddNoCarry() ?
AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32;
@@ -5052,7 +5174,7 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
MachineOperand &Src1 = Inst.getOperand(2);
if (ST.hasDLInsts()) {
- unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
@@ -5072,8 +5194,8 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
bool Src1IsSGPR = Src1.isReg() &&
RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
MachineInstr *Xor;
- unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
// Build a pair of scalar instructions and add them to the work list.
// The next iteration over the work list will lower these to the vector
@@ -5117,8 +5239,8 @@ void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
MachineOperand &Src0 = Inst.getOperand(1);
MachineOperand &Src1 = Inst.getOperand(2);
- unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm)
.add(Src0)
@@ -5146,8 +5268,8 @@ void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist,
MachineOperand &Src0 = Inst.getOperand(1);
MachineOperand &Src1 = Inst.getOperand(2);
- unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm)
.add(Src1);
@@ -5189,16 +5311,16 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
- unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+ Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
- unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+ Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
- unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
+ Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
@@ -5226,12 +5348,12 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
- unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned CarryReg = MRI.createVirtualRegister(CarryRC);
- unsigned DeadCarryReg = MRI.createVirtualRegister(CarryRC);
+ Register CarryReg = MRI.createVirtualRegister(CarryRC);
+ Register DeadCarryReg = MRI.createVirtualRegister(CarryRC);
MachineOperand &Dest = Inst.getOperand(0);
MachineOperand &Src0 = Inst.getOperand(1);
@@ -5327,17 +5449,17 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
- unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+ Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
.add(SrcReg0Sub0)
.add(SrcReg1Sub0);
- unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+ Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
.add(SrcReg0Sub1)
.add(SrcReg1Sub1);
- unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
+ Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
@@ -5368,7 +5490,7 @@ void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
- unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
MachineOperand* Op0;
MachineOperand* Op1;
@@ -5384,7 +5506,7 @@ void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
.add(*Op0);
- unsigned NewDest = MRI.createVirtualRegister(DestRC);
+ Register NewDest = MRI.createVirtualRegister(DestRC);
MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest)
.addReg(Interm)
@@ -5411,8 +5533,8 @@ void SIInstrInfo::splitScalar64BitBCNT(
MRI.getRegClass(Src.getReg()) :
&AMDGPU::SGPR_32RegClass;
- unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
@@ -5451,9 +5573,9 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
Offset == 0 && "Not implemented");
if (BitWidth < 32) {
- unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
.addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
@@ -5476,8 +5598,8 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
}
MachineOperand &Src = Inst.getOperand(1);
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
.addImm(31)
@@ -5506,6 +5628,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
switch (UseMI.getOpcode()) {
case AMDGPU::COPY:
case AMDGPU::WQM:
+ case AMDGPU::SOFT_WQM:
case AMDGPU::WWM:
case AMDGPU::REG_SEQUENCE:
case AMDGPU::PHI:
@@ -5531,7 +5654,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
MachineRegisterInfo &MRI,
MachineInstr &Inst) const {
- unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MachineBasicBlock *MBB = Inst.getParent();
MachineOperand &Src0 = Inst.getOperand(1);
MachineOperand &Src1 = Inst.getOperand(2);
@@ -5539,8 +5662,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
switch (Inst.getOpcode()) {
case AMDGPU::S_PACK_LL_B32_B16: {
- unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
// FIXME: Can do a lot better if we know the high bits of src0 or src1 are
// 0.
@@ -5558,7 +5681,7 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
break;
}
case AMDGPU::S_PACK_LH_B32_B16: {
- unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
.addImm(0xffff);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
@@ -5568,8 +5691,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
break;
}
case AMDGPU::S_PACK_HH_B32_B16: {
- unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
.addImm(16)
.add(Src0);
@@ -5623,17 +5746,27 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
case AMDGPU::REG_SEQUENCE:
case AMDGPU::INSERT_SUBREG:
case AMDGPU::WQM:
+ case AMDGPU::SOFT_WQM:
case AMDGPU::WWM: {
const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
if (RI.hasAGPRs(SrcRC)) {
if (RI.hasAGPRs(NewDstRC))
return nullptr;
- NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
+ switch (Inst.getOpcode()) {
+ case AMDGPU::PHI:
+ case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::INSERT_SUBREG:
+ NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
+ break;
+ default:
+ NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+ }
+
if (!NewDstRC)
return nullptr;
} else {
- if (RI.hasVGPRs(NewDstRC))
+ if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
return nullptr;
NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
@@ -5686,7 +5819,7 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
return MO.getReg();
// If this could be a VGPR or an SGPR, Check the dynamic register class.
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
if (RI.isSGPRClass(RegRC))
UsedSGPRs[i] = Reg;
@@ -5941,7 +6074,7 @@ void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
- unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register DstReg = MRI.createVirtualRegister(RI.getBoolRC());
MachineInstr *SIIF =
BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
.add(Branch->getOperand(0))
@@ -5968,8 +6101,8 @@ void SIInstrInfo::convertNonUniformLoopRegion(
if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
- unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC());
- unsigned BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register DstReg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC());
MachineInstrBuilder HeaderPHIBuilder =
BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
@@ -5979,7 +6112,7 @@ void SIInstrInfo::convertNonUniformLoopRegion(
HeaderPHIBuilder.addReg(BackEdgeReg);
} else {
MachineBasicBlock *PMBB = *PI;
- unsigned ZeroReg = MRI.createVirtualRegister(RI.getBoolRC());
+ Register ZeroReg = MRI.createVirtualRegister(RI.getBoolRC());
materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
ZeroReg, 0);
HeaderPHIBuilder.addReg(ZeroReg);
@@ -6063,13 +6196,30 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- unsigned UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
+ Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
.addReg(UnusedCarry, RegState::Define | RegState::Dead);
}
+MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ Register DestReg,
+ RegScavenger &RS) const {
+ if (ST.hasAddNoCarry())
+ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg);
+
+ Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+ // TODO: Users need to deal with this.
+ if (!UnusedCarry.isValid())
+ return MachineInstrBuilder();
+
+ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
+ .addReg(UnusedCarry, RegState::Define | RegState::Dead);
+}
+
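A hedged usage sketch for the new scavenger-based overload (caller names are
illustrative; handling a failed scavenge is the caller's job, per the TODO):

  MachineInstrBuilder Add = TII->getAddNoCarry(MBB, I, DL, DestReg, *RS);
  if (!Add.getInstr())
    report_fatal_error("no register available for the dead carry-out");
  Add.addImm(Offset)    // src0
     .addReg(BaseReg);  // src1, plus whatever trailing operands (e.g. a
                        // clamp bit) the chosen add opcode still expects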
bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
switch (Opcode) {
case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
@@ -6115,7 +6265,21 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
return false;
const auto RCID = MI.getDesc().OpInfo[Idx].RegClass;
- return RCID == AMDGPU::SReg_128RegClassID;
+ return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
+}
+
+unsigned SIInstrInfo::getNumFlatOffsetBits(unsigned AddrSpace,
+ bool Signed) const {
+ if (!ST.hasFlatInstOffsets())
+ return 0;
+
+ if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS)
+ return 0;
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX10)
+ return Signed ? 12 : 11;
+
+ return Signed ? 13 : 12;
}
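
A hedged reading of the offset ranges this implies, assuming two's-complement
signed immediates:

  // pre-GFX10: signed 13 bits -> [-4096, 4095]; unsigned 12 bits -> [0, 4095]
  // GFX10:     signed 12 bits -> [-2048, 2047]; unsigned 11 bits -> [0, 2047]
  // A result of 0 means no immediate offset at all: either the subtarget has
  // no flat-instruction offsets, or this is the FLAT address space on a
  // subtarget with the segment-offset bug.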
bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
@@ -6254,7 +6418,7 @@ static bool followSubRegDef(MachineInstr &MI,
MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
MachineRegisterInfo &MRI) {
assert(MRI.isSSA());
- if (!TargetRegisterInfo::isVirtualRegister(P.Reg))
+ if (!Register::isVirtualRegister(P.Reg))
return nullptr;
auto RSR = P;
@@ -6265,8 +6429,7 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
case AMDGPU::COPY:
case AMDGPU::V_MOV_B32_e32: {
auto &Op1 = MI->getOperand(1);
- if (Op1.isReg() &&
- TargetRegisterInfo::isVirtualRegister(Op1.getReg())) {
+ if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg())) {
if (Op1.isUndef())
return nullptr;
RSR = getRegSubRegPair(Op1);
@@ -6360,3 +6523,40 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
return true;
}
}
+
+MachineInstr *SIInstrInfo::createPHIDestinationCopy(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt,
+ const DebugLoc &DL, Register Src, Register Dst) const {
+ auto Cur = MBB.begin();
+ if (Cur != MBB.end())
+ do {
+ if (!Cur->isPHI() && Cur->readsRegister(Dst))
+ return BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src);
+ ++Cur;
+ } while (Cur != MBB.end() && Cur != LastPHIIt);
+
+ return TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src,
+ Dst);
+}
+
+MachineInstr *SIInstrInfo::createPHISourceCopy(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
+ const DebugLoc &DL, Register Src, Register SrcSubReg, Register Dst) const {
+ if (InsPt != MBB.end() &&
+ (InsPt->getOpcode() == AMDGPU::SI_IF ||
+ InsPt->getOpcode() == AMDGPU::SI_ELSE ||
+ InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
+ InsPt->definesRegister(Src)) {
+ InsPt++;
+ return BuildMI(MBB, InsPt, InsPt->getDebugLoc(),
+ get(ST.isWave32() ? AMDGPU::S_MOV_B32_term
+ : AMDGPU::S_MOV_B64_term),
+ Dst)
+ .addReg(Src, 0, SrcSubReg)
+ .addReg(AMDGPU::EXEC, RegState::Implicit);
+ }
+ return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg,
+ Dst);
+}
+
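A hedged illustration of why createPHISourceCopy special-cases SI_IF-style
definitions (MIR-like, invented names):

  // %mask:sreg_64 = SI_IF %cond, %bb.merge
  // %phi.src:sreg_64 = S_MOV_B64_term %mask, implicit $exec
  // The PHI input is copied with a terminator move that reads EXEC, so it
  // stays glued to the EXEC manipulation at the block end instead of being a
  // plain COPY that could drift away from it (S_MOV_B32_term on wave32).
  // createPHIDestinationCopy, symmetrically, places the destination copy just
  // before an early non-PHI reader of the value when one exists.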
+bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); }