Diffstat (limited to 'lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r-- | lib/Target/AMDGPU/SIInstrInfo.cpp | 558 |
1 file changed, 379 insertions, 179 deletions
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index ba8ed6993a56..d97e6a62971b 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -318,8 +318,25 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
   if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
     const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
-    if (SOffset && SOffset->isReg())
-      return false;
+    if (SOffset && SOffset->isReg()) {
+      // We can only handle this if it's a stack access, as any other resource
+      // would require reporting multiple base registers.
+      const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+      if (AddrReg && !AddrReg->isFI())
+        return false;
+
+      const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
+      const SIMachineFunctionInfo *MFI
+        = LdSt.getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
+      if (RSrc->getReg() != MFI->getScratchRSrcReg())
+        return false;
+
+      const MachineOperand *OffsetImm =
+        getNamedOperand(LdSt, AMDGPU::OpName::offset);
+      BaseOp = SOffset;
+      Offset = OffsetImm->getImm();
+      return true;
+    }
 
     const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
     if (!AddrReg)
@@ -458,9 +475,9 @@ bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
   const MachineRegisterInfo &MRI =
       FirstLdSt.getParent()->getParent()->getRegInfo();
 
-  const unsigned Reg = FirstDst->getReg();
+  const Register Reg = FirstDst->getReg();
 
-  const TargetRegisterClass *DstRC = TargetRegisterInfo::isVirtualRegister(Reg)
+  const TargetRegisterClass *DstRC = Register::isVirtualRegister(Reg)
     ? MRI.getRegClass(Reg)
     : RI.getPhysRegClass(Reg);
 
@@ -807,7 +824,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
          "Not a VGPR32 reg");
 
   if (Cond.size() == 1) {
-    unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
     BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
       .add(Cond[0]);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
@@ -820,7 +837,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
     assert(Cond[0].isImm() && "Cond[0] is not an immediate");
     switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
-      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                                            : AMDGPU::S_CSELECT_B64), SReg)
        .addImm(-1)
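A note on the unsigned-to-Register migration that runs through this whole commit: llvm::Register distinguishes virtual registers (created by createVirtualRegister) from physical ones by reserving a tag bit, so the two index spaces never collide. A minimal standalone sketch of that idea, assuming a simplified encoding rather than LLVM's exact implementation (the real class lives in llvm/CodeGen/Register.h):

#include <cassert>
#include <cstdint>

// Simplified model: physical registers are small positive indices; virtual
// registers set the high bit. This mirrors the concept behind llvm::Register
// and its isVirtualRegister/isPhysicalRegister predicates, not the exact code.
class Register {
  uint32_t Reg = 0;
  static constexpr uint32_t VirtualFlag = 1u << 31;

public:
  explicit Register(uint32_t R) : Reg(R) {}
  static Register makeVirtual(uint32_t Index) {
    return Register(Index | VirtualFlag);
  }
  static bool isVirtualRegister(uint32_t R) { return (R & VirtualFlag) != 0; }
  static bool isPhysicalRegister(uint32_t R) {
    return R != 0 && !(R & VirtualFlag);
  }
  bool isVirtual() const { return isVirtualRegister(Reg); }
  bool isPhysical() const { return isPhysicalRegister(Reg); }
  operator uint32_t() const { return Reg; }
};

int main() {
  Register Phys(42);                        // e.g. a physical SGPR index
  Register Virt = Register::makeVirtual(7); // like %7 from createVirtualRegister
  assert(Phys.isPhysical() && !Phys.isVirtual());
  assert(Virt.isVirtual() && !Virt.isPhysical());
}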
@@ -834,7 +851,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
      break;
    }
    case SIInstrInfo::SCC_FALSE: {
-      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                                            : AMDGPU::S_CSELECT_B64), SReg)
        .addImm(0)
@@ -850,7 +867,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
    case SIInstrInfo::VCCNZ: {
      MachineOperand RegOp = Cond[1];
      RegOp.setImplicit(false);
-      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
        .add(RegOp);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
@@ -864,7 +881,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
    case SIInstrInfo::VCCZ: {
      MachineOperand RegOp = Cond[1];
      RegOp.setImplicit(false);
-      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
        .add(RegOp);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
@@ -876,8 +893,8 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
      break;
    }
    case SIInstrInfo::EXECNZ: {
-      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
-      unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
+      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
+      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
                                            : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
        .addImm(0);
@@ -894,8 +911,8 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
      break;
    }
    case SIInstrInfo::EXECZ: {
-      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
-      unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
+      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
+      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
                                            : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
        .addImm(0);
@@ -925,7 +942,7 @@ unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
                               const DebugLoc &DL,
                               unsigned SrcReg, int Value) const {
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-  unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC());
+  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
    .addImm(Value)
    .addReg(SrcReg);
@@ -938,7 +955,7 @@ unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
                               const DebugLoc &DL,
                               unsigned SrcReg, int Value) const {
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-  unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC());
+  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
    .addImm(Value)
    .addReg(SrcReg);
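The insertEQ/insertNE helpers above build V_CMP_EQ_I32/V_CMP_NE_I32, which compare per lane and collect one result bit per active lane into a scalar mask register. A standalone sketch of that compare-to-mask semantics, assuming a 64-lane wave for illustration:

#include <cstdint>
#include <cstdio>

// Per-lane compare producing a wave-wide bitmask, the idea behind
// V_CMP_EQ_I32_e64: bit N of the result is set iff active lane N's value
// equals the immediate. 64 lanes matches the wave64 bool register class.
uint64_t vcmpEq(const int32_t Lanes[64], int32_t Value, uint64_t Exec) {
  uint64_t Mask = 0;
  for (unsigned L = 0; L < 64; ++L)
    if ((Exec >> L) & 1)            // only active lanes participate
      if (Lanes[L] == Value)
        Mask |= uint64_t(1) << L;
  return Mask;
}

int main() {
  int32_t Lanes[64] = {};
  Lanes[0] = 5; Lanes[3] = 5; Lanes[10] = 7;
  uint64_t M = vcmpEq(Lanes, 5, ~uint64_t(0));
  std::printf("mask = 0x%llx\n", (unsigned long long)M); // bits 0 and 3 set
}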
@@ -1052,12 +1069,12 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
    // The SGPR spill/restore instructions only work on number sgprs, so we need
    // to make sure we are using the correct register class.
-    if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
+    if (Register::isVirtualRegister(SrcReg) && SpillSize == 4) {
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
    }
 
-    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
+    BuildMI(MBB, MI, DL, OpDesc)
      .addReg(SrcReg, getKillRegState(isKill)) // data
      .addFrameIndex(FrameIndex)               // addr
      .addMemOperand(MMO)
@@ -1068,11 +1085,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
    // correctly handled.
    if (RI.spillSGPRToVGPR())
      FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
-    if (ST.hasScalarStores()) {
-      // m0 is used for offset to scalar stores if used to spill.
-      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
-    }
-
    return;
  }
@@ -1083,7 +1095,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
  auto MIB = BuildMI(MBB, MI, DL, get(Opcode));
  if (RI.hasAGPRs(RC)) {
    MachineRegisterInfo &MRI = MF->getRegInfo();
-    unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    MIB.addReg(Tmp, RegState::Define);
  }
  MIB.addReg(SrcReg, getKillRegState(isKill)) // data
@@ -1182,24 +1194,18 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
    // FIXME: Maybe this should not include a memoperand because it will be
    // lowered to non-memory instructions.
    const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
-    if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
+    if (Register::isVirtualRegister(DestReg) && SpillSize == 4) {
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
    }
 
    if (RI.spillSGPRToVGPR())
      FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
-    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
+    BuildMI(MBB, MI, DL, OpDesc, DestReg)
      .addFrameIndex(FrameIndex) // addr
      .addMemOperand(MMO)
      .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
      .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
-
-    if (ST.hasScalarStores()) {
-      // m0 is used for offset to scalar stores if used to spill.
-      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
-    }
-
    return;
  }
@@ -1208,7 +1214,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
  auto MIB = BuildMI(MBB, MI, DL, get(Opcode), DestReg);
  if (RI.hasAGPRs(RC)) {
    MachineRegisterInfo &MRI = MF->getRegInfo();
-    unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    MIB.addReg(Tmp, RegState::Define);
  }
  MIB.addFrameIndex(FrameIndex) // vaddr
@@ -1242,13 +1248,13 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
 
  if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) &&
      WorkGroupSize > WavefrontSize) {
-    unsigned TIDIGXReg
-      = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
-    unsigned TIDIGYReg
-      = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
-    unsigned TIDIGZReg
-      = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
-    unsigned InputPtrReg =
+    Register TIDIGXReg =
+        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+    Register TIDIGYReg =
+        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+    Register TIDIGZReg =
+        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+    Register InputPtrReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
      if (!Entry.isLiveIn(Reg))
@@ -1410,9 +1416,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
    break;
 
  case AMDGPU::V_MOV_B64_PSEUDO: {
-    unsigned Dst = MI.getOperand(0).getReg();
-    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
-    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+    Register Dst = MI.getOperand(0).getReg();
+    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
 
    const MachineOperand &SrcOp = MI.getOperand(1);
    // FIXME: Will this work for 64-bit floating point immediates?
@@ -1437,6 +1443,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
    MI.eraseFromParent();
    break;
  }
+  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
+    expandMovDPP64(MI);
+    break;
+  }
  case AMDGPU::V_SET_INACTIVE_B32: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
@@ -1469,7 +1479,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  case AMDGPU::V_MOVRELD_B32_V8:
  case AMDGPU::V_MOVRELD_B32_V16: {
    const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
-    unsigned VecReg = MI.getOperand(0).getReg();
+    Register VecReg = MI.getOperand(0).getReg();
    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());
@@ -1492,9 +1502,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  }
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    MachineFunction &MF = *MBB.getParent();
-    unsigned Reg = MI.getOperand(0).getReg();
-    unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
-    unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
+    Register Reg = MI.getOperand(0).getReg();
+    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
+    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
 
    // Create a bundle so these instructions won't be re-ordered by the
    // post-RA scheduler.
@@ -1531,7 +1541,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
    break;
  }
  case TargetOpcode::BUNDLE: {
-    if (!MI.mayLoad())
+    if (!MI.mayLoad() || MI.hasUnmodeledSideEffects())
      return false;
 
    // If it is a load it must be a memory clause
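The V_MOV_B64_PSEUDO expansion above (and expandMovDPP64 in the next hunk) rewrites a 64-bit move as two 32-bit moves into the sub0/sub1 subregisters. The immediate case boils down to this split, shown here with plain integers standing in for APInt:

#include <cstdint>
#include <cstdio>

// Split a 64-bit immediate into the two 32-bit halves that feed the
// sub0 (low) and sub1 (high) V_MOV_B32 instructions of the expanded pseudo.
struct Halves { uint32_t Lo, Hi; };

Halves splitImm64(int64_t Imm) {
  return { uint32_t(uint64_t(Imm) & 0xffffffffu),
           uint32_t(uint64_t(Imm) >> 32) };
}

int main() {
  Halves H = splitImm64(-2);  // bit pattern 0xfffffffffffffffe
  std::printf("lo=0x%08x hi=0x%08x\n", H.Lo, H.Hi);
  // prints lo=0xfffffffe hi=0xffffffff
}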
@@ -1550,6 +1560,64 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  return true;
}
 
+std::pair<MachineInstr*, MachineInstr*>
+SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
+  assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  DebugLoc DL = MBB.findDebugLoc(MI);
+  MachineFunction *MF = MBB.getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  Register Dst = MI.getOperand(0).getReg();
+  unsigned Part = 0;
+  MachineInstr *Split[2];
+
+  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
+    auto MovDPP = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_dpp));
+    if (Dst.isPhysical()) {
+      MovDPP.addDef(RI.getSubReg(Dst, Sub));
+    } else {
+      assert(MRI.isSSA());
+      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      MovDPP.addDef(Tmp);
+    }
+
+    for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
+      const MachineOperand &SrcOp = MI.getOperand(I);
+      assert(!SrcOp.isFPImm());
+      if (SrcOp.isImm()) {
+        APInt Imm(64, SrcOp.getImm());
+        Imm.ashrInPlace(Part * 32);
+        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
+      } else {
+        assert(SrcOp.isReg());
+        Register Src = SrcOp.getReg();
+        if (Src.isPhysical())
+          MovDPP.addReg(RI.getSubReg(Src, Sub));
+        else
+          MovDPP.addReg(Src, SrcOp.isUndef() ? RegState::Undef : 0, Sub);
+      }
+    }
+
+    for (unsigned I = 3; I < MI.getNumExplicitOperands(); ++I)
+      MovDPP.addImm(MI.getOperand(I).getImm());
+
+    Split[Part] = MovDPP;
+    ++Part;
+  }
+
+  if (Dst.isVirtual())
+    BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), Dst)
+      .addReg(Split[0]->getOperand(0).getReg())
+      .addImm(AMDGPU::sub0)
+      .addReg(Split[1]->getOperand(0).getReg())
+      .addImm(AMDGPU::sub1);
+
+  MI.eraseFromParent();
+  return std::make_pair(Split[0], Split[1]);
+}
+
 bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
                                      MachineOperand &Src0,
                                      unsigned Src0OpName,
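Inside expandMovDPP64 the immediate operand is sliced with APInt::ashrInPlace(Part * 32) followed by getLoBits(32). With plain integers the same per-part extraction is (an arithmetic shift, so the high half keeps the sign bits):

#include <cassert>
#include <cstdint>

// Extract the 32-bit slice feeding part 0 (sub0) or part 1 (sub1), the way
// expandMovDPP64 does with APInt::ashrInPlace + getLoBits(32).
uint32_t partImm(int64_t Imm, unsigned Part) {
  assert(Part < 2);
  return uint32_t(uint64_t(Imm >> (Part * 32)) & 0xffffffffu);
}

int main() {
  int64_t Imm = 0x11223344aabbccddLL;
  assert(partImm(Imm, 0) == 0xaabbccddu); // sub0 half
  assert(partImm(Imm, 1) == 0x11223344u); // sub1 half
}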
@@ -1574,7 +1642,7 @@ bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
                                             MachineOperand &RegOp,
                                             MachineOperand &NonRegOp) {
-  unsigned Reg = RegOp.getReg();
+  Register Reg = RegOp.getReg();
  unsigned SubReg = RegOp.getSubReg();
  bool IsKill = RegOp.isKill();
  bool IsDead = RegOp.isDead();
@@ -1646,7 +1714,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// This needs to be implemented because the source modifiers may be inserted
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
-bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
+bool SIInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
+                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1);
}
@@ -1710,7 +1779,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
  // FIXME: Virtual register workaround for RegScavenger not working with empty
  // blocks.
-  unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
 
  auto I = MBB.end();
@@ -2163,7 +2232,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
  SmallVector<unsigned, 8> Regs;
  for (int Idx = 0; Idx != NElts; ++Idx) {
-    unsigned DstElt = MRI.createVirtualRegister(EltRC);
+    Register DstElt = MRI.createVirtualRegister(EltRC);
    Regs.push_back(DstElt);
 
    unsigned SubIdx = SubIndices[Idx];
@@ -2327,7 +2396,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
      UseMI.RemoveOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
 
-      unsigned Src1Reg = Src1->getReg();
+      Register Src1Reg = Src1->getReg();
      unsigned Src1SubReg = Src1->getSubReg();
      Src0->setReg(Src1Reg);
      Src0->setSubReg(Src1SubReg);
@@ -2367,12 +2436,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
        MRI->hasOneUse(Src0->getReg())) {
      Src0->ChangeToImmediate(Def->getOperand(1).getImm());
      Src0Inlined = true;
-    } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
-                (ST.getConstantBusLimit(Opc) <= 1 &&
-                 RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
-               (RI.isVirtualRegister(Src0->getReg()) &&
-                (ST.getConstantBusLimit(Opc) <= 1 &&
-                 RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
+    } else if ((Register::isPhysicalRegister(Src0->getReg()) &&
+                (ST.getConstantBusLimit(Opc) <= 1 &&
+                 RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
+               (Register::isVirtualRegister(Src0->getReg()) &&
+                (ST.getConstantBusLimit(Opc) <= 1 &&
+                 RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
      return false;
 
    // VGPR is okay as Src0 - fallthrough
  }
@@ -2385,10 +2454,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
        MRI->hasOneUse(Src1->getReg()) &&
        commuteInstruction(UseMI)) {
      Src0->ChangeToImmediate(Def->getOperand(1).getImm());
-    } else if ((RI.isPhysicalRegister(Src1->getReg()) &&
-                RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
-               (RI.isVirtualRegister(Src1->getReg()) &&
-                RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+    } else if ((Register::isPhysicalRegister(Src1->getReg()) &&
+                RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
+               (Register::isVirtualRegister(Src1->getReg()) &&
+                RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
      return false;
 
    // VGPR is okay as Src1 - fallthrough
  }
@@ -2472,8 +2541,7 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
}
 
bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
-                                                  const MachineInstr &MIb,
-                                                  AliasAnalysis *AA) const {
+                                                  const MachineInstr &MIb) const {
  assert((MIa.mayLoad() || MIa.mayStore()) &&
         "MIa must load from or modify a memory location");
  assert((MIb.mayLoad() || MIb.mayStore()) &&
@@ -2664,6 +2732,7 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
         MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
+         MI.getOpcode() == AMDGPU::S_DENORM_MODE ||
         changesVGPRIndexingMode(MI);
}
@@ -2865,8 +2934,16 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
  if (OpInfo.RegClass < 0)
    return false;
 
-  if (MO.isImm() && isInlineConstant(MO, OpInfo))
+  const MachineFunction *MF = MI.getParent()->getParent();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+
+  if (MO.isImm() && isInlineConstant(MO, OpInfo)) {
+    if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() &&
+        OpNo ==(unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                                    AMDGPU::OpName::src2))
+      return false;
    return RI.opCanUseInlineConstant(OpInfo.OperandType);
+  }
 
  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
    return false;
@@ -2874,8 +2951,6 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
  if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
    return true;
 
-  const MachineFunction *MF = MI.getParent()->getParent();
-  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  return ST.hasVOP3Literal();
}
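Context for the isImmOperandLegal change above: "inline" constants are encoded directly in the source operand field and are free, while anything else must travel as a 32-bit literal. A rough sketch of the integer side of that test, under the assumption stated in the comment (the real isInlineConstant additionally accepts a handful of floating-point patterns such as 0.5, 1.0, 2.0 and their negations):

#include <cassert>
#include <cstdint>

// AMDGPU treats small integers in [-16, 64] as inline constants; everything
// else needs a literal dword. Simplified: the real check also covers a few
// floating-point encodings.
bool isInlineIntImm(int64_t Imm) { return Imm >= -16 && Imm <= 64; }

int main() {
  assert(isInlineIntImm(0) && isInlineIntImm(64) && isInlineIntImm(-16));
  assert(!isInlineIntImm(65) && !isInlineIntImm(-17));
}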
@@ -3036,7 +3111,7 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
  if (!MO.isUse())
    return false;
 
-  if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+  if (Register::isVirtualRegister(MO.getReg()))
    return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
 
  // Null is free
@@ -3093,7 +3168,8 @@ static bool shouldReadExec(const MachineInstr &MI) {
    return true;
  }
 
-  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
+  if (MI.isPreISelOpcode() ||
+      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
      SIInstrInfo::isSALU(MI) ||
      SIInstrInfo::isSMRD(MI))
    return false;
@@ -3104,7 +3180,7 @@ static bool isSubRegOf(const SIRegisterInfo &TRI,
                       const MachineOperand &SuperVec,
                       const MachineOperand &SubReg) {
-  if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
+  if (Register::isPhysicalRegister(SubReg.getReg()))
    return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
 
  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
@@ -3144,8 +3220,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
      if (!Op.isReg())
        continue;
 
-      unsigned Reg = Op.getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
+      Register Reg = Op.getReg();
+      if (!Register::isVirtualRegister(Reg) && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";
        return false;
      }
@@ -3209,9 +3285,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
      continue;
 
    if (RegClass != -1) {
-      unsigned Reg = MI.getOperand(i).getReg();
-      if (Reg == AMDGPU::NoRegister ||
-          TargetRegisterInfo::isVirtualRegister(Reg))
+      Register Reg = MI.getOperand(i).getReg();
+      if (Reg == AMDGPU::NoRegister || Register::isVirtualRegister(Reg))
        continue;
 
      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
@@ -3304,7 +3379,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
        ErrInfo =
            "Dst register should be tied to implicit use of preserved register";
        return false;
-      } else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) &&
+      } else if (Register::isPhysicalRegister(TiedMO.getReg()) &&
                 Dst.getReg() != TiedMO.getReg()) {
        ErrInfo = "Dst register should use same physical register as preserved";
        return false;
@@ -3409,6 +3484,32 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
    }
  }
 
+  // Special case for writelane - this can break the multiple constant bus rule,
+  // but still can't use more than one SGPR register
+  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
+    unsigned SGPRCount = 0;
+    Register SGPRUsed = AMDGPU::NoRegister;
+
+    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) {
+      if (OpIdx == -1)
+        break;
+
+      const MachineOperand &MO = MI.getOperand(OpIdx);
+
+      if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
+        if (MO.isReg() && MO.getReg() != AMDGPU::M0) {
+          if (MO.getReg() != SGPRUsed)
+            ++SGPRCount;
+          SGPRUsed = MO.getReg();
+        }
+      }
+      if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
+        ErrInfo = "WRITELANE instruction violates constant bus restriction";
+        return false;
+      }
+    }
+  }
+
  // Verify misc. restrictions on specific instructions.
  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
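The new V_WRITELANE_B32 verifier rule above allows repeated constant-bus reads, but only of a single SGPR. A standalone sketch of that counting logic, with a hypothetical SGPRSources list standing in for walking src0/src1/src2:

#include <cassert>
#include <cstdint>
#include <vector>

// Count *distinct* SGPRs among consecutive constant-bus reads, mirroring the
// verifier check: re-reading the same SGPR is fine, a second distinct SGPR
// exceeds the limit. Register number 0 plays the role of AMDGPU::NoRegister.
bool writelaneSGPRsOk(const std::vector<uint32_t> &SGPRSources,
                      unsigned ConstantBusLimit = 1) {
  unsigned Count = 0;
  uint32_t Used = 0;
  for (uint32_t Reg : SGPRSources) {
    if (Reg != Used)
      ++Count;
    Used = Reg;
    if (Count > ConstantBusLimit)
      return false;
  }
  return true;
}

int main() {
  assert(writelaneSGPRsOk({5, 5}));   // same SGPR twice: allowed
  assert(!writelaneSGPRsOk({5, 6}));  // two different SGPRs: rejected
}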
@@ -3609,7 +3710,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
      if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
          ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
        ErrInfo = "Invalid dpp_ctrl value: "
-                  "broadcats are not supported on GFX10+";
+                  "broadcasts are not supported on GFX10+";
        return false;
      }
      if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
@@ -3631,6 +3732,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM: return AMDGPU::WQM;
+  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::WWM: return AMDGPU::WWM;
  case AMDGPU::S_MOV_B32: {
    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
@@ -3708,9 +3810,9 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
  const MCInstrDesc &Desc = get(MI.getOpcode());
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.OpInfo[OpNo].RegClass == -1) {
-    unsigned Reg = MI.getOperand(OpNo).getReg();
+    Register Reg = MI.getOperand(OpNo).getReg();
 
-    if (TargetRegisterInfo::isVirtualRegister(Reg))
+    if (Register::isVirtualRegister(Reg))
      return MRI.getRegClass(Reg);
    return RI.getPhysRegClass(Reg);
  }
@@ -3741,7 +3843,7 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
  else
    VRC = &AMDGPU::VGPR_32RegClass;
 
-  unsigned Reg = MRI.createVirtualRegister(VRC);
+  Register Reg = MRI.createVirtualRegister(VRC);
  DebugLoc DL = MBB->findDebugLoc(I);
  BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
  MO.ChangeToRegister(Reg, false);
@@ -3756,7 +3858,7 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         const {
  MachineBasicBlock *MBB = MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
-  unsigned SubReg = MRI.createVirtualRegister(SubRC);
+  Register SubReg = MRI.createVirtualRegister(SubRC);
 
  if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
    BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
@@ -3768,7 +3870,7 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
  // value so we don't need to worry about merging its subreg index with the
  // SubIdx passed to this function. The register coalescer should be able to
  // eliminate this extra copy.
-  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+  Register NewSuperReg = MRI.createVirtualRegister(SuperRC);
 
  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
    .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
@@ -3814,11 +3916,10 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
  if (!MO.isReg())
    return false;
 
-  unsigned Reg = MO.getReg();
-  const TargetRegisterClass *RC =
-    TargetRegisterInfo::isVirtualRegister(Reg) ?
-    MRI.getRegClass(Reg) :
-    RI.getPhysRegClass(Reg);
+  Register Reg = MO.getReg();
+  const TargetRegisterClass *RC = Register::isVirtualRegister(Reg)
+                                      ? MRI.getRegClass(Reg)
+                                      : RI.getPhysRegClass(Reg);
 
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
@@ -3935,13 +4036,13 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
  if (Opc == AMDGPU::V_WRITELANE_B32) {
    const DebugLoc &DL = MI.getDebugLoc();
    if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) {
-      unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
          .add(Src0);
      Src0.ChangeToRegister(Reg, false);
    }
    if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) {
-      unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      const DebugLoc &DL = MI.getDebugLoc();
      BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
          .add(Src1);
@@ -3967,7 +4068,7 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
  // select is uniform.
  if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
      RI.isVGPR(MRI, Src1.getReg())) {
-    unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    const DebugLoc &DL = MI.getDebugLoc();
    BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
        .add(Src1);
@@ -4003,7 +4104,7 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
 
  MI.setDesc(get(CommutedOpc));
 
-  unsigned Src0Reg = Src0.getReg();
+  Register Src0Reg = Src0.getReg();
  unsigned Src0SubReg = Src0.getSubReg();
  bool Src0Kill = Src0.isKill();
@@ -4039,13 +4140,13 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
    MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]);
    const DebugLoc &DL = MI.getDebugLoc();
    if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
-      unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
        .add(Src1);
      Src1.ChangeToRegister(Reg, false);
    }
    if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
-      unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
        .add(Src2);
      Src2.ChangeToRegister(Reg, false);
@@ -4113,12 +4214,12 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
                                         MachineRegisterInfo &MRI) const {
  const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
  const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
-  unsigned DstReg = MRI.createVirtualRegister(SRC);
+  Register DstReg = MRI.createVirtualRegister(SRC);
  unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
 
  if (RI.hasAGPRs(VRC)) {
    VRC = RI.getEquivalentVGPRClass(VRC);
-    unsigned NewSrcReg = MRI.createVirtualRegister(VRC);
+    Register NewSrcReg = MRI.createVirtualRegister(VRC);
    BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
            get(TargetOpcode::COPY), NewSrcReg)
        .addReg(SrcReg);
@@ -4134,7 +4235,7 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
  SmallVector<unsigned, 8> SRegs;
  for (unsigned i = 0; i < SubRegs; ++i) {
-    unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+    Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
            get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
        .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
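readlaneVGPRToSGPR above copies an N x 32-bit VGPR value into SGPRs by issuing one V_READFIRSTLANE_B32 per 32-bit subregister. Functionally, readfirstlane returns the value held by the lowest active lane; a sketch with a vector standing in for a VGPR's lanes:

#include <cassert>
#include <cstdint>
#include <vector>

// V_READFIRSTLANE_B32 semantics: return the value of the lowest lane whose
// EXEC bit is set. readlaneVGPRToSGPR applies this once per 32-bit subreg.
uint32_t readFirstLane(const std::vector<uint32_t> &VgprLanes, uint64_t Exec) {
  for (unsigned L = 0; L < VgprLanes.size(); ++L)
    if ((Exec >> L) & 1)
      return VgprLanes[L];
  return 0; // result is undefined if no lane is active; 0 for the sketch
}

int main() {
  std::vector<uint32_t> Lanes(64, 0);
  Lanes[2] = 0xdead;
  Lanes[5] = 0xbeef;
  uint64_t Exec = (1u << 2) | (1u << 5);   // lanes 2 and 5 active
  assert(readFirstLane(Lanes, Exec) == 0xdead);
}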
@@ -4176,7 +4277,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                                         MachineOperand &Op,
                                         MachineRegisterInfo &MRI,
                                         const DebugLoc &DL) const {
-  unsigned OpReg = Op.getReg();
+  Register OpReg = Op.getReg();
  unsigned OpSubReg = Op.getSubReg();
 
  const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
@@ -4186,7 +4287,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
  if (DstRC == OpRC)
    return;
 
-  unsigned DstReg = MRI.createVirtualRegister(DstRC);
+  Register DstReg = MRI.createVirtualRegister(DstRC);
  MachineInstr *Copy =
      BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
@@ -4198,8 +4299,19 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
    return;
 
  // Try to eliminate the copy if it is copying an immediate value.
-  if (Def->isMoveImmediate())
+  if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
    FoldImmediate(*Copy, *Def, OpReg, &MRI);
+
+  bool ImpDef = Def->isImplicitDef();
+  while (!ImpDef && Def && Def->isCopy()) {
+    if (Def->getOperand(1).getReg().isPhysical())
+      break;
+    Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
+    ImpDef = Def && Def->isImplicitDef();
+  }
+  if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
+      !ImpDef)
+    Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
}
 
// Emit the actual waterfall loop, executing the wrapped instruction for each
@@ -4223,18 +4335,18 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
 
  MachineBasicBlock::iterator I = LoopBB.begin();
 
-  unsigned VRsrc = Rsrc.getReg();
+  Register VRsrc = Rsrc.getReg();
  unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
 
-  unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
-  unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
-  unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
-  unsigned AndCond = MRI.createVirtualRegister(BoolXExecRC);
-  unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-  unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-  unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-  unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-  unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+  Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
+  Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
+  Register AndCond = MRI.createVirtualRegister(BoolXExecRC);
+  Register SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+  Register SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+  Register SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+  Register SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+  Register SRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
 
  // Beginning of the loop, read the next Rsrc variant.
  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
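The waterfall loop above makes a potentially divergent rsrc operand uniform: each iteration reads the first active lane's value, compares it against every lane, and runs the wrapped instruction with EXEC narrowed to the matching lanes, until all lanes are retired. A scalar simulation of that control flow (lane values and mask widths chosen for illustration):

#include <cstdint>
#include <cstdio>
#include <vector>

// Simulate the waterfall idiom: partition active lanes into groups sharing
// the same (readfirstlane'd) value, executing the body once per group with
// EXEC restricted to that group.
void waterfall(const std::vector<uint32_t> &LaneVals, uint64_t Exec) {
  while (Exec) {
    unsigned First = __builtin_ctzll(Exec);   // lowest active lane
    uint32_t Uniform = LaneVals[First];       // V_READFIRSTLANE_B32
    uint64_t Match = 0;
    for (unsigned L = 0; L < LaneVals.size(); ++L)
      if (((Exec >> L) & 1) && LaneVals[L] == Uniform)
        Match |= uint64_t(1) << L;            // V_CMP_EQ + S_AND
    std::printf("execute with rsrc=0x%x exec=0x%llx\n", Uniform,
                (unsigned long long)Match);   // run the wrapped instruction
    Exec &= ~Match;                           // retire these lanes, loop back
  }
}

int main() {
  waterfall({1, 2, 1, 2}, 0xF);  // two iterations: lanes {0,2}, then {1,3}
}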
@@ -4302,7 +4414,7 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
  unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
-  unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
 
  // Save the EXEC mask
  BuildMI(MBB, I, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec);
@@ -4370,10 +4482,10 @@ extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
                               AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
 
  // Create an empty resource descriptor
-  unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-  unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-  unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-  unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+  Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+  Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+  Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
  uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
 
  // Zero64 = 0
@@ -4430,7 +4542,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
    const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
      if (!MI.getOperand(i).isReg() ||
-          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
+          !Register::isVirtualRegister(MI.getOperand(i).getReg()))
        continue;
      const TargetRegisterClass *OpRC =
          MRI.getRegClass(MI.getOperand(i).getReg());
@@ -4447,8 +4559,16 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
    if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
      if (!VRC) {
        assert(SRC);
-        VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) ? RI.getEquivalentAGPRClass(SRC)
-                                                : RI.getEquivalentVGPRClass(SRC);
+        if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
+          VRC = &AMDGPU::VReg_1RegClass;
+        } else
+          VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+                    ? RI.getEquivalentAGPRClass(SRC)
+                    : RI.getEquivalentVGPRClass(SRC);
+      } else {
+        VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+                  ? RI.getEquivalentAGPRClass(VRC)
+                  : RI.getEquivalentVGPRClass(VRC);
      }
      RC = VRC;
    } else {
@@ -4458,7 +4578,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
    // Update all the operands so they have the same type.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      MachineOperand &Op = MI.getOperand(I);
-      if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
+      if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg()))
        continue;
 
      // MI is a PHI instruction.
@@ -4483,7 +4603,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
    // subregister index types e.g. sub0_sub1 + sub2 + sub3
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      MachineOperand &Op = MI.getOperand(I);
-      if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
+      if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg()))
        continue;
 
      const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
@@ -4502,8 +4622,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
  // Legalize INSERT_SUBREG
  // src0 must have the same register class as dst
  if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
-    unsigned Dst = MI.getOperand(0).getReg();
-    unsigned Src0 = MI.getOperand(1).getReg();
+    Register Dst = MI.getOperand(0).getReg();
+    Register Src0 = MI.getOperand(1).getReg();
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
    if (DstRC != Src0RC) {
@@ -4577,13 +4697,13 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
    if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) {
      // This is already an ADDR64 instruction so we need to add the pointer
      // extracted from the resource descriptor to the current value of VAddr.
-      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+      Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
 
      const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
-      unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
-      unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
+      Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
+      Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
 
      unsigned RsrcPtr, NewSRsrc;
      std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
@@ -4623,7 +4743,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
      unsigned RsrcPtr, NewSRsrc;
      std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
 
-      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+      Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
      MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
      MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
      MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
@@ -4661,6 +4781,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
        MIB.addImm(TFE->getImm());
      }
 
+      MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::swz));
+
      MIB.cloneMemRefs(MI);
      Addr64 = MIB;
    } else {
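In the ADDR64 path above, the 64-bit pointer extracted from the resource descriptor is added to VAddr in 32-bit halves: the low add produces a carry (held in the CondReg registers) that the high add consumes. The underlying arithmetic, in plain integers:

#include <cassert>
#include <cstdint>

// 64-bit add out of 32-bit halves, the V_ADD / V_ADDC pattern the ADDR64
// legalization relies on: the low add's carry-out feeds the high add.
uint64_t add64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t ALo = uint32_t(A), AHi = uint32_t(A >> 32);
  uint32_t BLo = uint32_t(B), BHi = uint32_t(B >> 32);
  uint32_t Lo = ALo + BLo;          // low add -> NewVAddrLo
  uint32_t Carry = Lo < ALo;        // carry-out, the CondReg role
  uint32_t Hi = AHi + BHi + Carry;  // carry-in add -> NewVAddrHi
  return (uint64_t(Hi) << 32) | Lo; // recombined as NewVAddr
}

int main() {
  assert(add64ViaHalves(0xffffffffull, 1) == 0x100000000ull);
  assert(add64ViaHalves(0x1ffffffffull, 0x300000001ull) == 0x500000000ull);
}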
@@ -4933,8 +5055,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
    bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
    unsigned NewDstReg = AMDGPU::NoRegister;
    if (HasDst) {
-      unsigned DstReg = Inst.getOperand(0).getReg();
-      if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+      Register DstReg = Inst.getOperand(0).getReg();
+      if (Register::isPhysicalRegister(DstReg))
        continue;
 
      // Update the destination register class.
@@ -4943,7 +5065,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
        continue;
 
      if (Inst.isCopy() &&
-          TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
+          Register::isVirtualRegister(Inst.getOperand(1).getReg()) &&
          NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
        // Instead of creating a copy where src and dst are the same register
        // class, we just replace all uses of dst with src. These kinds of
@@ -4988,8 +5110,8 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
  MachineBasicBlock &MBB = *Inst.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
 
-  unsigned OldDstReg = Inst.getOperand(0).getReg();
-  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  Register OldDstReg = Inst.getOperand(0).getReg();
+  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
  unsigned Opc = Inst.getOpcode();
  assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
@@ -5022,8 +5144,8 @@ void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
  MachineOperand &Dest = Inst.getOperand(0);
  MachineOperand &Src = Inst.getOperand(1);
-  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
  unsigned SubOp = ST.hasAddNoCarry() ?
    AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32;
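lowerScalarAbs above computes the negation into TmpReg with the selected V_SUB opcode; the full expansion of a scalar abs in VALU terms is max(x, 0 - x). A sketch of the value computation, assuming the max step (the second instruction is not visible in this hunk's context):

#include <cassert>
#include <cstdint>

// abs(x) as max(x, 0 - x). Note INT32_MIN maps to itself, matching the
// hardware's modular arithmetic.
int32_t absViaSubMax(int32_t X) {
  int32_t Neg = int32_t(0u - uint32_t(X)); // the V_SUB into TmpReg
  return X > Neg ? X : Neg;                // signed max into ResultReg
}

int main() {
  assert(absViaSubMax(5) == 5);
  assert(absViaSubMax(-7) == 7);
  assert(absViaSubMax(0) == 0);
}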
@@ -5052,7 +5174,7 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
  MachineOperand &Src1 = Inst.getOperand(2);
 
  if (ST.hasDLInsts()) {
-    unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
    legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
@@ -5072,8 +5194,8 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
    bool Src1IsSGPR = Src1.isReg() &&
                      RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
    MachineInstr *Xor;
-    unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
-    unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+    Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
    // Build a pair of scalar instructions and add them to the work list.
    // The next iteration over the work list will lower these to the vector
@@ -5117,8 +5239,8 @@ void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
  MachineOperand &Src0 = Inst.getOperand(1);
  MachineOperand &Src1 = Inst.getOperand(2);
 
-  unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
-  unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
  MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm)
    .add(Src0)
@@ -5146,8 +5268,8 @@ void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist,
  MachineOperand &Src0 = Inst.getOperand(1);
  MachineOperand &Src1 = Inst.getOperand(2);
 
-  unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
-  unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 
  MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm)
    .add(Src1);
@@ -5189,16 +5311,16 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
-  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
 
  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);
 
-  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
 
-  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
+  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
@@ -5226,12 +5348,12 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
 
-  unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
-  unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-  unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
-  unsigned CarryReg = MRI.createVirtualRegister(CarryRC);
-  unsigned DeadCarryReg = MRI.createVirtualRegister(CarryRC);
+  Register CarryReg = MRI.createVirtualRegister(CarryRC);
+  Register DeadCarryReg = MRI.createVirtualRegister(CarryRC);
 
  MachineOperand &Dest = Inst.getOperand(0);
  MachineOperand &Src0 = Inst.getOperand(1);
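The splitScalar64Bit* helpers exploit the fact that bitwise operations carry nothing across bit 31: a 64-bit XNOR (or AND/OR/XOR) is just the 32-bit operation applied to sub0 and sub1 independently and recombined with REG_SEQUENCE. The value-level picture:

#include <cassert>
#include <cstdint>

// 64-bit xnor computed per 32-bit half, the splitScalar64BitBinaryOp /
// splitScalar64BitXnor pattern: lo and hi halves are fully independent.
uint64_t xnor64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t Lo = ~(uint32_t(A) ^ uint32_t(B));              // op on sub0
  uint32_t Hi = ~(uint32_t(A >> 32) ^ uint32_t(B >> 32));  // op on sub1
  return (uint64_t(Hi) << 32) | Lo;                        // REG_SEQUENCE
}

int main() {
  assert(xnor64ViaHalves(0, 0) == ~0ull);
  assert(xnor64ViaHalves(0x00ff00ff00ff00ffull, 0x0f0f0f0f0f0f0f0full) ==
         ~(0x00ff00ff00ff00ffull ^ 0x0f0f0f0f0f0f0f0full));
}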
@@ -5327,17 +5449,17 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
 
-  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
                             .add(SrcReg0Sub0)
                             .add(SrcReg1Sub0);
 
-  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
                             .add(SrcReg0Sub1)
                             .add(SrcReg1Sub1);
 
-  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
+  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
@@ -5368,7 +5490,7 @@ void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
 
  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
 
-  unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
 
  MachineOperand* Op0;
  MachineOperand* Op1;
@@ -5384,7 +5506,7 @@ void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
  BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
    .add(*Op0);
 
-  unsigned NewDest = MRI.createVirtualRegister(DestRC);
+  Register NewDest = MRI.createVirtualRegister(DestRC);
 
  MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest)
                           .addReg(Interm)
@@ -5411,8 +5533,8 @@ void SIInstrInfo::splitScalar64BitBCNT(
    MRI.getRegClass(Src.getReg()) :
    &AMDGPU::SGPR_32RegClass;
 
-  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
@@ -5451,9 +5573,9 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
         Offset == 0 && "Not implemented");
 
  if (BitWidth < 32) {
-    unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-    unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-    unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+    Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
 
    BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
      .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
@@ -5476,8 +5598,8 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
  }
 
  MachineOperand &Src = Inst.getOperand(1);
-  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
 
  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
    .addImm(31)
@@ -5506,6 +5628,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
    switch (UseMI.getOpcode()) {
    case AMDGPU::COPY:
    case AMDGPU::WQM:
+    case AMDGPU::SOFT_WQM:
    case AMDGPU::WWM:
    case AMDGPU::REG_SEQUENCE:
    case AMDGPU::PHI:
@@ -5531,7 +5654,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
                                 MachineRegisterInfo &MRI,
                                 MachineInstr &Inst) const {
-  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineBasicBlock *MBB = Inst.getParent();
  MachineOperand &Src0 = Inst.getOperand(1);
  MachineOperand &Src1 = Inst.getOperand(2);
@@ -5539,8 +5662,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
  switch (Inst.getOpcode()) {
  case AMDGPU::S_PACK_LL_B32_B16: {
-    unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
    // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
    // 0.
@@ -5558,7 +5681,7 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
    break;
  }
  case AMDGPU::S_PACK_LH_B32_B16: {
-    unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
      .addImm(0xffff);
    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
@@ -5568,8 +5691,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
    break;
  }
  case AMDGPU::S_PACK_HH_B32_B16: {
-    unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
      .addImm(16)
      .add(Src0);
@@ -5623,17 +5746,27 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::INSERT_SUBREG:
  case AMDGPU::WQM:
+  case AMDGPU::SOFT_WQM:
  case AMDGPU::WWM: {
    const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
    if (RI.hasAGPRs(SrcRC)) {
      if (RI.hasAGPRs(NewDstRC))
        return nullptr;
 
-      NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
+      switch (Inst.getOpcode()) {
+      case AMDGPU::PHI:
+      case AMDGPU::REG_SEQUENCE:
+      case AMDGPU::INSERT_SUBREG:
+        NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
+        break;
+      default:
+        NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+      }
+
      if (!NewDstRC)
        return nullptr;
    } else {
-      if (RI.hasVGPRs(NewDstRC))
+      if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
        return nullptr;
 
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
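movePackToVALU above turns the scalar S_PACK_{LL,LH,HH}_B32_B16 instructions into VALU bit manipulation (V_AND/V_LSHLREV/V_OR for LL, V_BFI for LH/HH). The value each variant computes:

#include <cassert>
#include <cstdint>

// The three S_PACK_*_B32_B16 results: pick the low (L) or high (H) 16 bits
// of each source and pack them into one 32-bit value.
uint32_t packLL(uint32_t A, uint32_t B) { return (A & 0xffff) | (B << 16); }
uint32_t packLH(uint32_t A, uint32_t B) { return (A & 0xffff) | (B & 0xffff0000); }
uint32_t packHH(uint32_t A, uint32_t B) { return (A >> 16) | (B & 0xffff0000); }

int main() {
  assert(packLL(0x11112222, 0x33334444) == 0x44442222);
  assert(packLH(0x11112222, 0x33334444) == 0x33332222);
  assert(packHH(0x11112222, 0x33334444) == 0x33331111);
}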
@@ -5686,7 +5819,7 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
      return MO.getReg();
 
    // If this could be a VGPR or an SGPR, Check the dynamic register class.
-    unsigned Reg = MO.getReg();
+    Register Reg = MO.getReg();
    const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
    if (RI.isSGPRClass(RegRC))
      UsedSGPRs[i] = Reg;
@@ -5941,7 +6074,7 @@ void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
  MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
 
  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
-    unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC());
+    Register DstReg = MRI.createVirtualRegister(RI.getBoolRC());
    MachineInstr *SIIF =
        BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
            .add(Branch->getOperand(0))
@@ -5968,8 +6101,8 @@ void SIInstrInfo::convertNonUniformLoopRegion(
 
  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
 
-    unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC());
-    unsigned BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC());
+    Register DstReg = MRI.createVirtualRegister(RI.getBoolRC());
+    Register BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC());
    MachineInstrBuilder HeaderPHIBuilder =
        BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
    for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
@@ -5979,7 +6112,7 @@ void SIInstrInfo::convertNonUniformLoopRegion(
        HeaderPHIBuilder.addReg(BackEdgeReg);
      } else {
        MachineBasicBlock *PMBB = *PI;
-        unsigned ZeroReg = MRI.createVirtualRegister(RI.getBoolRC());
+        Register ZeroReg = MRI.createVirtualRegister(RI.getBoolRC());
        materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
                             ZeroReg, 0);
        HeaderPHIBuilder.addReg(ZeroReg);
@@ -6063,13 +6196,30 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
    return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
 
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-  unsigned UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
+  Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
  MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
 
  return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
           .addReg(UnusedCarry, RegState::Define | RegState::Dead);
}
 
+MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
+                                               MachineBasicBlock::iterator I,
+                                               const DebugLoc &DL,
+                                               Register DestReg,
+                                               RegScavenger &RS) const {
+  if (ST.hasAddNoCarry())
+    return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg);
+
+  Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+  // TODO: Users need to deal with this.
+  if (!UnusedCarry.isValid())
+    return MachineInstrBuilder();
+
+  return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
+           .addReg(UnusedCarry, RegState::Define | RegState::Dead);
+}
+
bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
@@ -6115,7 +6265,21 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
    return false;
 
  const auto RCID = MI.getDesc().OpInfo[Idx].RegClass;
-  return RCID == AMDGPU::SReg_128RegClassID;
+  return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
+}
+
+unsigned SIInstrInfo::getNumFlatOffsetBits(unsigned AddrSpace,
+                                           bool Signed) const {
+  if (!ST.hasFlatInstOffsets())
+    return 0;
+
+  if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS)
+    return 0;
+
+  if (ST.getGeneration() >= AMDGPUSubtarget::GFX10)
+    return Signed ? 12 : 11;
+
+  return Signed ? 13 : 12;
}
 
bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
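The new getNumFlatOffsetBits feeds isLegalFLATOffset: the immediate range a flat/global/scratch instruction can encode follows directly from the bit count. Computing the ranges the returned values imply:

#include <cstdint>
#include <cstdio>

// Offset range encodable in N bits: signed -> [-2^(N-1), 2^(N-1) - 1],
// unsigned -> [0, 2^N - 1]. This is the check isLegalFLATOffset performs
// against the bit count returned above.
void printRange(unsigned Bits, bool Signed) {
  int64_t Lo = Signed ? -(int64_t(1) << (Bits - 1)) : 0;
  int64_t Hi = (int64_t(1) << (Signed ? Bits - 1 : Bits)) - 1;
  std::printf("%u bits %s: [%lld, %lld]\n", Bits,
              Signed ? "signed" : "unsigned", (long long)Lo, (long long)Hi);
}

int main() {
  printRange(13, true);   // pre-GFX10, signed:   [-4096, 4095]
  printRange(12, false);  // pre-GFX10, unsigned: [0, 4095]
  printRange(12, true);   // GFX10, signed:       [-2048, 2047]
  printRange(11, false);  // GFX10, unsigned:     [0, 2047]
}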
@@ -6254,7 +6418,7 @@ static bool followSubRegDef(MachineInstr &MI,
MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                                     MachineRegisterInfo &MRI) {
  assert(MRI.isSSA());
-  if (!TargetRegisterInfo::isVirtualRegister(P.Reg))
+  if (!Register::isVirtualRegister(P.Reg))
    return nullptr;
 
  auto RSR = P;
@@ -6265,8 +6429,7 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
    case AMDGPU::COPY:
    case AMDGPU::V_MOV_B32_e32: {
      auto &Op1 = MI->getOperand(1);
-      if (Op1.isReg() &&
-          TargetRegisterInfo::isVirtualRegister(Op1.getReg())) {
+      if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg())) {
        if (Op1.isUndef())
          return nullptr;
        RSR = getRegSubRegPair(Op1);
@@ -6360,3 +6523,40 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
    return true;
  }
}
+
+MachineInstr *SIInstrInfo::createPHIDestinationCopy(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt,
+    const DebugLoc &DL, Register Src, Register Dst) const {
+  auto Cur = MBB.begin();
+  if (Cur != MBB.end())
+    do {
+      if (!Cur->isPHI() && Cur->readsRegister(Dst))
+        return BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src);
+      ++Cur;
+    } while (Cur != MBB.end() && Cur != LastPHIIt);
+
+  return TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src,
+                                                   Dst);
+}
+
+MachineInstr *SIInstrInfo::createPHISourceCopy(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
+    const DebugLoc &DL, Register Src, Register SrcSubReg, Register Dst) const {
+  if (InsPt != MBB.end() &&
+      (InsPt->getOpcode() == AMDGPU::SI_IF ||
+       InsPt->getOpcode() == AMDGPU::SI_ELSE ||
+       InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
+      InsPt->definesRegister(Src)) {
+    InsPt++;
+    return BuildMI(MBB, InsPt, InsPt->getDebugLoc(),
+                   get(ST.isWave32() ? AMDGPU::S_MOV_B32_term
+                                     : AMDGPU::S_MOV_B64_term),
+                   Dst)
+        .addReg(Src, 0, SrcSubReg)
+        .addReg(AMDGPU::EXEC, RegState::Implicit);
+  }
+  return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg,
+                                              Dst);
+}
+
+bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); }
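The new isWave32 accessor ties many of the changes in this commit together (S_MOV_B32 vs S_MOV_B64, EXEC_LO vs EXEC, the SReg_1 bool register classes): in wave32 mode every lane mask is 32 bits wide instead of 64. A closing illustration of that difference:

#include <cstdint>
#include <cstdio>

// Lane masks (EXEC, VCC, the "bool" values above) are 32-bit in wave32 mode
// and 64-bit in wave64 mode; that is why the diff keeps selecting between
// *_B32 and *_B64 opcodes on ST.isWave32().
template <typename MaskT> unsigned countActiveLanes(MaskT Exec) {
  unsigned N = 0;
  for (; Exec; Exec &= Exec - 1)  // clear lowest set bit
    ++N;
  return N;
}

int main() {
  uint32_t ExecW32 = 0x0000ffffu;            // wave32: 16 lanes active
  uint64_t ExecW64 = 0x00000000ffffffffull;  // wave64: 32 lanes active
  std::printf("%u %u\n", countActiveLanes(ExecW32), countActiveLanes(ExecW64));
}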