about | summary | refs | log | tree | commit | diff
path: root/lib/Target/AMDGPU/SIRegisterInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIRegisterInfo.cpp')
-rw-r--r-- lib/Target/AMDGPU/SIRegisterInfo.cpp 137
1 files changed, 52 insertions, 85 deletions
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 4a3fbb4593bb..65cdc13e03cd 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -148,7 +148,6 @@ unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
// EXEC_LO and EXEC_HI could be allocated and used as regular register, but
// this seems likely to result in bugs, so I'm marking them as reserved.
@@ -173,6 +172,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
+ reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
+ reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
@@ -237,8 +238,15 @@ bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const
return true;
}
-bool SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
- return MF.getFrameInfo().hasStackObjects();
+bool SIRegisterInfo::requiresFrameIndexScavenging(
+ const MachineFunction &MF) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.hasStackObjects())
+ return true;
+
+ // May need to deal with callee saved registers.
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ return !Info->isEntryFunction();
}
bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
@@ -429,6 +437,10 @@ static int getOffsetMUBUFStore(unsigned Opc) {
return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
+ case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
+ return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
+ case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
+ return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
default:
return -1;
}
@@ -450,6 +462,18 @@ static int getOffsetMUBUFLoad(unsigned Opc) {
return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
+ case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
+ return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
+ case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
+ return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
+ case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
+ return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
+ case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
+ return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
+ case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
+ return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
+ case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
+ return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
default:
return -1;
}
@@ -472,17 +496,21 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
if (LoadStoreOp == -1)
return false;
- unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();
+ const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
+ MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+ .add(*Reg)
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
- BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .addReg(Reg, getDefRegState(!IsStore))
- .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
- .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
+ AMDGPU::OpName::vdata_in);
+ if (VDataIn)
+ NewMI.add(*VDataIn);
return true;
}
@@ -1045,8 +1073,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addImm(Log2_32(ST.getWavefrontSize()))
.addReg(DiffReg);
} else {
- unsigned CarryOut
- = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
unsigned ScaledReg
= MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -1056,8 +1082,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// TODO: Fold if use instruction is another add of a constant.
if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
- .addReg(CarryOut, RegState::Define | RegState::Dead)
+ TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
.addImm(Offset)
.addReg(ScaledReg, RegState::Kill);
} else {
@@ -1066,13 +1091,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
.addImm(Offset);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
- .addReg(CarryOut, RegState::Define | RegState::Dead)
+ TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
.addReg(ConstOffsetReg, RegState::Kill)
.addReg(ScaledReg, RegState::Kill);
}
-
- MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC);
}
// Don't introduce an extra copy if we're just materializing in a mov.
@@ -1275,8 +1297,7 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
return RC;
// We can assume that each lane corresponds to one 32-bit register.
- LaneBitmask::Type Mask = getSubRegIndexLaneMask(SubIdx).getAsInteger();
- unsigned Count = countPopulation(Mask);
+ unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
if (isSGPRClass(RC)) {
switch (Count) {
case 1:
@@ -1322,73 +1343,18 @@ bool SIRegisterInfo::shouldRewriteCopySrc(
// class.
//
// e.g. if we have something like
- // vreg0 = ...
- // vreg1 = ...
- // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
- // vreg3 = COPY vreg2, sub0
+ // %0 = ...
+ // %1 = ...
+ // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
+ // %3 = COPY %2, sub0
//
// We want to look through the COPY to find:
- // => vreg3 = COPY vreg0
+ // => %3 = COPY %0
// Plain copy.
return getCommonSubClass(DefRC, SrcRC) != nullptr;
}
-// FIXME: Most of these are flexible with HSA and we don't need to reserve them
-// as input registers if unused. Whether the dispatch ptr is necessary should be
-// easy to detect from used intrinsics. Scratch setup is harder to know.
-unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
- enum PreloadedValue Value) const {
-
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- (void)ST;
- switch (Value) {
- case SIRegisterInfo::WORKGROUP_ID_X:
- assert(MFI->hasWorkGroupIDX());
- return MFI->WorkGroupIDXSystemSGPR;
- case SIRegisterInfo::WORKGROUP_ID_Y:
- assert(MFI->hasWorkGroupIDY());
- return MFI->WorkGroupIDYSystemSGPR;
- case SIRegisterInfo::WORKGROUP_ID_Z:
- assert(MFI->hasWorkGroupIDZ());
- return MFI->WorkGroupIDZSystemSGPR;
- case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
- return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
- case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
- assert(MFI->hasPrivateSegmentBuffer());
- return MFI->PrivateSegmentBufferUserSGPR;
- case SIRegisterInfo::IMPLICIT_BUFFER_PTR:
- assert(MFI->hasImplicitBufferPtr());
- return MFI->ImplicitBufferPtrUserSGPR;
- case SIRegisterInfo::KERNARG_SEGMENT_PTR:
- assert(MFI->hasKernargSegmentPtr());
- return MFI->KernargSegmentPtrUserSGPR;
- case SIRegisterInfo::DISPATCH_ID:
- assert(MFI->hasDispatchID());
- return MFI->DispatchIDUserSGPR;
- case SIRegisterInfo::FLAT_SCRATCH_INIT:
- assert(MFI->hasFlatScratchInit());
- return MFI->FlatScratchInitUserSGPR;
- case SIRegisterInfo::DISPATCH_PTR:
- assert(MFI->hasDispatchPtr());
- return MFI->DispatchPtrUserSGPR;
- case SIRegisterInfo::QUEUE_PTR:
- assert(MFI->hasQueuePtr());
- return MFI->QueuePtrUserSGPR;
- case SIRegisterInfo::WORKITEM_ID_X:
- assert(MFI->hasWorkItemIDX());
- return AMDGPU::VGPR0;
- case SIRegisterInfo::WORKITEM_ID_Y:
- assert(MFI->hasWorkItemIDY());
- return AMDGPU::VGPR1;
- case SIRegisterInfo::WORKITEM_ID_Z:
- assert(MFI->hasWorkItemIDZ());
- return AMDGPU::VGPR2;
- }
- llvm_unreachable("unexpected preloaded value type");
-}
-
/// \brief Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
// AMDGPU::NoRegister.
@@ -1525,7 +1491,8 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
unsigned SubReg,
const TargetRegisterClass *DstRC,
unsigned DstSubReg,
- const TargetRegisterClass *NewRC) const {
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const {
unsigned SrcSize = getRegSizeInBits(*SrcRC);
unsigned DstSize = getRegSizeInBits(*DstRC);
unsigned NewSize = getRegSizeInBits(*NewRC);
@@ -1547,7 +1514,7 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
- *MF.getFunction());
+ MF.getFunction());
switch (RC->getID()) {
default:
return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);