diff options
Diffstat (limited to 'lib/Target/AMDGPU/SIRegisterInfo.cpp')
| -rw-r--r-- | lib/Target/AMDGPU/SIRegisterInfo.cpp | 137 |
1 files changed, 52 insertions, 85 deletions
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 4a3fbb4593bb..65cdc13e03cd 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -148,7 +148,6 @@ unsigned SIRegisterInfo::reservedStackPtrOffsetReg( BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); // EXEC_LO and EXEC_HI could be allocated and used as regular register, but // this seems likely to result in bugs, so I'm marking them as reserved. @@ -173,6 +172,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7); reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9); reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11); + reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13); + reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15); const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); @@ -237,8 +238,15 @@ bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const return true; } -bool SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const { - return MF.getFrameInfo().hasStackObjects(); +bool SIRegisterInfo::requiresFrameIndexScavenging( + const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.hasStackObjects()) + return true; + + // May need to deal with callee saved registers. + const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + return !Info->isEntryFunction(); } bool SIRegisterInfo::requiresFrameIndexReplacementScavenging( @@ -429,6 +437,10 @@ static int getOffsetMUBUFStore(unsigned Opc) { return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET; case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN: return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET; + case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN: + return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET; + case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN: + return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET; default: return -1; } @@ -450,6 +462,18 @@ static int getOffsetMUBUFLoad(unsigned Opc) { return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET; case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN: return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET; + case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN: + return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET; + case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN: + return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET; + case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN: + return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET; + case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN: + return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET; + case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN: + return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET; + case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN: + return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET; default: return -1; } @@ -472,17 +496,21 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, if (LoadStoreOp == -1) return false; - unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg(); + const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); + MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) + .add(*Reg) + .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)) + .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)) + .addImm(Offset) + .addImm(0) // glc + .addImm(0) // slc + .addImm(0) // tfe + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); - BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) - .addReg(Reg, getDefRegState(!IsStore)) - .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)) - .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)) - .addImm(Offset) - .addImm(0) // glc - .addImm(0) // slc - .addImm(0) // tfe - .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + const MachineOperand *VDataIn = TII->getNamedOperand(*MI, + AMDGPU::OpName::vdata_in); + if (VDataIn) + NewMI.add(*VDataIn); return true; } @@ -1045,8 +1073,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, .addImm(Log2_32(ST.getWavefrontSize())) .addReg(DiffReg); } else { - unsigned CarryOut - = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned ScaledReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); @@ -1056,8 +1082,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // TODO: Fold if use instruction is another add of a constant. if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg) - .addReg(CarryOut, RegState::Define | RegState::Dead) + TII->getAddNoCarry(*MBB, MI, DL, ResultReg) .addImm(Offset) .addReg(ScaledReg, RegState::Kill); } else { @@ -1066,13 +1091,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) .addImm(Offset); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg) - .addReg(CarryOut, RegState::Define | RegState::Dead) + TII->getAddNoCarry(*MBB, MI, DL, ResultReg) .addReg(ConstOffsetReg, RegState::Kill) .addReg(ScaledReg, RegState::Kill); } - - MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC); } // Don't introduce an extra copy if we're just materializing in a mov. @@ -1275,8 +1297,7 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( return RC; // We can assume that each lane corresponds to one 32-bit register. - LaneBitmask::Type Mask = getSubRegIndexLaneMask(SubIdx).getAsInteger(); - unsigned Count = countPopulation(Mask); + unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes(); if (isSGPRClass(RC)) { switch (Count) { case 1: @@ -1322,73 +1343,18 @@ bool SIRegisterInfo::shouldRewriteCopySrc( // class. // // e.g. if we have something like - // vreg0 = ... - // vreg1 = ... - // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2 - // vreg3 = COPY vreg2, sub0 + // %0 = ... + // %1 = ... + // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2 + // %3 = COPY %2, sub0 // // We want to look through the COPY to find: - // => vreg3 = COPY vreg0 + // => %3 = COPY %0 // Plain copy. return getCommonSubClass(DefRC, SrcRC) != nullptr; } -// FIXME: Most of these are flexible with HSA and we don't need to reserve them -// as input registers if unused. Whether the dispatch ptr is necessary should be -// easy to detect from used intrinsics. Scratch setup is harder to know. -unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, - enum PreloadedValue Value) const { - - const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); - (void)ST; - switch (Value) { - case SIRegisterInfo::WORKGROUP_ID_X: - assert(MFI->hasWorkGroupIDX()); - return MFI->WorkGroupIDXSystemSGPR; - case SIRegisterInfo::WORKGROUP_ID_Y: - assert(MFI->hasWorkGroupIDY()); - return MFI->WorkGroupIDYSystemSGPR; - case SIRegisterInfo::WORKGROUP_ID_Z: - assert(MFI->hasWorkGroupIDZ()); - return MFI->WorkGroupIDZSystemSGPR; - case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: - return MFI->PrivateSegmentWaveByteOffsetSystemSGPR; - case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER: - assert(MFI->hasPrivateSegmentBuffer()); - return MFI->PrivateSegmentBufferUserSGPR; - case SIRegisterInfo::IMPLICIT_BUFFER_PTR: - assert(MFI->hasImplicitBufferPtr()); - return MFI->ImplicitBufferPtrUserSGPR; - case SIRegisterInfo::KERNARG_SEGMENT_PTR: - assert(MFI->hasKernargSegmentPtr()); - return MFI->KernargSegmentPtrUserSGPR; - case SIRegisterInfo::DISPATCH_ID: - assert(MFI->hasDispatchID()); - return MFI->DispatchIDUserSGPR; - case SIRegisterInfo::FLAT_SCRATCH_INIT: - assert(MFI->hasFlatScratchInit()); - return MFI->FlatScratchInitUserSGPR; - case SIRegisterInfo::DISPATCH_PTR: - assert(MFI->hasDispatchPtr()); - return MFI->DispatchPtrUserSGPR; - case SIRegisterInfo::QUEUE_PTR: - assert(MFI->hasQueuePtr()); - return MFI->QueuePtrUserSGPR; - case SIRegisterInfo::WORKITEM_ID_X: - assert(MFI->hasWorkItemIDX()); - return AMDGPU::VGPR0; - case SIRegisterInfo::WORKITEM_ID_Y: - assert(MFI->hasWorkItemIDY()); - return AMDGPU::VGPR1; - case SIRegisterInfo::WORKITEM_ID_Z: - assert(MFI->hasWorkItemIDZ()); - return AMDGPU::VGPR2; - } - llvm_unreachable("unexpected preloaded value type"); -} - /// \brief Returns a register that is not used at any point in the function. /// If all registers are used, then this function will return // AMDGPU::NoRegister. @@ -1525,7 +1491,8 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const { + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const { unsigned SrcSize = getRegSizeInBits(*SrcRC); unsigned DstSize = getRegSizeInBits(*DstRC); unsigned NewSize = getRegSizeInBits(*NewRC); @@ -1547,7 +1514,7 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), - *MF.getFunction()); + MF.getFunction()); switch (RC->getID()) { default: return AMDGPURegisterInfo::getRegPressureLimit(RC, MF); |
