Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp')
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 340
1 file changed, 268 insertions, 72 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 0b5715515880..08a64de38501 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -8,10 +8,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "SIFrameLowering.h"
+#include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
-#include "AMDGPUSubtarget.h"
 
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -21,22 +21,24 @@
 
 using namespace llvm;
 
-static ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF,
-                                         const SIRegisterInfo *TRI) {
+static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST,
+                                         const MachineFunction &MF) {
   return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
-                      TRI->getMaxNumSGPRs(MF) / 4);
+                      ST.getMaxNumSGPRs(MF) / 4);
 }
 
-static ArrayRef<MCPhysReg> getAllSGPRs(const MachineFunction &MF,
-                                       const SIRegisterInfo *TRI) {
+static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
+                                       const MachineFunction &MF) {
   return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
-                      TRI->getMaxNumSGPRs(MF));
+                      ST.getMaxNumSGPRs(MF));
 }
 
-void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
-                                          const SIRegisterInfo* TRI,
+void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
                                           MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo* TRI = &TII->getRegisterInfo();
+
   // We don't need this if we only have spills since there is no user facing
   // scratch.
@@ -59,16 +61,28 @@ void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
   MRI.addLiveIn(FlatScratchInitReg);
   MBB.addLiveIn(FlatScratchInitReg);
 
-  // Copy the size in bytes.
-  unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
-  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
-    .addReg(FlatScrInitHi, RegState::Kill);
-
   unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
+  unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
 
+  // Do a 64-bit pointer add.
+  if (ST.flatScratchIsPointer()) {
+    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
+      .addReg(FlatScrInitLo)
+      .addReg(ScratchWaveOffsetReg);
+    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
+      .addReg(FlatScrInitHi)
+      .addImm(0);
+
+    return;
+  }
+
+  // Copy the size in bytes.
+  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
+    .addReg(FlatScrInitHi, RegState::Kill);
+
   // Add wave offset in bytes to private base offset.
   // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
@@ -87,10 +101,12 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
   const SIRegisterInfo *TRI,
   SIMachineFunctionInfo *MFI,
   MachineFunction &MF) const {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
 
   // We need to insert initialization of the scratch resource descriptor.
   unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
-  if (ScratchRsrcReg == AMDGPU::NoRegister)
+  if (ScratchRsrcReg == AMDGPU::NoRegister ||
+      !MRI.isPhysRegUsed(ScratchRsrcReg))
     return AMDGPU::NoRegister;
 
   if (ST.hasSGPRInitBug() ||
@@ -108,19 +124,16 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
 
   // We find the resource first because it has an alignment requirement.
 
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-
   unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
-  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(MF, TRI);
+  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
   AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
 
-  // Skip the last 2 elements because the last one is reserved for VCC, and
-  // this is the 2nd to last element already.
+  // Skip the last N reserved elements because they should have already been
+  // reserved for VCC etc.
   for (MCPhysReg Reg : AllSGPR128s) {
     // Pick the first unallocated one. Make sure we don't clobber the other
     // reserved input we needed.
     if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
-      //assert(MRI.isAllocatable(Reg));
       MRI.replaceRegWith(ScratchRsrcReg, Reg);
       MFI->setScratchRSrcReg(Reg);
       return Reg;
@@ -130,25 +143,34 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
   return ScratchRsrcReg;
 }
 
-unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
+// Shift down registers reserved for the scratch wave offset and stack pointer
+// SGPRs.
+std::pair<unsigned, unsigned>
+SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
   const SISubtarget &ST,
   const SIInstrInfo *TII,
   const SIRegisterInfo *TRI,
   SIMachineFunctionInfo *MFI,
   MachineFunction &MF) const {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
-  if (ST.hasSGPRInitBug() ||
-      ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF))
-    return ScratchWaveOffsetReg;
 
-  unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
+  // No replacement necessary.
+  if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
+      !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
+    assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister);
+    return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
+  }
+
+  unsigned SPReg = MFI->getStackPtrOffsetReg();
+  if (ST.hasSGPRInitBug())
+    return std::make_pair(ScratchWaveOffsetReg, SPReg);
 
   unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
-  ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(MF, TRI);
+  ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
   if (NumPreloaded > AllSGPRs.size())
-    return ScratchWaveOffsetReg;
+    return std::make_pair(ScratchWaveOffsetReg, SPReg);
 
   AllSGPRs = AllSGPRs.slice(NumPreloaded);
 
@@ -163,33 +185,41 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
   //     register from the list to consider, it means that when this
   //     register is being used for the scratch wave offset and there
   //     are no other free SGPRs, then the value will stay in this register.
+  // + 1 if stack pointer is used.
   // ----
-  //  13
-  if (AllSGPRs.size() < 13)
-    return ScratchWaveOffsetReg;
+  //  13 (+1)
+  unsigned ReservedRegCount = 13;
+
+  if (AllSGPRs.size() < ReservedRegCount)
+    return std::make_pair(ScratchWaveOffsetReg, SPReg);
+
+  bool HandledScratchWaveOffsetReg =
+    ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
 
-  for (MCPhysReg Reg : AllSGPRs.drop_back(13)) {
+  for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
     // Pick the first unallocated SGPR. Be careful not to pick an alias of the
     // scratch descriptor, since we haven't added its uses yet.
-    if (!MRI.isPhysRegUsed(Reg)) {
-      if (!MRI.isAllocatable(Reg) ||
-          TRI->isSubRegisterEq(ScratchRsrcReg, Reg))
-        continue;
+    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
+      if (!HandledScratchWaveOffsetReg) {
+        HandledScratchWaveOffsetReg = true;
 
-      MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
-      MFI->setScratchWaveOffsetReg(Reg);
-      return Reg;
+        MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+        MFI->setScratchWaveOffsetReg(Reg);
+        ScratchWaveOffsetReg = Reg;
+        break;
+      }
     }
   }
 
-  return ScratchWaveOffsetReg;
+  return std::make_pair(ScratchWaveOffsetReg, SPReg);
 }
 
-void SIFrameLowering::emitPrologue(MachineFunction &MF,
-                                   MachineBasicBlock &MBB) const {
+void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
+                                                MachineBasicBlock &MBB) const {
   // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
   // specified.
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  auto AMDGPUASI = ST.getAMDGPUAS();
   if (ST.debuggerEmitPrologue())
     emitDebuggerPrologue(MF, MBB);
@@ -207,18 +237,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
-  unsigned ScratchRsrcReg
-    = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
-  unsigned ScratchWaveOffsetReg
-    = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
-
-  if (ScratchRsrcReg == AMDGPU::NoRegister) {
-    assert(ScratchWaveOffsetReg == AMDGPU::NoRegister);
-    return;
-  }
-
-  assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
-
   // We need to do the replacement of the private segment buffer and wave offset
   // register even if there are no stack objects. There could be stores to undef
   // or a constant without an associated object.
@@ -229,21 +247,51 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   // emitted after frame indices are eliminated.
 
   if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
-    emitFlatScratchInit(TII, TRI, MF, MBB);
+    emitFlatScratchInit(ST, MF, MBB);
+
+  unsigned SPReg = MFI->getStackPtrOffsetReg();
+  if (SPReg != AMDGPU::NoRegister) {
+    DebugLoc DL;
+    int64_t StackSize = MF.getFrameInfo().getStackSize();
+
+    if (StackSize == 0) {
+      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
+        .addReg(MFI->getScratchWaveOffsetReg());
+    } else {
+      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
+        .addReg(MFI->getScratchWaveOffsetReg())
+        .addImm(StackSize * ST.getWavefrontSize());
+    }
+  }
+
+  unsigned ScratchRsrcReg
+    = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
+
+  unsigned ScratchWaveOffsetReg;
+  std::tie(ScratchWaveOffsetReg, SPReg)
+    = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
+
+  // It's possible to have uses of only ScratchWaveOffsetReg without
+  // ScratchRsrcReg if it's only used for the initialization of flat_scratch,
+  // but the inverse is not true.
+  if (ScratchWaveOffsetReg == AMDGPU::NoRegister) {
+    assert(ScratchRsrcReg == AMDGPU::NoRegister);
+    return;
+  }
 
   // We need to insert initialization of the scratch resource descriptor.
   unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
     MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
-
   unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
-  if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
+  if (ST.isAmdCodeObjectV2(MF)) {
     PreloadedPrivateBufferReg = TRI->getPreloadedValue(
       MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
   }
 
-  bool OffsetRegUsed = !MRI.use_empty(ScratchWaveOffsetReg);
-  bool ResourceRegUsed = !MRI.use_empty(ScratchRsrcReg);
+  bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
+  bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
+                         MRI.isPhysRegUsed(ScratchRsrcReg);
 
   // We added live-ins during argument lowering, but since they were not used
   // they were deleted. We're adding the uses now, so add them back.
@@ -296,7 +344,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   if (OffsetRegUsed &&
       PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
-      .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
+      .addReg(PreloadedScratchWaveOffsetReg,
+              MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill);
   }
 
   if (CopyBuffer && !CopyBufferFirst) {
@@ -314,21 +363,21 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
     // Use relocations to get the pointer, and setup the other bits manually.
     uint64_t Rsrc23 = TII->getScratchRsrcWords23();
 
-    if (MFI->hasPrivateMemoryInputPtr()) {
+    if (MFI->hasImplicitBufferPtr()) {
       unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
 
       if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
         const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
 
         BuildMI(MBB, I, DL, Mov64, Rsrc01)
-          .addReg(PreloadedPrivateBufferReg)
+          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
       } else {
         const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
 
         PointerType *PtrTy =
           PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
-                           AMDGPUAS::CONSTANT_ADDRESS);
+                           AMDGPUASI.CONSTANT_ADDRESS);
         MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
         auto MMO = MF.getMachineMemOperand(PtrInfo,
                                            MachineMemOperand::MOLoad |
@@ -336,7 +385,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                            MachineMemOperand::MODereferenceable,
                                            0, 0);
         BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
-          .addReg(PreloadedPrivateBufferReg)
+          .addReg(MFI->getImplicitBufferPtrUserSGPR())
           .addImm(0) // offset
          .addImm(0) // glc
          .addMemOperand(MMO)
@@ -366,9 +415,89 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   }
 }
 
+void SIFrameLowering::emitPrologue(MachineFunction &MF,
+                                   MachineBasicBlock &MBB) const {
+  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  if (FuncInfo->isEntryFunction()) {
+    emitEntryFunctionPrologue(MF, MBB);
+    return;
+  }
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+
+  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
+  unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
+
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL;
+
+  bool NeedFP = hasFP(MF);
+  if (NeedFP) {
+    // If we need a base pointer, set it up here. It's whatever the value of
+    // the stack pointer is at this point. Any variable size objects will be
+    // allocated after this, so we can still use the base pointer to reference
+    // locals.
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
+      .addReg(StackPtrReg)
+      .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  uint32_t NumBytes = MFI.getStackSize();
+  if (NumBytes != 0 && hasSP(MF)) {
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
+      .addReg(StackPtrReg)
+      .addImm(NumBytes * ST.getWavefrontSize())
+      .setMIFlag(MachineInstr::FrameSetup);
+  }
+}
+
 void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
+  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  if (FuncInfo->isEntryFunction())
+    return;
+
+  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
+  if (StackPtrReg == AMDGPU::NoRegister)
+    return;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  uint32_t NumBytes = MFI.getStackSize();
+
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  DebugLoc DL;
+
+  // FIXME: Clarify distinction between no set SP and SP. For callee functions,
+  // it's really whether we need SP to be accurate or not.
+
+  if (NumBytes != 0 && hasSP(MF)) {
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
+      .addReg(StackPtrReg)
+      .addImm(NumBytes * ST.getWavefrontSize())
+      .setMIFlag(MachineInstr::FrameDestroy);
+  }
+}
+
+static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
+  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
+       I != E; ++I) {
+    if (!MFI.isDeadObjectIndex(I))
+      return false;
+  }
+
+  return true;
+}
+
+int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+                                            unsigned &FrameReg) const {
+  const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+
+  FrameReg = RI->getFrameRegister(MF);
+  return MF.getFrameInfo().getObjectOffset(FI);
 }
 
 void SIFrameLowering::processFunctionBeforeFrameFinalized(
@@ -379,15 +508,66 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
   if (!MFI.hasStackObjects())
     return;
 
-  bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  bool AllSGPRSpilledToVGPRs = false;
+
+  if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
+    AllSGPRSpilledToVGPRs = true;
+
+    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+    // are spilled to VGPRs, in which case we can eliminate the stack usage.
+    //
+    // XXX - This operates under the assumption that only other SGPR spills are
+    // users of the frame index. I'm not 100% sure this is correct. The
+    // StackColoring pass has a comment saying a future improvement would be to
+    // merging of allocas with spill slots, but for now according to
+    // MachineFrameInfo isSpillSlot can't alias any other object.
+    for (MachineBasicBlock &MBB : MF) {
+      MachineBasicBlock::iterator Next;
+      for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+        MachineInstr &MI = *I;
+        Next = std::next(I);
+
+        if (TII->isSGPRSpill(MI)) {
+          int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+          if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+            bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
+            (void)Spilled;
+            assert(Spilled && "failed to spill SGPR to VGPR when allocated");
+          } else
+            AllSGPRSpilledToVGPRs = false;
+        }
+      }
+    }
 
-  assert((RS || !MayNeedScavengingEmergencySlot) &&
-         "RegScavenger required if spilling");
+    FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
+  }
 
-  if (MayNeedScavengingEmergencySlot) {
-    int ScavengeFI = MFI.CreateStackObject(
-      AMDGPU::SGPR_32RegClass.getSize(),
-      AMDGPU::SGPR_32RegClass.getAlignment(), false);
+  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
+  // but currently hasNonSpillStackObjects is set only from source
+  // allocas. Stack temps produced from legalization are not counted currently.
+  if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
+      !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
+    assert(RS && "RegScavenger required if spilling");
+
+    // We force this to be at offset 0 so no user object ever has 0 as an
+    // address, so we may use 0 as an invalid pointer value. This is because
+    // LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
+    // is required to be address space 0, we are forced to accept this for
+    // now. Ideally we could have the stack in another address space with 0 as a
+    // valid pointer, and -1 as the null value.
+    //
+    // This will also waste additional space when user stack objects require > 4
+    // byte alignment.
+    //
+    // The main cost here is losing the offset for addressing modes. However
+    // this also ensures we shouldn't need a register for the offset when
+    // emergency scavenging.
+    int ScavengeFI = MFI.CreateFixedObject(
+      TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
     RS->addScavengingFrameIndex(ScavengeFI);
   }
 }
@@ -432,3 +612,19 @@ void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
       WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
   }
 }
+
+bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
+  // All stack operations are relative to the frame offset SGPR.
+  // TODO: Still want to eliminate sometimes.
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  // XXX - Is this only called after frame is finalized? Should be able to check
+  // frame size.
+  return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI);
+}
+
+bool SIFrameLowering::hasSP(const MachineFunction &MF) const {
+  // All stack operations are relative to the frame offset SGPR.
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  return MFI.hasCalls() || MFI.hasVarSizedObjects();
+}
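
Some notes on the mechanisms this patch changes follow; each sketch is a standalone C++ model of one hunk, not the LLVM MachineIR API, and every name in the sketches is illustrative.

The emitFlatScratchInit hunk switches, when ST.flatScratchIsPointer() is true, from copying the scratch size into FLAT_SCR_LO to a full 64-bit add of the wave's scratch offset into the flat_scratch base. S_ADD_U32 writes its carry-out to SCC and S_ADDC_U32 consumes it, so the two 32-bit SGPR halves behave as one 64-bit register. A minimal model of that carry chain:

#include <cstdint>
#include <cstdio>

struct Reg64 { uint32_t lo, hi; }; // two SGPRs acting as one 64-bit value

// flat_scratch = flat_scratch_init + scratch_wave_offset, built from two
// 32-bit adds the way the S_ADD_U32 / S_ADDC_U32 pair in the patch does it.
Reg64 addWaveOffset(Reg64 init, uint32_t waveOffset) {
  Reg64 out;
  out.lo = init.lo + waveOffset;   // S_ADD_U32: low-half add, carry out to SCC
  uint32_t scc = out.lo < init.lo; // the carry flag from the low add
  out.hi = init.hi + 0 + scc;      // S_ADDC_U32 with immediate 0
  return out;
}

int main() {
  Reg64 r = addWaveOffset({0xFFFFF000u, 0x00000001u}, 0x2000u);
  std::printf("0x%08X%08X\n", r.hi, r.lo); // prints 0x0000000200001000
  return 0;
}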
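getReservedPrivateSegmentWaveByteOffsetReg now returns a (scratch wave offset, stack pointer) pair and shifts the wave offset down to the first SGPR that is both unused and allocatable, skipping the trailing block of registers the surrounding comment reserves (VCC etc., the "13 (+1)" budget). A rough model of that scan, with stand-in predicates playing the role of MRI.isPhysRegUsed and MRI.isAllocatable:

#include <functional>
#include <utility>
#include <vector>

using Reg = unsigned;
constexpr unsigned ReservedRegCount = 13; // the "13 (+1)" budget from the comment

std::pair<Reg, Reg> pickWaveOffsetAndSP(
    const std::vector<Reg> &allSGPRs, Reg waveOffsetReg, Reg spReg,
    bool alreadyHandled,
    const std::function<bool(Reg)> &isUsed,
    const std::function<bool(Reg)> &isAllocatable) {
  if (allSGPRs.size() < ReservedRegCount)
    return {waveOffsetReg, spReg}; // not enough room to shift anything down
  // Equivalent of drop_back(ReservedRegCount): never touch the reserved tail.
  for (size_t i = 0, e = allSGPRs.size() - ReservedRegCount; i != e; ++i) {
    Reg r = allSGPRs[i];
    if (isUsed(r) || !isAllocatable(r))
      continue; // the first free, allocatable SGPR wins
    if (!alreadyHandled) {
      waveOffsetReg = r; // replaceRegWith + setScratchWaveOffsetReg in the patch
      break;
    }
  }
  return {waveOffsetReg, spReg};
}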
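Both the entry-function prologue and the new callee prologue/epilogue scale the frame size by ST.getWavefrontSize() before adjusting the stack pointer SGPR: scratch is swizzled per lane, so one byte of per-lane frame consumes wavefront-size bytes of per-wave scratch. The epilogue's S_SUB_U32 exactly undoes the prologue's S_ADD_U32. A small sketch of that arithmetic (64 is the wavefront size on the GCN targets this file serves; the function names are invented):

#include <cassert>
#include <cstdint>

constexpr uint32_t WavefrontSize = 64; // ST.getWavefrontSize() on GCN

// Prologue: SP advances by per-lane frame bytes scaled to per-wave bytes.
uint32_t prologueSP(uint32_t sp, uint32_t numBytes) {
  return sp + numBytes * WavefrontSize; // the S_ADD_U32 in emitPrologue
}

// Epilogue: the matching S_SUB_U32 restores the incoming SP.
uint32_t epilogueSP(uint32_t sp, uint32_t numBytes) {
  return sp - numBytes * WavefrontSize;
}

int main() {
  uint32_t sp = 0x1000;
  assert(epilogueSP(prologueSP(sp, 16), 16) == sp); // epilogue undoes prologue
  return 0;
}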
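Finally, processFunctionBeforeFrameFinalized now folds SGPR spills into VGPR lanes first and only creates the emergency scavenging slot when stack uses survive that folding; the slot is then pinned at offset 0 via CreateFixedObject so forming its address never needs an extra register. The gating condition can be read as the following predicate, where the booleans stand in for the SIMachineFunctionInfo queries:

#include <vector>

// Mirrors the allStackObjectsAreDead helper added by the patch: true when
// every frame object has been marked dead (e.g. after SGPR spill slots were
// removed by removeSGPRToVGPRFrameIndices).
static bool allStackObjectsAreDead(const std::vector<bool> &objectIsDead) {
  for (bool dead : objectIsDead)
    if (!dead)
      return false;
  return true;
}

// The patch's condition for creating the offset-0 emergency scavenge slot.
bool needsScavengeSlot(bool hasNonSpillStackObjects, bool hasSpilledVGPRs,
                       bool allSGPRSpilledToVGPRs,
                       const std::vector<bool> &objectIsDead) {
  return hasNonSpillStackObjects || hasSpilledVGPRs ||
         !allSGPRSpilledToVGPRs || !allStackObjectsAreDead(objectIsDead);
}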
