diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 126 |
1 files changed, 110 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index cca8565c9ff9..0504c59ebd9e 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -31,6 +31,9 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), + BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), + ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), + GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), @@ -48,8 +51,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ImplicitBufferPtr(false), ImplicitArgPtr(false), GITPtrHigh(0xffffffff), - HighBitsOf32BitAddress(0), - GDSSize(0) { + HighBitsOf32BitAddress(0) { const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const Function &F = MF.getFunction(); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); @@ -74,6 +76,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) PSInputAddr = AMDGPU::getInitialPSInputAddr(F); } + MayNeedAGPRs = ST.hasMAIInsts(); + if (!isEntryFunction()) { if (CC != CallingConv::AMDGPU_Gfx) ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo; @@ -97,6 +101,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ImplicitArgPtr = false; MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign); + + if (ST.hasGFX90AInsts() && + ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && + !mayUseAGPRs(MF)) + MayNeedAGPRs = false; // We will select all MAI with VGPR operands. } bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F); @@ -177,9 +186,20 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (!S.empty()) S.consumeInteger(0, HighBitsOf32BitAddress); - S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); - if (!S.empty()) - S.consumeInteger(0, GDSSize); + // On GFX908, in order to guarantee copying between AGPRs, we need a scratch + // VGPR available at all times. For now, reserve highest available VGPR. After + // RA, shift it to the lowest available unused VGPR if the one exist. + if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) { + VGPRForAGPRCopy = + AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1); + } +} + +MachineFunctionInfo *SIMachineFunctionInfo::clone( + BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) + const { + return DestMF.cloneInfo<SIMachineFunctionInfo>(*this); } void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) { @@ -265,7 +285,7 @@ bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF, /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI. bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, int FI) { - std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI]; + std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI]; // This has already been allocated. if (!SpillLanes.empty()) @@ -320,7 +340,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI)); - // Add this register as live-in to all blocks to avoid machine verifer + // Add this register as live-in to all blocks to avoid machine verifier // complaining about use of an undefined physical register. for (MachineBasicBlock &BB : MF) BB.addLiveIn(LaneVGPR); @@ -328,7 +348,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, LaneVGPR = SpillVGPRs.back().VGPR; } - SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex)); + SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex)); } return true; @@ -402,7 +422,8 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, return Spill.FullyAllocated; } -void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) { +bool SIMachineFunctionInfo::removeDeadFrameIndices( + MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) { // Remove dead frame indices from function frame, however keep FP & BP since // spills for them haven't been inserted yet. And also make sure to remove the // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could @@ -415,17 +436,42 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) { } } - // All other SPGRs must be allocated on the default stack, so reset the stack - // ID. - for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; - ++i) - if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) - MFI.setStackID(i, TargetStackID::Default); + bool HaveSGPRToMemory = false; + + if (ResetSGPRSpillStackIDs) { + // All other SPGRs must be allocated on the default stack, so reset the + // stack ID. + for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; + ++i) { + if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) { + if (MFI.getStackID(i) == TargetStackID::SGPRSpill) { + MFI.setStackID(i, TargetStackID::Default); + HaveSGPRToMemory = true; + } + } + } + } for (auto &R : VGPRToAGPRSpills) { if (R.second.IsDead) MFI.RemoveStackObject(R.first); } + + return HaveSGPRToMemory; +} + +void SIMachineFunctionInfo::allocateWWMReservedSpillSlots( + MachineFrameInfo &MFI, const SIRegisterInfo &TRI) { + assert(WWMReservedFrameIndexes.empty()); + + WWMReservedFrameIndexes.resize(WWMReservedRegs.size()); + + int I = 0; + for (Register VGPR : WWMReservedRegs) { + const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR); + WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject( + TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC)); + } } int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI, @@ -539,6 +585,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( const llvm::MachineFunction &MF) : ExplicitKernArgSize(MFI.getExplicitKernArgSize()), MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()), + GDSSize(MFI.getGDSSize()), DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()), NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()), MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()), @@ -549,7 +596,14 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)), FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)), StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)), + BytesInStackArgArea(MFI.getBytesInStackArgArea()), + ReturnsVoid(MFI.returnsVoid()), ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) { + for (Register Reg : MFI.WWMReservedRegs) + WWMReservedRegs.push_back(regToString(Reg, TRI)); + + if (MFI.getVGPRForAGPRCopy()) + VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI); auto SFI = MFI.getOptionalScavengeFI(); if (SFI) ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo()); @@ -563,8 +617,9 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) { ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize; - MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign); + MaxKernArgAlign = YamlMFI.MaxKernArgAlign; LDSSize = YamlMFI.LDSSize; + GDSSize = YamlMFI.GDSSize; DynLDSAlign = YamlMFI.DynLDSAlign; HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; Occupancy = YamlMFI.Occupancy; @@ -574,6 +629,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( WaveLimiter = YamlMFI.WaveLimiter; HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs; HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs; + BytesInStackArgArea = YamlMFI.BytesInStackArgArea; + ReturnsVoid = YamlMFI.ReturnsVoid; if (YamlMFI.ScavengeFI) { auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo()); @@ -595,10 +652,47 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( return false; } +bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const { + for (const BasicBlock &BB : MF.getFunction()) { + for (const Instruction &I : BB) { + const auto *CB = dyn_cast<CallBase>(&I); + if (!CB) + continue; + + if (CB->isInlineAsm()) { + const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand()); + for (const auto &CI : IA->ParseConstraints()) { + for (StringRef Code : CI.Codes) { + Code.consume_front("{"); + if (Code.startswith("a")) + return true; + } + } + continue; + } + + const Function *Callee = + dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts()); + if (!Callee) + return true; + + if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic) + return true; + } + } + + return false; +} + bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const { if (UsesAGPRs) return *UsesAGPRs; + if (!mayNeedAGPRs()) { + UsesAGPRs = false; + return false; + } + if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) || MF.getFrameInfo().hasCalls()) { UsesAGPRs = true; |
