diff options
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUSubtarget.h | 67 |
1 files changed, 47 insertions, 20 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 78c3b823946d..936feb00c62b 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -75,6 +75,7 @@ protected: bool HasFminFmaxLegacy; bool EnablePromoteAlloca; bool HasTrigReducedRange; + unsigned MaxWavesPerEU; int LocalMemorySize; unsigned WavefrontSize; @@ -195,8 +196,8 @@ public: return LocalMemorySize; } - unsigned getAlignmentForImplicitArgPtr() const { - return isAmdHsaOS() ? 8 : 4; + Align getAlignmentForImplicitArgPtr() const { + return isAmdHsaOS() ? Align(8) : Align(4); } /// Returns the offset in bytes from the start of the input buffer @@ -223,7 +224,9 @@ public: /// subtarget. virtual unsigned getMinWavesPerEU() const = 0; - unsigned getMaxWavesPerEU() const { return 10; } + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget without any kind of limitation. + unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } /// Creates value range metadata on an workitemid.* inrinsic call or load. bool makeLIDRangeMetadata(Instruction *I) const; @@ -235,16 +238,17 @@ public: return 16; return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0); } - uint64_t getExplicitKernArgSize(const Function &F, - unsigned &MaxAlign) const; - unsigned getKernArgSegmentSize(const Function &F, - unsigned &MaxAlign) const; + uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; + unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; virtual ~AMDGPUSubtarget() {} }; class GCNSubtarget : public AMDGPUGenSubtargetInfo, public AMDGPUSubtarget { + + using AMDGPUSubtarget::getMaxWavesPerEU; + public: enum TrapHandlerAbi { TrapHandlerAbiNone = 0, @@ -362,6 +366,7 @@ protected: bool CaymanISA; bool CFALUBug; bool LDSMisalignedBug; + bool HasMFMAInlineLiteralBug; bool HasVertexCache; short TexVTXClauseSize; bool ScalarizeGlobal; @@ -416,7 +421,7 @@ public: return CallLoweringInfo.get(); } - const InstructionSelector *getInstructionSelector() const override { + InstructionSelector *getInstructionSelector() const override { return InstSelector.get(); } @@ -544,6 +549,14 @@ public: return GFX9Insts; } + bool hasScalarPackInsts() const { + return GFX9Insts; + } + + bool hasScalarMulHiInsts() const { + return GFX9Insts; + } + TrapHandlerAbi getTrapHandlerAbi() const { return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; } @@ -611,6 +624,11 @@ public: return getGeneration() >= AMDGPUSubtarget::GFX9; } + /// \returns If target supports S_DENORM_MODE. + bool hasDenormModeInst() const { + return getGeneration() >= AMDGPUSubtarget::GFX10; + } + bool useFlatForGlobal() const { return FlatForGlobal; } @@ -848,9 +866,7 @@ public: // on the pointer value itself may rely on the alignment / known low bits of // the pointer. Set this to something above the minimum to avoid needing // dynamic realignment in common cases. - unsigned getStackAlignment() const { - return 16; - } + Align getStackAlignment() const { return Align(16); } bool enableMachineScheduler() const override { return true; @@ -881,12 +897,6 @@ public: return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize); } - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget without any kind of limitation. - unsigned getMaxWavesPerEU() const { - return AMDGPU::IsaInfo::getMaxWavesPerEU(); - } - /// \returns Number of waves per work group supported by the subtarget and /// limited by given \p FlatWorkGroupSize. unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { @@ -944,6 +954,14 @@ public: return HasDPP; } + bool hasDPPBroadcasts() const { + return HasDPP && getGeneration() < GFX10; + } + + bool hasDPPWavefrontShifts() const { + return HasDPP && getGeneration() < GFX10; + } + bool hasDPP8() const { return HasDPP8; } @@ -974,6 +992,10 @@ public: return SGPRInitBug; } + bool hasMFMAInlineLiteralBug() const { + return HasMFMAInlineLiteralBug; + } + bool has12DWordStoreHazard() const { return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; } @@ -1036,6 +1058,13 @@ public: /// VGPRs unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; + /// Return occupancy for the given function. Used LDS and a number of + /// registers if provided. + /// Note, occupancy can be affected by the scratch allocation as well, but + /// we do not have enough information to compute it. + unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0, + unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; + /// \returns true if the flat_scratch register should be initialized with the /// pointer to the wave's scratch memory rather than a size and offset. bool flatScratchIsPointer() const { @@ -1226,9 +1255,7 @@ public: return Gen; } - unsigned getStackAlignment() const { - return 4; - } + Align getStackAlignment() const { return Align(4); } R600Subtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS); |