diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
commit | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (patch) | |
tree | 4adf86a776049cbf7f69a1929c4babcbbef925eb /llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | |
parent | 7cc9cf2bf09f069cb2dd947ead05d0b54301fb71 (diff) |
Notes
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h')
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 61 |
1 files changed, 54 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index f78dadd447ff5..a5bada2890d2c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -41,6 +41,14 @@ class Triple; namespace AMDGPU { +struct GcnBufferFormatInfo { + unsigned Format; + unsigned BitsPerComp; + unsigned NumComponents; + unsigned NumFormat; + unsigned DataFormat; +}; + #define GET_MIMGBaseOpcode_DECL #define GET_MIMGDim_DECL #define GET_MIMGEncoding_DECL @@ -300,6 +308,15 @@ LLVM_READONLY bool getMUBUFHasSoffset(unsigned Opc); LLVM_READONLY +const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, + uint8_t NumComponents, + uint8_t NumFormat, + const MCSubtargetInfo &STI); +LLVM_READONLY +const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, + const MCSubtargetInfo &STI); + +LLVM_READONLY int getMCOpcode(uint16_t Opcode, unsigned Gen); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, @@ -646,7 +663,6 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, /// \returns true if the intrinsic is divergent bool isIntrinsicSourceOfDivergence(unsigned IntrID); - // Track defaults for fields in the MODE register. struct SIModeRegisterDefaults { /// Floating point opcodes that support exception flag gathering quiet and @@ -659,29 +675,60 @@ struct SIModeRegisterDefaults { /// clamp NaN to zero; otherwise, pass NaN through. bool DX10Clamp : 1; - // TODO: FP mode fields + /// If this is set, neither input nor output denormals are flushed for most f32 + /// instructions. + /// + /// TODO: Split into separate input and output fields if necessary like the + /// control bits really provide? + bool FP32Denormals : 1; + + /// If this is set, neither input nor output denormals are flushed for both f64 + /// and f16/v2f16 instructions. 
+ bool FP64FP16Denormals : 1; SIModeRegisterDefaults() : IEEE(true), - DX10Clamp(true) {} + DX10Clamp(true), + FP32Denormals(true), + FP64FP16Denormals(true) {} - SIModeRegisterDefaults(const Function &F); + // FIXME: Should not depend on the subtarget + SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST); static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) { + const bool IsCompute = AMDGPU::isCompute(CC); + SIModeRegisterDefaults Mode; Mode.DX10Clamp = true; - Mode.IEEE = AMDGPU::isCompute(CC); + Mode.IEEE = IsCompute; + Mode.FP32Denormals = false; // FIXME: Should be on by default. + Mode.FP64FP16Denormals = true; return Mode; } bool operator ==(const SIModeRegisterDefaults Other) const { - return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp; + return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp && + FP32Denormals == Other.FP32Denormals && + FP64FP16Denormals == Other.FP64FP16Denormals; + } + + /// Returns true if a flag is compatible if it's enabled in the callee, but + /// disabled in the caller. + static bool oneWayCompatible(bool CallerMode, bool CalleeMode) { + return CallerMode == CalleeMode || (CallerMode && !CalleeMode); } // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should // be able to override. bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const { - return *this == CalleeMode; + if (DX10Clamp != CalleeMode.DX10Clamp) + return false; + if (IEEE != CalleeMode.IEEE) + return false; + + // Allow inlining denormals enabled into denormals flushed functions. + return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) && + oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals); } }; |