summary refs log tree commit diff
path: root/llvm/lib/Target/AMDGPU/CaymanInstructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/CaymanInstructions.td')
-rw-r--r-- llvm/lib/Target/AMDGPU/CaymanInstructions.td 9
1 files changed, 5 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/llvm/lib/Target/AMDGPU/CaymanInstructions.td
index 1a526675164a0..f4ddbf1131c34 100644
--- a/llvm/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/llvm/lib/Target/AMDGPU/CaymanInstructions.td
@@ -50,16 +50,19 @@ def COS_cm : COS_Common<0x8E>;
def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
+def : SqrtPat<RECIPSQRT_IEEE_cm, RECIP_IEEE_cm>;
+
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
// RECIP_UINT emulation for Cayman
-// The multiplication scales from [0,1] to the unsigned integer range
+// The multiplication scales from [0,1) to the unsigned integer range,
+// rounding down a bit to avoid unwanted overflow.
def : R600Pat <
(AMDGPUurecip i32:$src0),
(FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
- (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
+ (MOV_IMM_I32 CONST.FP_4294966784)))
>;
def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
@@ -70,8 +73,6 @@ def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
-def : R600Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
-
class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
CF_MEM_RAT_CACHELESS <0x14, 0, mask,
(ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),