diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/v_cndmask.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/v_cndmask.ll | 52 |
1 files changed, 26 insertions, 26 deletions
diff --git a/test/CodeGen/AMDGPU/v_cndmask.ll b/test/CodeGen/AMDGPU/v_cndmask.ll index 1cd49feb0d883..d4a68a418ee41 100644 --- a/test/CodeGen/AMDGPU/v_cndmask.ll +++ b/test/CodeGen/AMDGPU/v_cndmask.ll @@ -4,12 +4,12 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 ; GCN-LABEL: {{^}}v_cnd_nan_nosgpr: -; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0 -; GCN: v_cndmask_b32_e32 v{{[0-9]}}, -1, v{{[0-9]+}}, vcc +; GCN: v_cmp_eq_u32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0 +; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]}}, -1, v{{[0-9]+}}, [[COND]] ; GCN-DAG: v{{[0-9]}} ; All nan values are converted to 0xffffffff ; GCN: s_endpgm -define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 { +define amdgpu_kernel void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 { %idx = call i32 @llvm.amdgcn.workitem.id.x() #1 %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx %f = load float, float addrspace(1)* %f.gep @@ -30,7 +30,7 @@ define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace( ; GCN-DAG: v{{[0-9]}} ; All nan values are converted to 0xffffffff ; GCN: s_endpgm -define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 { +define amdgpu_kernel void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 { %setcc = icmp ne i32 %c, 0 %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f store float %select, float addrspace(1)* %out @@ -47,7 +47,7 @@ define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 { ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc -define void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float %x, float %z) #0 { +define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float %x, float %z) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext @@ -62,7 +62,7 @@ define void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float % ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc -define void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float %x) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext @@ -77,7 +77,7 @@ define void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float % ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc -define void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* %out, float %x, float %z) #0 { +define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* %out, float %x, float %z) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext @@ -92,7 +92,7 @@ define void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* %out, float % ; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc -define void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %x) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext @@ -105,9 +105,9 @@ define void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float % ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_vgprZ_f32: ; GCN-DAG: s_load_dword [[X:s[0-9]+]] ; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]] -; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0 -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[Z]], vcc -define void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 { +; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0 +; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]] +define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext @@ -122,9 +122,9 @@ define void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float % ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_vgprZ_f32: ; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]] ; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN: v_cmp_nlg_f32_e64 vcc, [[X]], 0 -; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc -define void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 { +; GCN: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0 +; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]] +define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext @@ -142,7 +142,7 @@ define void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float % ; GCN-DAG: v_cmp_ngt_f32_e32 vcc, 0, [[X]] ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc -define void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float %z) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float %z) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext @@ -159,7 +159,7 @@ define void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float a ; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]] ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc -define void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext @@ -178,7 +178,7 @@ define void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float a ; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]] ; GCN: v_cmp_lt_i32_e32 vcc, -1, [[X]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[Z]], vcc -define void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i32 addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i32 addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext @@ -203,7 +203,7 @@ define void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %out, i32 addrs ; VI-DAG: v_cmp_lt_i64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}} ; VI-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v[[Z_HI]], s ; VI-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 2, v[[Z_LO]], s -define void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds i64, i64 addrspace(1)* %x.ptr, i64 %tid.ext @@ -226,7 +226,7 @@ define void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* %out, i64 addrs ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc -define void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext @@ -249,7 +249,7 @@ define void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(<4 x float> addrspace(1)* %out, ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc -define void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext @@ -275,7 +275,7 @@ define void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc -define void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext @@ -298,7 +298,7 @@ define void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, ; GCN-DAG: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, vcc ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s ; GCN: store_byte -define void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i1 addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i1 addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext @@ -321,7 +321,7 @@ define void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %out, i32 addrspa ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]] ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc -define void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace(1)* %out, float addrspace(1)* %x.ptr, double addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace(1)* %out, float addrspace(1)* %x.ptr, double addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext @@ -343,7 +343,7 @@ define void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace(1)* %out, flo ; GCN: v_cmp_nlg_f32_e32 vcc, 0, [[X]] ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc -define void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(i64 addrspace(1)* %out, float addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(i64 addrspace(1)* %out, float addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext @@ -364,7 +364,7 @@ define void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(i64 addrspace(1)* %out, float ; GCN: v_cmp_gt_u32_e32 vcc, 2, [[X]] ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[Z]], vcc -define void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(float addrspace(1)* %out, i32 addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(float addrspace(1)* %out, i32 addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext @@ -386,7 +386,7 @@ define void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(float addrspace(1)* %out, i32 ; GCN: v_cmp_nle_f32_e32 vcc, 4.0, [[X]] ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -1.0, vcc ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -2.0, vcc -define void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext |