diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/bitcast-vector-extract.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/bitcast-vector-extract.ll | 32 |
1 files changed, 28 insertions, 4 deletions
diff --git a/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/test/CodeGen/AMDGPU/bitcast-vector-extract.ll index 3a55870c2882..cf95f74afb84 100644 --- a/test/CodeGen/AMDGPU/bitcast-vector-extract.ll +++ b/test/CodeGen/AMDGPU/bitcast-vector-extract.ll @@ -11,7 +11,7 @@ ; GCN: buffer_store_dwordx4 ; GCN-NOT: v_mov_b32 ; GCN: buffer_store_dwordx4 -define void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %out, <8 x i32> %vec) { +define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %out, <8 x i32> %vec) { %vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float> store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out @@ -27,7 +27,7 @@ define void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %ou ; GCN: buffer_store_dwordx4 ; GCN-NOT: v_mov_b32 ; GCN: buffer_store_dwordx4 -define void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %out, <4 x i64> %vec) { +define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %out, <4 x i64> %vec) { %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float> store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out @@ -43,7 +43,7 @@ define void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %ou ; GCN: buffer_store_dwordx4 ; GCN-NOT: v_mov_b32 ; GCN: buffer_store_dwordx4 -define void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %vec) { +define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %vec) { %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double> store volatile <4 x double> %vec0.bc, <4 x double> addrspace(1)* %out @@ -59,7 +59,7 @@ define void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %o ; GCN: buffer_store_dwordx4 ; GCN-NOT: v_mov_b32 ; GCN: buffer_store_dwordx4 -define void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %out, <16 x i16> %vec) { +define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %out, <16 x i16> %vec) { %vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float> store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out @@ -67,3 +67,27 @@ define void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %o store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out ret void } + +; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source: +; GCN-NOT: store_dword +define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 { + %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1 + %bc = bitcast i64 %undef to <2 x i32> + store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt: +; GCN-NOT: store_dword +define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 { + %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1 + %bc = bitcast i64 %undef to <2 x i32> + %elt1 = extractelement <2 x i32> %bc, i32 1 + store volatile i32 %elt1, i32 addrspace(1)* %out + ret void +} + +declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone convergent } |