diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/fneg.f16.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/fneg.f16.ll | 39 |
1 files changed, 37 insertions, 2 deletions
diff --git a/test/CodeGen/AMDGPU/fneg.f16.ll b/test/CodeGen/AMDGPU/fneg.f16.ll index 626a0b50cce8a..ed36666db807d 100644 --- a/test/CodeGen/AMDGPU/fneg.f16.ll +++ b/test/CodeGen/AMDGPU/fneg.f16.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN -check-prefix=GFX89 %s ; FIXME: Should be able to do scalar op ; GCN-LABEL: {{^}}s_fneg_f16: @@ -129,6 +129,41 @@ define amdgpu_kernel void @v_fneg_fold_v2f16(<2 x half> addrspace(1)* %out, <2 x ret void } +; GCN-LABEL: {{^}}v_extract_fneg_fold_v2f16: +; GCN: flat_load_dword [[VAL:v[0-9]+]] +; CI-DAG: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}} +; CI-DAG: v_sub_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}} + +; GFX89: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VAL]] +; GFX89-DAG: v_mul_f16_e32 v{{[0-9]+}}, -4.0, [[VAL]] +; GFX89-DAG: v_sub_f16_e32 v{{[0-9]+}}, 2.0, [[ELT1]] +define amdgpu_kernel void @v_extract_fneg_fold_v2f16(<2 x half> addrspace(1)* %in) #0 { + %val = load <2 x half>, <2 x half> addrspace(1)* %in + %fneg = fsub <2 x half> <half -0.0, half -0.0>, %val + %elt0 = extractelement <2 x half> %fneg, i32 0 + %elt1 = extractelement <2 x half> %fneg, i32 1 + + %fmul0 = fmul half %elt0, 4.0 + %fadd1 = fadd half %elt1, 2.0 + store volatile half %fmul0, half addrspace(1)* undef + store volatile half %fadd1, half addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}v_extract_fneg_no_fold_v2f16: +; GCN: flat_load_dword [[VAL:v[0-9]+]] +; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80008000, [[VAL]] +; GCN: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[NEG]] +define amdgpu_kernel void @v_extract_fneg_no_fold_v2f16(<2 x half> addrspace(1)* %in) #0 { + %val = load <2 x half>, <2 x half> addrspace(1)* %in + %fneg = fsub <2 x half> <half -0.0, half -0.0>, %val + %elt0 = extractelement <2 x half> %fneg, i32 0 + %elt1 = extractelement <2 x half> %fneg, i32 1 + store volatile half %elt0, half addrspace(1)* undef + store volatile half %elt1, half addrspace(1)* undef + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind } |