diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll')
| -rw-r--r-- | test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 32 |
1 files changed, 22 insertions, 10 deletions
diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index 89adcff1a278..350dd38ef583 100644 --- a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -258,8 +258,10 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(<2 x i16> addrspace ; FIXME: fold lshl_or c0, c1, v0 -> or (c0 << c1), v0 ; GCN-LABEL: {{^}}v_insertelement_v2i16_1: +; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e70000 ; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] -; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]] +; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]] +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] @@ -278,9 +280,12 @@ define amdgpu_kernel void @v_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out, } ; GCN-LABEL: {{^}}v_insertelement_v2i16_1_inlineimm: +; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xfff10000 ; GCN: flat_load_dword [[VEC:v[0-9]+]] -; GCN: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] -; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]] +; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] +; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] +; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]] +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], -15, 16, [[ELT0]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { @@ -337,8 +342,10 @@ define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(<2 x half> addrspac } ; GCN-LABEL: {{^}}v_insertelement_v2f16_1: +; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x45000000 ; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] -; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]] +; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]] +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] @@ -357,9 +364,12 @@ define amdgpu_kernel void @v_insertelement_v2f16_1(<2 x half> addrspace(1)* %out } ; GCN-LABEL: {{^}}v_insertelement_v2f16_1_inlineimm: +; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x230000 ; GCN: flat_load_dword [[VEC:v[0-9]+]] -; GCN: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] -; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]] +; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] +; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] +; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]] +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], 35, 16, [[ELT0]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { @@ -411,11 +421,12 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_sgpr(<2 x i16> addrspac } ; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr: +; GFX89: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} +; CI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7 ; GCN: flat_load_dword [[IDX:v[0-9]+]] ; GCN: flat_load_dword [[VEC:v[0-9]+]] -; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7 +; GFX89-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7 -; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] ; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] @@ -438,11 +449,12 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspac } ; GCN-LABEL: {{^}}v_insertelement_v2f16_dynamic_vgpr: +; GFX89: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} +; CI: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234 ; GCN: flat_load_dword [[IDX:v[0-9]+]] ; GCN: flat_load_dword [[VEC:v[0-9]+]] -; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234 +; GFX89-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234 -; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] ; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] |
