diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/flat-address-space.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/flat-address-space.ll | 56 |
1 files changed, 28 insertions, 28 deletions
diff --git a/test/CodeGen/AMDGPU/flat-address-space.ll b/test/CodeGen/AMDGPU/flat-address-space.ll index 55b5482d031fb..c867e4fca2295 100644 --- a/test/CodeGen/AMDGPU/flat-address-space.ll +++ b/test/CodeGen/AMDGPU/flat-address-space.ll @@ -17,43 +17,43 @@ ; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] ; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] ; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]] -define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 { +define amdgpu_kernel void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 { %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* - store i32 %x, i32 addrspace(4)* %fptr, align 4 + store volatile i32 %x, i32 addrspace(4)* %fptr, align 4 ret void } ; CHECK-LABEL: {{^}}store_flat_i64: ; CHECK: flat_store_dwordx2 -define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 { +define amdgpu_kernel void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 { %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)* - store i64 %x, i64 addrspace(4)* %fptr, align 8 + store volatile i64 %x, i64 addrspace(4)* %fptr, align 8 ret void } ; CHECK-LABEL: {{^}}store_flat_v4i32: ; CHECK: flat_store_dwordx4 -define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 { +define amdgpu_kernel void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 { %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)* - store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16 + store volatile <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16 ret void } ; CHECK-LABEL: {{^}}store_flat_trunc_i16: ; CHECK: flat_store_short -define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 { +define amdgpu_kernel void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 { %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)* %y = trunc i32 %x to i16 - store i16 %y, i16 addrspace(4)* %fptr, align 2 + store volatile i16 %y, i16 addrspace(4)* %fptr, align 2 ret void } ; CHECK-LABEL: {{^}}store_flat_trunc_i8: ; CHECK: flat_store_byte -define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 { +define amdgpu_kernel void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 { %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)* %y = trunc i32 %x to i8 - store i8 %y, i8 addrspace(4)* %fptr, align 2 + store volatile i8 %y, i8 addrspace(4)* %fptr, align 2 ret void } @@ -61,36 +61,36 @@ define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 { ; CHECK-LABEL: load_flat_i32: ; CHECK: flat_load_dword -define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 { +define amdgpu_kernel void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* - %fload = load i32, i32 addrspace(4)* %fptr, align 4 + %fload = load volatile i32, i32 addrspace(4)* %fptr, align 4 store i32 %fload, i32 addrspace(1)* %out, align 4 ret void } ; CHECK-LABEL: load_flat_i64: ; CHECK: flat_load_dwordx2 -define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 { +define amdgpu_kernel void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)* - %fload = load i64, i64 addrspace(4)* %fptr, align 8 + %fload = load volatile i64, i64 addrspace(4)* %fptr, align 8 store i64 %fload, i64 addrspace(1)* %out, align 8 ret void } ; CHECK-LABEL: load_flat_v4i32: ; CHECK: flat_load_dwordx4 -define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 { +define amdgpu_kernel void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)* - %fload = load <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 32 + %fload = load volatile <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 32 store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8 ret void } ; CHECK-LABEL: sextload_flat_i8: ; CHECK: flat_load_sbyte -define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 { +define amdgpu_kernel void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)* - %fload = load i8, i8 addrspace(4)* %fptr, align 4 + %fload = load volatile i8, i8 addrspace(4)* %fptr, align 4 %ext = sext i8 %fload to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 ret void @@ -98,9 +98,9 @@ define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n ; CHECK-LABEL: zextload_flat_i8: ; CHECK: flat_load_ubyte -define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 { +define amdgpu_kernel void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)* - %fload = load i8, i8 addrspace(4)* %fptr, align 4 + %fload = load volatile i8, i8 addrspace(4)* %fptr, align 4 %ext = zext i8 %fload to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 ret void @@ -108,9 +108,9 @@ define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n ; CHECK-LABEL: sextload_flat_i16: ; CHECK: flat_load_sshort -define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 { +define amdgpu_kernel void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)* - %fload = load i16, i16 addrspace(4)* %fptr, align 4 + %fload = load volatile i16, i16 addrspace(4)* %fptr, align 4 %ext = sext i16 %fload to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 ret void @@ -118,9 +118,9 @@ define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* ; CHECK-LABEL: zextload_flat_i16: ; CHECK: flat_load_ushort -define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 { +define amdgpu_kernel void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)* - %fload = load i16, i16 addrspace(4)* %fptr, align 4 + %fload = load volatile i16, i16 addrspace(4)* %fptr, align 4 %ext = zext i16 %fload to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 ret void @@ -131,7 +131,7 @@ define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* ; CHECK: flat_load_ubyte ; CHECK: flat_load_ubyte ; CHECK: flat_load_ubyte -define void @flat_scratch_unaligned_load() { +define amdgpu_kernel void @flat_scratch_unaligned_load() { %scratch = alloca i32 %fptr = addrspacecast i32* %scratch to i32 addrspace(4)* %ld = load volatile i32, i32 addrspace(4)* %fptr, align 1 @@ -143,7 +143,7 @@ define void @flat_scratch_unaligned_load() { ; CHECK: flat_store_byte ; CHECK: flat_store_byte ; CHECK: flat_store_byte -define void @flat_scratch_unaligned_store() { +define amdgpu_kernel void @flat_scratch_unaligned_store() { %scratch = alloca i32 %fptr = addrspacecast i32* %scratch to i32 addrspace(4)* store volatile i32 0, i32 addrspace(4)* %fptr, align 1 @@ -154,7 +154,7 @@ define void @flat_scratch_unaligned_store() { ; HSA: flat_load_dword ; HSA: flat_load_dword ; FIXME: These tests are broken for os = mesa3d, becasue it doesn't initialize flat_scr -define void @flat_scratch_multidword_load() { +define amdgpu_kernel void @flat_scratch_multidword_load() { %scratch = alloca <2 x i32> %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)* %ld = load volatile <2 x i32>, <2 x i32> addrspace(4)* %fptr @@ -165,7 +165,7 @@ define void @flat_scratch_multidword_load() { ; HSA: flat_store_dword ; HSA: flat_store_dword ; FIXME: These tests are broken for os = mesa3d, becasue it doesn't initialize flat_scr -define void @flat_scratch_multidword_store() { +define amdgpu_kernel void @flat_scratch_multidword_store() { %scratch = alloca <2 x i32> %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)* store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* %fptr |