author     Dimitry Andric <dim@FreeBSD.org>    2015-09-06 18:34:38 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2015-09-06 18:34:38 +0000
commit     69156b4c20249e7800cc09e0eef0beb3d15ac1ad (patch)
tree       461d3cf041290f4a99740d540bf0973d6084f98e /test/CodeGen/AMDGPU
parent     ee8648bdac07986a0f1ec897b02ec82a2f144d46 (diff)
Diffstat (limited to 'test/CodeGen/AMDGPU')
-rw-r--r--  test/CodeGen/AMDGPU/cgp-addressing-modes.ll   |  16
-rw-r--r--  test/CodeGen/AMDGPU/global_atomics.ll         | 280
-rw-r--r--  test/CodeGen/AMDGPU/gv-const-addrspace.ll     |  12
-rw-r--r--  test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll  |  12
-rw-r--r--  test/CodeGen/AMDGPU/private-memory.ll         |   2
-rw-r--r--  test/CodeGen/AMDGPU/scratch-buffer.ll         |  51
-rw-r--r--  test/CodeGen/AMDGPU/smrd.ll                   |   8
7 files changed, 262 insertions, 119 deletions

diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 77f7bd01b7f0..a68d110fdc96 100644
--- a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -1,12 +1,15 @@
-; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=OPT %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 declare i32 @llvm.r600.read.tidig.x() #0
 ; OPT-LABEL: @test_sink_global_small_offset_i32(
-; OPT-NOT: getelementptr i32, i32 addrspace(1)* %in
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
+; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
 ; OPT: br i1
-; OPT: ptrtoint
+; OPT-CI: ptrtoint
 ; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
 ; GCN: {{^}}BB0_2:
@@ -214,8 +217,11 @@ done:
 }
 ; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
+; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0
+; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0
 ; GCN: s_and_saveexec_b64
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 ; GCN: {{^}}BB7_2:
 define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
 entry:
diff --git a/test/CodeGen/AMDGPU/global_atomics.ll b/test/CodeGen/AMDGPU/global_atomics.ll
index 847950f6376e..146f0a5fbf26 100644
--- a/test/CodeGen/AMDGPU/global_atomics.ll
+++ b/test/CodeGen/AMDGPU/global_atomics.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=FUNC %s
+
 ; FUNC-LABEL: {{^}}atomic_add_i32_offset:
-; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -10,8 +12,8 @@ entry:
 }
 ; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
-; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
-; SI: buffer_store_dword [[RET]]
+; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_store_dword [[RET]]
 define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -22,6 +24,10 @@ entry:
 ; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
 ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64
offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} + define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -32,7 +38,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset: ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -43,7 +52,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_add_i32: -; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst @@ -51,8 +60,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_add_i32_ret: -; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst @@ -62,6 +71,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_add_i32_addr64: ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -71,7 +83,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64: ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -81,7 +96,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_offset: -; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -90,8 +105,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset: -; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) 
{ entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -102,6 +117,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset: ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -112,7 +130,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset: ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -123,7 +144,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32: -; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst @@ -131,8 +152,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_ret: -; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst @@ -142,6 +163,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_addr64: ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -151,7 +175,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64: ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -161,7 +188,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_offset: -; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -170,8 +197,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset: -; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: 
buffer_store_dword [[RET]] +; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -182,6 +209,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset: ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -192,7 +222,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset: ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -203,7 +236,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32: -; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst @@ -211,8 +244,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_ret: -; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst @@ -222,6 +255,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64: ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -231,7 +267,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64: ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -241,7 +280,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32_offset: -; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_max_i32_offset(i32 addrspace(1)* 
%out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -250,8 +289,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset: -; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -262,6 +301,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset: ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -272,7 +314,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset: ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -283,7 +328,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32: -; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst @@ -291,8 +336,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32_ret: -; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst @@ -302,6 +347,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_addr64: ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -311,7 +359,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64: ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -321,7 +372,7 @@ 
entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_offset: -; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -330,8 +381,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset: -; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -342,6 +393,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset: ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -352,7 +406,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset: ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -363,7 +420,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32: -; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst @@ -371,8 +428,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_ret: -; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst @@ -382,6 +439,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64: ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -391,7 +451,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64: ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umax 
[[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -401,7 +464,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_offset: -; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -410,8 +473,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset: -; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -422,6 +485,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset: ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -432,7 +498,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset: ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -443,7 +512,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32: -; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst @@ -451,8 +520,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_ret: -; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst @@ -462,6 +531,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_min_i32_addr64: ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -471,7 +543,10 @@ entry: ; FUNC-LABEL: 
{{^}}atomic_min_i32_ret_addr64: ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -481,7 +556,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32_offset: -; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -490,8 +565,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset: -; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -502,6 +577,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset: ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -512,7 +590,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset: ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -523,7 +604,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32: -; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst @@ -532,7 +613,7 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_ret: ; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst @@ -542,6 +623,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64: ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], 
v{{[0-9]+$}} define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -551,7 +635,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64: ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -561,7 +648,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_offset: -; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -570,8 +657,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset: -; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -582,6 +669,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset: ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -592,7 +682,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset: ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -603,7 +696,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32: -; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst @@ -611,8 +704,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_ret: -; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst @@ -622,6 +715,9 @@ entry: ; FUNC-LABEL: 
{{^}}atomic_or_i32_addr64: ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -631,7 +727,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64: ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -641,7 +740,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_offset: -; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -650,8 +749,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset: -; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -672,7 +771,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset: ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -683,7 +785,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32: -; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst @@ -691,8 +793,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret: -; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst @@ -702,6 +804,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64: ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: 
flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -711,7 +816,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64: ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -721,7 +829,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_offset: -; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -730,8 +838,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset: -; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -742,6 +850,9 @@ entry: ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset: ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -752,7 +863,10 @@ entry: ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset: ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; SI: buffer_store_dword [[RET]] +; VI: s_movk_i32 flat_scratch_lo, 0x0 +; VI: s_movk_i32 flat_scratch_hi, 0x0 +; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -763,7 +877,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32: -; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst @@ -771,8 +885,8 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_ret: -; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; SI: buffer_store_dword [[RET]] +; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst 
@@ -782,6 +896,9 @@ entry:
 ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
 ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
 define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
 entry:
 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -791,7 +908,10 @@ entry:
 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64:
 ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: buffer_store_dword [[RET]]
+; VI: s_movk_i32 flat_scratch_lo, 0x0
+; VI: s_movk_i32 flat_scratch_hi, 0x0
+; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN: buffer_store_dword [[RET]]
 define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
 entry:
 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
diff --git a/test/CodeGen/AMDGPU/gv-const-addrspace.ll b/test/CodeGen/AMDGPU/gv-const-addrspace.ll
index 3c1fc6c98f74..d4d13125cfbf 100644
--- a/test/CodeGen/AMDGPU/gv-const-addrspace.ll
+++ b/test/CodeGen/AMDGPU/gv-const-addrspace.ll
@@ -8,9 +8,7 @@
 @float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
 ; FUNC-LABEL: {{^}}float:
-; FIXME: We should be using s_load_dword here.
-; SI: buffer_load_dword
-; VI: s_load_dword
+; GCN: s_load_dword
 ; EG-DAG: MOV {{\** *}}T2.X
 ; EG-DAG: MOV {{\** *}}T3.X
@@ -31,9 +29,7 @@ entry:
 ; FUNC-LABEL: {{^}}i32:
-; FIXME: We should be using s_load_dword here.
-; SI: buffer_load_dword
-; VI: s_load_dword
+; GCN: s_load_dword
 ; EG-DAG: MOV {{\** *}}T2.X
 ; EG-DAG: MOV {{\** *}}T3.X
@@ -71,9 +67,7 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
 <1 x i32> <i32 4> ]
 ; FUNC-LABEL: {{^}}array_v1_gv_load:
-; FIXME: We should be using s_load_dword here.
-; SI: buffer_load_dword
-; VI: s_load_dword
+; GCN: s_load_dword
 define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
 %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
 %load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll
index e098dd35d6da..6049dca04012 100644
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll
@@ -11,8 +11,8 @@ declare double @llvm.AMDGPU.fract.f64(double) nounwind readnone
 ; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
 ; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
 ; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
-; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
-; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
 ; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
 ; CI: buffer_store_dwordx2 [[FRC]]
 define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
@@ -28,8 +28,8 @@ define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) nou
 ; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
 ; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
 ; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
-; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
-; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
 ; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
 ; CI: buffer_store_dwordx2 [[FRC]]
 define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
@@ -46,8 +46,8 @@ define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src)
 ; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
 ; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
 ; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
-; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
-; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
 ; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
 ; CI: buffer_store_dwordx2 [[FRC]]
 define void @fract_f64_neg_abs(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
diff --git a/test/CodeGen/AMDGPU/private-memory.ll b/test/CodeGen/AMDGPU/private-memory.ll
index 1c5629780508..645dc04f4420 100644
--- a/test/CodeGen/AMDGPU/private-memory.ll
+++ b/test/CodeGen/AMDGPU/private-memory.ll
@@ -298,7 +298,7 @@ entry:
 ; FUNC-LABEL: ptrtoint:
 ; SI-NOT: ds_write
 ; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
-; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:5
+; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
 define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 %alloca = alloca [16 x i32]
 %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
diff --git a/test/CodeGen/AMDGPU/scratch-buffer.ll b/test/CodeGen/AMDGPU/scratch-buffer.ll
index 56088718ada8..268869daaa32 100644
--- a/test/CodeGen/AMDGPU/scratch-buffer.ll
+++ b/test/CodeGen/AMDGPU/scratch-buffer.ll
@@ -1,5 +1,7 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck --check-prefix=GCN --check-prefix=DEFAULT-SCRATCH %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GCN --check-prefix=DEFAULT-SCRATCH %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+huge-scratch-buffer -mcpu=SI < %s | FileCheck --check-prefix=GCN --check-prefix=HUGE-SCRATCH %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+huge-scratch-buffer -mcpu=tonga < %s | FileCheck --check-prefix=GCN --check-prefix=HUGE-SCRATCH %s
 ; When a frame index offset is more than 12-bits, make sure we don't store
 ; it in mubuf's offset field.
@@ -8,11 +10,11 @@
 ; for both stores. This register is allocated by the register scavenger, so we
 ; should be able to reuse the same regiser for each scratch buffer access.
-; CHECK-LABEL: {{^}}legal_offset_fi:
-; CHECK: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; CHECK: v_mov_b32_e32 [[OFFSET]], 0x8000
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+; GCN-LABEL: {{^}}legal_offset_fi:
+; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; GCN: v_mov_b32_e32 [[OFFSET]], 0x8000
+; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
 define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
 entry:
@@ -47,10 +49,10 @@ done:
 }
-; CHECK-LABEL: {{^}}legal_offset_fi_offset
-; CHECK: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; CHECK: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
-; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+; GCN-LABEL: {{^}}legal_offset_fi_offset
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
+; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
 define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
 entry:
@@ -85,3 +87,30 @@ done:
 ret void
 }
+; GCN-LABEL: @neg_vaddr_offset
+; We can't prove %offset is positive, so we must do the computation with the
+; immediate in an add instruction instead of folding offset and the immediate into
+; the store instruction.
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
+define void @neg_vaddr_offset(i32 %offset) {
+entry:
+  %array = alloca [8192 x i32]
+  %ptr_offset = add i32 %offset, 4
+  %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %ptr_offset
+  store i32 0, i32* %ptr
+  ret void
+}
+
+; GCN-LABEL: @pos_vaddr_offse
+; DEFAULT-SCRATCH: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16
+; HUGE-SCRATCH: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
+define void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
+entry:
+  %array = alloca [8192 x i32]
+  %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 4
+  store i32 0, i32* %ptr
+  %load_ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %offset
+  %val = load i32, i32* %load_ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll
index b0c18ca5959c..0598208e1317 100644
--- a/test/CodeGen/AMDGPU/smrd.ll
+++ b/test/CodeGen/AMDGPU/smrd.ll
@@ -43,13 +43,7 @@ entry:
 ; GCN-LABEL: {{^}}smrd3:
 ; FIXME: There are too many copies here because we don't fold immediates
 ; through REG_SEQUENCE
-; SI: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
-; SI: s_mov_b32 s[[SHI:[0-9]+]], 4
-; SI: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]]
-; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]]
-; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; FIXME: We should be able to use s_load_dword here
-; SI: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
+; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
 ; TODO: Add VI checks
 ; GCN: s_endpgm
 define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {