diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/GlobalISel')
-rw-r--r-- | test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir | 28 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir | 142 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir | 29 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir | 69 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll | 11 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/GlobalISel/smrd.ll | 89 |
6 files changed, 368 insertions, 0 deletions
diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir new file mode 100644 index 0000000000000..56a9e7022db9c --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -0,0 +1,28 @@ +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN + +# REQUIRES: global-isel + +--- | + define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0) { ret void } +... +--- + +name: global_addrspace +legalized: true +regBankSelected: true + +# GCN: global_addrspace +# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1 +# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0 + +body: | + bb.0: + liveins: %vgpr0_vgpr1 + + %0:vgpr(p1) = COPY %vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 4 from %ir.global0) + %vgpr0 = COPY %1 + +... +--- diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir new file mode 100644 index 0000000000000..ea2ad2ba83a52 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -0,0 +1,142 @@ +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI + +# REQUIRES: global-isel + +--- | + define amdgpu_kernel void @smrd_imm(i32 addrspace(2)* %const0) { ret void } +... +--- + +name: smrd_imm +legalized: true +regBankSelected: true + +# GCN: body: +# GCN: [[PTR:%[0-9]+]] = COPY %sgpr0_sgpr1 + +# Immediate offset: +# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0 +# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0 + +# Max immediate offset for SI +# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0 +# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0 + +# Immediate overflow for SI +# SI: [[K1024:%[0-9]+]] = S_MOV_B32 1024 +# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0 +# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0 + +# Max immediate offset for VI +# SI: [[K1048572:%[0-9]+]] = S_MOV_B32 1048572 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143 +# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572 + +# +# Immediate overflow for VI +# SIVI: [[K1048576:%[0-9]+]] = S_MOV_B32 1048576 +# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0 + +# Max immediate for CI +# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 4294967292 +# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 3 +# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 +# SIVI-DAG: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0 +# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# SIVI-DAG: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1 +# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] +# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 +# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0 + +# Immediate overflow for CI +# GCN: [[K_LO:%[0-9]+]] = S_MOV_B32 0 +# GCN: [[K_HI:%[0-9]+]] = S_MOV_B32 4 +# GCN: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 +# GCN-DAG: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0 +# GCN-DAG: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0 +# GCN: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# GCN-DAG: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1 +# GCN-DAG: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1 +# GCN: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] +# GCN: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 +# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 + +# Max 32-bit byte offset +# SIVI: [[K4294967292:%[0-9]+]] = S_MOV_B32 4294967292 +# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0 + +# Overflow 32-bit byte offset +# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 0 +# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 1 +# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 +# SIVI-DAG: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0 +# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# SIVI-DAG: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1 +# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] +# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 +# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0 + +body: | + bb.0: + liveins: %sgpr0_sgpr1 + + %0:sgpr(p2) = COPY %sgpr0_sgpr1 + + %1:sgpr(s64) = G_CONSTANT i64 4 + %2:sgpr(p2) = G_GEP %0, %1 + %3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0) + %sgpr0 = COPY %3 + + %4:sgpr(s64) = G_CONSTANT i64 1020 + %5:sgpr(p2) = G_GEP %0, %4 + %6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0) + %sgpr0 = COPY %6 + + %7:sgpr(s64) = G_CONSTANT i64 1024 + %8:sgpr(p2) = G_GEP %0, %7 + %9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0) + %sgpr0 = COPY %9 + + %10:sgpr(s64) = G_CONSTANT i64 1048572 + %11:sgpr(p2) = G_GEP %0, %10 + %12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0) + %sgpr0 = COPY %12 + + %13:sgpr(s64) = G_CONSTANT i64 1048576 + %14:sgpr(p2) = G_GEP %0, %13 + %15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0) + %sgpr0 = COPY %15 + + %16:sgpr(s64) = G_CONSTANT i64 17179869180 + %17:sgpr(p2) = G_GEP %0, %16 + %18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0) + %sgpr0 = COPY %18 + + %19:sgpr(s64) = G_CONSTANT i64 17179869184 + %20:sgpr(p2) = G_GEP %0, %19 + %21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0) + %sgpr0 = COPY %21 + + %22:sgpr(s64) = G_CONSTANT i64 4294967292 + %23:sgpr(p2) = G_GEP %0, %22 + %24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0) + %sgpr0 = COPY %24 + + %25:sgpr(s64) = G_CONSTANT i64 4294967296 + %26:sgpr(p2) = G_GEP %0, %25 + %27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0) + %sgpr0 = COPY %27 + +... +--- diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir new file mode 100644 index 0000000000000..ea435725bf25d --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -0,0 +1,29 @@ +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN + +# REQUIRES: global-isel + +--- | + define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0) { ret void } +... +--- + +name: global_addrspace +legalized: true +regBankSelected: true + +# GCN: global_addrspace +# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1 +# GCN: [[VAL:%[0-9]+]] = COPY %vgpr2 +# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0 + +body: | + bb.0: + liveins: %vgpr0_vgpr1, %vgpr2 + + %0:vgpr(p1) = COPY %vgpr0_vgpr1 + %1:vgpr(s32) = COPY %vgpr2 + G_STORE %1, %0 :: (store 4 into %ir.global0) + +... +--- diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir new file mode 100644 index 0000000000000..3496b1ab71fe6 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -0,0 +1,69 @@ +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=regbankselect -global-isel %s -verify-machineinstrs -o - | FileCheck %s + +# REQUIRES: global-isel + +--- | + define amdgpu_kernel void @load_constant(i32 addrspace(2)* %ptr0) { ret void } + define amdgpu_kernel void @load_global_uniform(i32 addrspace(1)* %ptr1) { + %tmp0 = load i32, i32 addrspace(1)* %ptr1 + ret void + } + define amdgpu_kernel void @load_global_non_uniform(i32 addrspace(1)* %ptr2) { + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0 + %tmp1 = getelementptr i32, i32 addrspace(1)* %ptr2, i32 %tmp0 + %tmp2 = load i32, i32 addrspace(1)* %tmp1 + ret void + } + declare i32 @llvm.amdgcn.workitem.id.x() #0 + attributes #0 = { nounwind readnone } +... + +--- +name : load_constant +legalized: true + +# CHECK-LABEL: name: load_constant +# CHECK: registers: +# CHECK: - { id: 0, class: sgpr } +# CHECK: - { id: 1, class: sgpr } + +body: | + bb.0: + liveins: %sgpr0_sgpr1 + %0:_(p2) = COPY %sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr0) +... + +--- +name: load_global_uniform +legalized: true + +# CHECK-LABEL: name: load_global_uniform +# CHECK: registers: +# CHECK: - { id: 0, class: sgpr } +# CHECK: - { id: 1, class: sgpr } + +body: | + bb.0: + liveins: %sgpr0_sgpr1 + %0:_(p1) = COPY %sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr1) +... + +--- +name: load_global_non_uniform +legalized: true + +# CHECK-LABEL: name: load_global_non_uniform +# CHECK: registers: +# CHECK: - { id: 0, class: sgpr } +# CHECK: - { id: 1, class: vgpr } +# CHECK: - { id: 2, class: vgpr } + + +body: | + bb.0: + liveins: %sgpr0_sgpr1 + %0:_(p1) = COPY %sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.tmp1) +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll b/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll new file mode 100644 index 0000000000000..a1bf987e65521 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=GCN %s + +; REQUIRES: global-isel + +; GCN-LABEL: vs_epilog +; GCN: s_endpgm + +define amdgpu_vs void @vs_epilog() { +main_body: + ret void +} diff --git a/test/CodeGen/AMDGPU/GlobalISel/smrd.ll b/test/CodeGen/AMDGPU/GlobalISel/smrd.ll new file mode 100644 index 0000000000000..8a6b3df9cff8d --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/smrd.ll @@ -0,0 +1,89 @@ +; FIXME: Need to add support for mubuf stores to enable this on SI. +; XUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s +; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=CI --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s + +; REQUIRES: global-isel + +; SMRD load with an immediate offset. +; GCN-LABEL: {{^}}smrd0: +; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01 +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 +define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { +entry: + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1 + %1 = load i32, i32 addrspace(2)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; SMRD load with the largest possible immediate offset. +; GCN-LABEL: {{^}}smrd1: +; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}} +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc +define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { +entry: + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255 + %1 = load i32, i32 addrspace(2)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; SMRD load with an offset greater than the largest possible immediate. +; GCN-LABEL: {{^}}smrd2: +; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 +; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] +; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100 +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400 +; GCN: s_endpgm +define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { +entry: + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256 + %1 = load i32, i32 addrspace(2)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; SMRD load with a 64-bit offset +; GCN-LABEL: {{^}}smrd3: +; FIXME: There are too many copies here because we don't fold immediates +; through REG_SEQUENCE +; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b +; TODO: Add VI checks +; XGCN: s_endpgm +define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { +entry: + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32 + %1 = load i32, i32 addrspace(2)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; SMRD load with the largest possible immediate offset on VI +; GCN-LABEL: {{^}}smrd4: +; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc +; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] +; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc +define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { +entry: + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143 + %1 = load i32, i32 addrspace(2)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; SMRD load with an offset greater than the largest possible immediate on VI +; GCN-LABEL: {{^}}smrd5: +; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000 +; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] +; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000 +; GCN: s_endpgm +define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { +entry: + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144 + %1 = load i32, i32 addrspace(2)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + |