author     Dimitry Andric <dim@FreeBSD.org>    2017-06-10 13:44:06 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-06-10 13:44:06 +0000
commit     7ab83427af0f77b59941ceba41d509d7d097b065 (patch)
tree       cc41c05b1db454e3d802f34df75e636ee922ad87 /test/CodeGen/AMDGPU
parent     d288ef4c1788d3a951a7558c68312c2d320612b1 (diff)
Diffstat (limited to 'test/CodeGen/AMDGPU')
45 files changed, 860 insertions, 514 deletions
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
new file mode 100644
index 000000000000..ebd473d769b3
--- /dev/null
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
@@ -0,0 +1,24 @@
+# RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+--- |
+  define void @test_icmp() {
+  entry:
+    ret void
+  }
+...
+
+---
+name: test_icmp
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %vgpr0
+    %0(s32) = G_CONSTANT i32 0
+    %1(s32) = COPY %vgpr0
+
+    ; CHECK: %2(s1) = G_ICMP intpred(ne), %0(s32), %1
+    %2(s1) = G_ICMP intpred(ne), %0, %1
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
new file mode 100644
index 000000000000..d11130936bd9
--- /dev/null
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -0,0 +1,28 @@
+# RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+--- |
+  define void @test_select() { ret void }
+...
+
+---
+name: test_select
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0:
+    liveins: %vgpr0
+    %0(s32) = G_CONSTANT i32 0
+    %1(s32) = COPY %vgpr0
+
+    %2(s1) = G_ICMP intpred(ne), %0, %1
+    %3(s32) = G_CONSTANT i32 1
+    %4(s32) = G_CONSTANT i32 2
+    ; CHECK: %5(s32) = G_SELECT %2(s1), %3, %4
+    %5(s32) = G_SELECT %2, %3, %4
+
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
index 3496b1ab71fe..902f1e6c6725 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
@@ -24,8 +24,8 @@ legalized: true
 
 # CHECK-LABEL: name: load_constant
 # CHECK: registers:
-# CHECK: - { id: 0, class: sgpr }
-# CHECK: - { id: 1, class: sgpr }
+# CHECK: - { id: 0, class: sgpr, preferred-register: '' }
+# CHECK: - { id: 1, class: sgpr, preferred-register: '' }
 
 body: |
   bb.0:
@@ -40,8 +40,8 @@ legalized: true
 
 # CHECK-LABEL: name: load_global_uniform
 # CHECK: registers:
-# CHECK: - { id: 0, class: sgpr }
-# CHECK: - { id: 1, class: sgpr }
+# CHECK: - { id: 0, class: sgpr, preferred-register: '' }
+# CHECK: - { id: 1, class: sgpr, preferred-register: '' }
 
 body: |
   bb.0:
@@ -56,9 +56,9 @@ legalized: true
 
 # CHECK-LABEL: name: load_global_non_uniform
 # CHECK: registers:
-# CHECK: - { id: 0, class: sgpr }
-# CHECK: - { id: 1, class: vgpr }
-# CHECK: - { id: 2, class: vgpr }
+# CHECK: - { id: 0, class: sgpr, preferred-register: '' }
+# CHECK: - { id: 1, class: vgpr, preferred-register: '' }
+# CHECK: - { id: 2, class: vgpr, preferred-register: '' }
 
 body: |
diff --git a/test/CodeGen/AMDGPU/add.v2i16.ll b/test/CodeGen/AMDGPU/add.v2i16.ll
index e5e2d436deb0..76f724c2b90b 100644
--- a/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -66,7 +66,7 @@ define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out
 
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}}
 ; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0x1c8
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -84,7 +84,7 @@ define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %ou
 
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffcb3, v{{[0-9]+}}
 ; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0xfffffc21
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -101,7 +101,7 @@ define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)*
 ; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1
 ; VI: flat_load_ushort [[LOAD0:v[0-9]+]]
 ; VI: flat_load_ushort [[LOAD1:v[0-9]+]]
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], [[LOAD0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD1]]
 ; VI: v_or_b32_e32
 define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
@@ -140,7 +140,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(<2 x i16> addrspac
 ; VI-NOT: v_add_u16
 ; VI: v_mov_b32_e32 v[[K:[0-9]+]], 0x3f80
-; VI: v_add_u16_sdwa v{{[0-9]+}}, v[[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-NOT: v_add_u16
 ; VI: v_or_b32_e32
 define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
diff --git a/test/CodeGen/AMDGPU/ashr.v2i16.ll b/test/CodeGen/AMDGPU/ashr.v2i16.ll
index 7f424ef2a147..dd96e6264418 100644
--- a/test/CodeGen/AMDGPU/ashr.v2i16.ll
+++ b/test/CodeGen/AMDGPU/ashr.v2i16.ll
@@ -9,7 +9,7 @@
 ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[VLHS]]
 
 ; VI: v_ashrrev_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 ; CI: v_ashrrev_i32_e32
 ; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
diff --git a/test/CodeGen/AMDGPU/branch-relax-spill.ll b/test/CodeGen/AMDGPU/branch-relax-spill.ll
index ede15559c4ff..db476c21636f 100644
--- a/test/CodeGen/AMDGPU/branch-relax-spill.ll
+++ b/test/CodeGen/AMDGPU/branch-relax-spill.ll
@@ -7,110 +7,110 @@
 define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 {
 entry:
-  %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={SGPR0}"() #0
-  %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={SGPR1}"() #0
-  %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={SGPR2}"() #0
-  %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={SGPR3}"() #0
-  %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={SGPR4}"() #0
-  %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={SGPR5}"() #0
-  %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={SGPR6}"() #0
-  %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={SGPR7}"() #0
-  %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={SGPR8}"() #0
-  %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={SGPR9}"() #0
-  %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={SGPR10}"() #0
-  %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={SGPR11}"() #0
-  %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={SGPR12}"() #0
-  %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={SGPR13}"() #0
-  %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={SGPR14}"() #0
-  %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={SGPR15}"() #0
-  %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={SGPR16}"() #0
-  %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={SGPR17}"() #0
-  %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={SGPR18}"() #0
-  %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={SGPR19}"() #0
-  %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={SGPR20}"() #0
-  %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={SGPR21}"() #0
-  %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={SGPR22}"() #0
-  %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={SGPR23}"() #0
-  %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={SGPR24}"() #0
-  %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={SGPR25}"() #0
-  %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={SGPR26}"() #0
-  %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={SGPR27}"() #0
-  %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={SGPR28}"() #0
-  %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={SGPR29}"() #0
-  %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={SGPR30}"() #0
-  %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={SGPR31}"() #0
-  %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={SGPR32}"() #0
-  %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={SGPR33}"() #0
-  %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={SGPR34}"() #0
-  %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={SGPR35}"() #0
-  %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={SGPR36}"() #0
-  %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={SGPR37}"() #0
-  %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={SGPR38}"() #0
-  %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={SGPR39}"() #0
-  %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={SGPR40}"() #0
-  %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={SGPR41}"() #0
-  %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={SGPR42}"() #0
-  %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={SGPR43}"() #0
-  %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={SGPR44}"() #0
-  %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={SGPR45}"() #0
-  %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={SGPR46}"() #0
-  %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={SGPR47}"() #0
-  %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={SGPR48}"() #0
-  %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={SGPR49}"() #0
-  %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={SGPR50}"() #0
-  %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={SGPR51}"() #0
-  %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={SGPR52}"() #0
-  %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={SGPR53}"() #0
-  %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={SGPR54}"() #0
-  %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={SGPR55}"() #0
-  %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={SGPR56}"() #0
-  %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={SGPR57}"() #0
-  %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={SGPR58}"() #0
-  %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={SGPR59}"() #0
-  %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={SGPR60}"() #0
-  %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={SGPR61}"() #0
-  %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={SGPR62}"() #0
-  %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={SGPR63}"() #0
-  %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={SGPR64}"() #0
-  %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={SGPR65}"() #0
-  %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={SGPR66}"() #0
-  %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={SGPR67}"() #0
-  %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={SGPR68}"() #0
-  %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={SGPR69}"() #0
-  %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={SGPR70}"() #0
-  %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={SGPR71}"() #0
-  %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={SGPR72}"() #0
-  %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={SGPR73}"() #0
-  %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={SGPR74}"() #0
-  %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={SGPR75}"() #0
-  %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={SGPR76}"() #0
-  %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={SGPR77}"() #0
-  %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={SGPR78}"() #0
-  %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={SGPR79}"() #0
-  %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={SGPR80}"() #0
-  %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={SGPR81}"() #0
-  %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={SGPR82}"() #0
-  %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={SGPR83}"() #0
-  %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={SGPR84}"() #0
-  %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={SGPR85}"() #0
-  %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={SGPR86}"() #0
-  %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={SGPR87}"() #0
-  %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={SGPR88}"() #0
-  %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={SGPR89}"() #0
-  %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={SGPR90}"() #0
-  %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={SGPR91}"() #0
-  %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={SGPR92}"() #0
-  %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={SGPR93}"() #0
-  %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={SGPR94}"() #0
-  %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={SGPR95}"() #0
-  %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={SGPR96}"() #0
-  %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={SGPR97}"() #0
-  %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={SGPR98}"() #0
-  %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={SGPR99}"() #0
-  %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={SGPR100}"() #0
-  %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={SGPR101}"() #0
-  %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 0", "={SGPR102}"() #0
-  %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={SGPR103}"() #0
+  %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0
+  %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0
+  %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0
+  %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() #0
+  %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() #0
+  %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() #0
+  %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() #0
+  %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() #0
+  %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() #0
+  %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() #0
+  %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() #0
+  %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() #0
+  %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() #0
+  %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() #0
+  %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() #0
+  %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() #0
+  %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() #0
+  %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() #0
+  %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() #0
+  %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() #0
+  %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() #0
+  %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() #0
+  %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() #0
+  %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() #0
+  %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() #0
+  %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() #0
+  %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() #0
+  %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() #0
+  %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() #0
+  %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() #0
+  %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() #0
+  %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() #0
+  %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() #0
+  %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() #0
+  %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() #0
+  %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() #0
+  %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() #0
+  %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() #0
+  %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() #0
+  %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() #0
+  %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() #0
+  %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() #0
+  %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() #0
+  %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() #0
+  %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() #0
+  %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() #0
+  %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() #0
+  %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() #0
+  %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() #0
+  %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() #0
+  %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() #0
+  %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() #0
+  %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() #0
+  %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() #0
+  %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() #0
+  %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() #0
+  %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() #0
+  %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() #0
+  %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() #0
+  %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() #0
+  %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() #0
+  %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() #0
+  %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() #0
+  %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() #0
+  %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() #0
+  %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() #0
+  %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() #0
+  %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() #0
+  %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() #0
+  %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() #0
+  %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() #0
+  %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() #0
+  %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() #0
+  %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() #0
+  %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() #0
+  %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() #0
+  %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() #0
+  %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() #0
+  %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() #0
+  %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() #0
+  %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() #0
+  %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() #0
+  %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() #0
+  %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() #0
+  %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() #0
+  %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() #0
+  %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() #0
+  %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() #0
+  %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() #0
+  %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() #0
+  %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() #0
+  %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() #0
+  %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() #0
+  %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() #0
+  %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() #0
+  %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() #0
+  %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() #0
+  %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() #0
+  %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() #0
+  %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() #0
+  %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() #0
+  %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() #0
+  %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 0", "={s102}"() #0
+  %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={s103}"() #0
   %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_LO}"() #0
   %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_HI}"() #0
   %cmp = icmp eq i32 %cnd, 0
@@ -126,112 +126,112 @@ bb2: ; 28 bytes
   br label %bb3
 
 bb3:
-  tail call void asm sideeffect "; reg use $0", "{SGPR0}"(i32 %sgpr0) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR1}"(i32 %sgpr1) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR2}"(i32 %sgpr2) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR3}"(i32 %sgpr3) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR4}"(i32 %sgpr4) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR5}"(i32 %sgpr5) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR6}"(i32 %sgpr6) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR7}"(i32 %sgpr7) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR8}"(i32 %sgpr8) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR9}"(i32 %sgpr9) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR10}"(i32 %sgpr10) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR11}"(i32 %sgpr11) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR12}"(i32 %sgpr12) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR13}"(i32 %sgpr13) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR14}"(i32 %sgpr14) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR15}"(i32 %sgpr15) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR16}"(i32 %sgpr16) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR17}"(i32 %sgpr17) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR18}"(i32 %sgpr18) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR19}"(i32 %sgpr19) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR20}"(i32 %sgpr20) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR21}"(i32 %sgpr21) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR22}"(i32 %sgpr22) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR23}"(i32 %sgpr23) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR24}"(i32 %sgpr24) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR25}"(i32 %sgpr25) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR26}"(i32 %sgpr26) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR27}"(i32 %sgpr27) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR28}"(i32 %sgpr28) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR29}"(i32 %sgpr29) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR30}"(i32 %sgpr30) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR31}"(i32 %sgpr31) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR32}"(i32 %sgpr32) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR33}"(i32 %sgpr33) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR34}"(i32 %sgpr34) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR35}"(i32 %sgpr35) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR36}"(i32 %sgpr36) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR37}"(i32 %sgpr37) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR38}"(i32 %sgpr38) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR39}"(i32 %sgpr39) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR40}"(i32 %sgpr40) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR41}"(i32 %sgpr41) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR42}"(i32 %sgpr42) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR43}"(i32 %sgpr43) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR44}"(i32 %sgpr44) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR45}"(i32 %sgpr45) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR46}"(i32 %sgpr46) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR47}"(i32 %sgpr47) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR48}"(i32 %sgpr48) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR49}"(i32 %sgpr49) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR50}"(i32 %sgpr50) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR51}"(i32 %sgpr51) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR52}"(i32 %sgpr52) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR53}"(i32 %sgpr53) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR54}"(i32 %sgpr54) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR55}"(i32 %sgpr55) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR56}"(i32 %sgpr56) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR57}"(i32 %sgpr57) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR58}"(i32 %sgpr58) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR59}"(i32 %sgpr59) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR60}"(i32 %sgpr60) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR61}"(i32 %sgpr61) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR62}"(i32 %sgpr62) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR63}"(i32 %sgpr63) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR64}"(i32 %sgpr64) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR65}"(i32 %sgpr65) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR66}"(i32 %sgpr66) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR67}"(i32 %sgpr67) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR68}"(i32 %sgpr68) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR69}"(i32 %sgpr69) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR70}"(i32 %sgpr70) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR71}"(i32 %sgpr71) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR72}"(i32 %sgpr72) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR73}"(i32 %sgpr73) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR74}"(i32 %sgpr74) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR75}"(i32 %sgpr75) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR76}"(i32 %sgpr76) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR77}"(i32 %sgpr77) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR78}"(i32 %sgpr78) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR79}"(i32 %sgpr79) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR80}"(i32 %sgpr80) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR81}"(i32 %sgpr81) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR82}"(i32 %sgpr82) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR83}"(i32 %sgpr83) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR84}"(i32 %sgpr84) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR85}"(i32 %sgpr85) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR86}"(i32 %sgpr86) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR87}"(i32 %sgpr87) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR88}"(i32 %sgpr88) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR89}"(i32 %sgpr89) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR90}"(i32 %sgpr90) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR91}"(i32 %sgpr91) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR92}"(i32 %sgpr92) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR93}"(i32 %sgpr93) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR94}"(i32 %sgpr94) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR95}"(i32 %sgpr95) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR96}"(i32 %sgpr96) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR97}"(i32 %sgpr97) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR98}"(i32 %sgpr98) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR99}"(i32 %sgpr99) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR100}"(i32 %sgpr100) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR101}"(i32 %sgpr101) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR102}"(i32 %sgpr102) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR103}"(i32 %sgpr103) #0
-  tail call void asm sideeffect "; reg use $0", "{VCC_LO}"(i32 %vcc_lo) #0
-  tail call void asm sideeffect "; reg use $0", "{VCC_HI}"(i32 %vcc_hi) #0
+  tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) #0
+  tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) #0
+  tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) #0
+  tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) #0
+  tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) #0
+  tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) #0
+  tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) #0
+  tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) #0
+  tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) #0
+  tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) #0
+  tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) #0
+  tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) #0
+  tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) #0
+  tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) #0
+  tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) #0
+  tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) #0
+  tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) #0
+  tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) #0
+  tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) #0
+  tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) #0
+  tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) #0
+  tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) #0
+  tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) #0
+  tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) #0
+  tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) #0
+  tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) #0
+  tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) #0
+  tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) #0
+  tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) #0
+  tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) #0
+  tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) #0
+  tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) #0
+  tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) #0
+  tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) #0
+  tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) #0
+  tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) #0
+  tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) #0
+  tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) #0
+  tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) #0
+  tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) #0
+  tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) #0
+  tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) #0
+  tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) #0
+  tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) #0
+  tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) #0
+  tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) #0
+  tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) #0
+  tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) #0
+  tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) #0
+  tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) #0
+  tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) #0
+  tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) #0
+  tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) #0
+  tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) #0
+  tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) #0
+  tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) #0
+  tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) #0
+  tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) #0
+  tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) #0
+  tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) #0
+  tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) #0
+  tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) #0
+  tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) #0
+  tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) #0
+  tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) #0
+  tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) #0
+  tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) #0
+  tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) #0
+  tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) #0
+  tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) #0
+  tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) #0
+  tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) #0
+  tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) #0
+  tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) #0
+  tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) #0
+  tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) #0
+  tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) #0
+  tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) #0
+  tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) #0
+  tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) #0
+  tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) #0
+  tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) #0
+  tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) #0
+  tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) #0
+  tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) #0
+  tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) #0
+  tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) #0
+  tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) #0
+  tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) #0
+  tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) #0
+  tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) #0
+  tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) #0
+  tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) #0
+  tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) #0
+  tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) #0
+  tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) #0
+  tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) #0
+  tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) #0
+  tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) #0
+  tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) #0
+  tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) #0
+  tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) #0
+  tail call void asm sideeffect "; reg use $0", "{s102}"(i32 %sgpr102) #0
+  tail call void asm sideeffect "; reg use $0", "{s103}"(i32 %sgpr103) #0
+  tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) #0
+  tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) #0
   ret void
 }
diff --git a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
index fbfd0fbf9308..6ecf75c1acec 100644
--- a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
+++ b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
@@ -24,6 +24,10 @@
     ret void
   }
 
+  define amdgpu_ps void @v_max_reg_imm_f32() #0 {
+    ret void
+  }
+
   attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
 
 ...
@@ -422,3 +426,19 @@ body: |
     S_ENDPGM
 
 ...
+---
+
+# Pass used to crash with immediate second operand of max
+name: v_max_reg_imm_f32
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+body: |
+  bb.0 (%ir-block.0):
+    liveins: %vgpr0
+
+    %0 = COPY %vgpr0
+    %1 = V_MAX_F32_e64 0, killed %0, 0, 1056964608, 1, 0, implicit %exec
+
+...
diff --git a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
index 207dfce75f16..13aafc24895d 100644
--- a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
+++ b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
@@ -2,97 +2,97 @@
 
 ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_tahiti
 define amdgpu_kernel void @use_too_many_sgprs_tahiti() #0 {
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
-  call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" ()
-  call void asm sideeffect "", "~{VCC}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
+  call void asm sideeffect "", "~{s[96:103]}" ()
+  call void asm sideeffect "", "~{vcc}" ()
   ret void
 }
 
 ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_bonaire
 define amdgpu_kernel void @use_too_many_sgprs_bonaire() #1 {
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
-  call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" ()
-  call void asm sideeffect "", "~{VCC}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
+  call void asm sideeffect "", "~{s[96:103]}" ()
+  call void asm sideeffect "", "~{vcc}" ()
   ret void
 }
 
 ; ERROR: error: scalar registers limit of 104 exceeded (108) in use_too_many_sgprs_bonaire_flat_scr
 define amdgpu_kernel void @use_too_many_sgprs_bonaire_flat_scr() #1 {
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
-  call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" ()
-  call void asm sideeffect "", "~{VCC}" ()
-  call void asm sideeffect "", "~{FLAT_SCR}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
+  call void asm sideeffect "", "~{s[96:103]}" ()
+  call void asm sideeffect "", "~{vcc}" ()
+  call void asm sideeffect "", "~{flat_scratch}" ()
   ret void
 }
 
 ; ERROR: error: scalar registers limit of 96 exceeded (98) in use_too_many_sgprs_iceland
define amdgpu_kernel void @use_too_many_sgprs_iceland() #2 {
-  call void asm sideeffect "", "~{VCC}" ()
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
+  call void asm sideeffect "", "~{vcc}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
   ret void
 }
 
 ; ERROR: error: addressable scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji
 define amdgpu_kernel void @use_too_many_sgprs_fiji() #3 {
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
-  call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99}" ()
-  call void asm sideeffect "", "~{SGPR100_SGPR101}" ()
-  call void asm sideeffect "", "~{SGPR102}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
+  call void asm sideeffect "", "~{s[96:99]}" ()
+  call void asm sideeffect "", "~{s[100:101]}" ()
+  call void asm sideeffect "", "~{s102}" ()
   ret void
 }
diff --git a/test/CodeGen/AMDGPU/fabs.f16.ll b/test/CodeGen/AMDGPU/fabs.f16.ll
index d4ef7124a334..4e2ec4b3054f 100644
--- a/test/CodeGen/AMDGPU/fabs.f16.ll
+++ b/test/CodeGen/AMDGPU/fabs.f16.ll
@@ -40,7 +40,7 @@ define amdgpu_kernel void @s_fabs_f16(half addrspace(1)* %out, half %in) {
 ; VI: flat_load_ushort [[LO:v[0-9]+]]
 ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
 ; VI-DAG: v_and_b32_e32 [[FABS_LO:v[0-9]+]], [[MASK]], [[HI]]
-; VI-DAG: v_and_b32_sdwa [[FABS_HI:v[0-9]+]], [[MASK]], [[LO]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_and_b32_sdwa [[FABS_HI:v[0-9]+]], [[LO]], [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, [[FABS_HI]], [[FABS_LO]]
 
 ; VI: flat_store_dword
@@ -60,8 +60,8 @@ define amdgpu_kernel void @s_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half
 ; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]]
 
 ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
-; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
 ; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
 ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -128,7 +128,7 @@ define amdgpu_kernel void @fabs_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i
 ; CI: v_cvt_f16_f32
 
 ; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
-; VI: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
+; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
 
 ; GFX9: v_and_b32_e32 [[FABS:v[0-9]+]], 0x7fff7fff, [[VAL]]
diff --git a/test/CodeGen/AMDGPU/fadd.f16.ll b/test/CodeGen/AMDGPU/fadd.f16.ll
index 9b3d2a475a14..08199be144f4 100644
--- a/test/CodeGen/AMDGPU/fadd.f16.ll
+++ b/test/CodeGen/AMDGPU/fadd.f16.ll
@@ -78,7 +78,7 @@ entry:
 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_add_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
 
 ; GCN: buffer_store_dword v[[R_V2_F16]]
@@ -108,7 +108,7 @@ entry:
 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_mov_b32_e32 v[[CONST2:[0-9]+]], 0x4000
-; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST2]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]]
 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
@@ -137,7 +137,7 @@ entry:
 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
-; VI-DAG: v_add_f16_sdwa v[[R_F16_0:[0-9]+]], v[[CONST1]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[A_V2_F16]]
 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index 9e8ddd39bbaf..404358f0ecb9 100644
--- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -278,9 +278,9 @@
 }
 
 ; GCN-LABEL: {{^}}s_test_canonicalize_var_v2f16:
-; VI: v_mul_f16_e64 [[REG0:v[0-9]+]], 1.0, {{s[0-9]+}}
-; VI-DAG: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}}
-; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
+; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00
+; VI: v_mul_f16_sdwa [[REG0:v[0-9]+]], [[ONE]], {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}}
 ; VI-NOT: v_and_b32
 
 ; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{s[0-9]+$}}
diff --git a/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
index 5705cbc99443..a7664c399fbb 100644
--- a/test/CodeGen/AMDGPU/flat-scratch-reg.ll
+++ b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
@@ -21,7 +21,7 @@
 ; VI-XNACK: ; NumSgprs: 12
 define amdgpu_kernel void @no_vcc_no_flat() {
 entry:
-  call void asm sideeffect "", "~{SGPR7}"()
+  call void asm sideeffect "", "~{s7}"()
   ret void
 }
 
@@ -35,7 +35,7 @@ entry:
 ; VI-XNACK: ; NumSgprs: 12
 define amdgpu_kernel void @vcc_no_flat() {
 entry:
-  call void asm sideeffect "", "~{SGPR7},~{VCC}"()
+  call void asm sideeffect "", "~{s7},~{vcc}"()
   ret void
 }
 
@@ -52,7 +52,7 @@ entry:
 ; HSA-VI-XNACK: ; NumSgprs: 14
 define amdgpu_kernel void @no_vcc_flat() {
 entry:
-  call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"()
+  call void asm sideeffect "", "~{s7},~{flat_scratch}"()
   ret void
 }
 
@@ -68,7 +68,7 @@ entry:
 ; HSA-VI-XNACK: ; NumSgprs: 14
 define amdgpu_kernel void @vcc_flat() {
 entry:
-  call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"()
+  call void asm sideeffect "", "~{s7},~{vcc},~{flat_scratch}"()
   ret void
 }
 
@@ -81,7 +81,7 @@ entry:
 ; VI-XNACK: NumSgprs: 6
 define amdgpu_kernel void @use_flat_scr() #0 {
 entry:
-  call void asm sideeffect "; clobber ", "~{FLAT_SCR}"()
+  call void asm sideeffect "; clobber ", "~{flat_scratch}"()
   ret void
 }
 
@@ -91,7 +91,7 @@ entry:
 ; VI-XNACK: NumSgprs: 6
 define amdgpu_kernel void @use_flat_scr_lo() #0 {
 entry:
-  call void asm sideeffect "; clobber ", "~{FLAT_SCR_LO}"()
+  call void asm sideeffect "; clobber ", "~{flat_scratch_lo}"()
   ret void
 }
 
@@ -101,7 +101,7 @@ entry:
 ; VI-XNACK: NumSgprs: 6
 define amdgpu_kernel void @use_flat_scr_hi() #0 {
 entry:
-  call void asm sideeffect "; clobber ", "~{FLAT_SCR_HI}"()
+  call void asm sideeffect "; clobber ", "~{flat_scratch_hi}"()
   ret void
 }
diff --git a/test/CodeGen/AMDGPU/fmul.f16.ll b/test/CodeGen/AMDGPU/fmul.f16.ll
index 4ef2aa693cf4..cd86409e2038 100644
--- a/test/CodeGen/AMDGPU/fmul.f16.ll
+++ b/test/CodeGen/AMDGPU/fmul.f16.ll
@@ -78,7 +78,7 @@ entry:
 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_mul_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
 
 ; GCN: buffer_store_dword v[[R_V2_F16]]
@@ -105,7 +105,7 @@ entry:
 ; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]]
 ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
 ; VI-DAG: v_mov_b32_e32 v[[CONST4:[0-9]+]], 0x4400
-; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
 ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
 ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
@@ -131,7 +131,7 @@ entry:
 ; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
 ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
 ; VI-DAG: v_mov_b32_e32 v[[CONST3:[0-9]+]], 0x4200
-; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST3]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
 ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
 ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
diff --git a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index c256159726bf..f4afaca2b7a7 100644
--- a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -73,7 +73,7 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
 ; CIVI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
 ; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]]
 ; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
 
 ; CIVI: flat_store_dword
@@ -92,9 +92,9 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x
 ; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
 ; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
 ; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]]
-; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
 
 ; GFX9: s_mov_b32 [[MASK:s[0-9]+]], 0x80008000
@@ -116,7 +116,7 @@ define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x h
 ; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
 
 ; VI: v_mul_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|, 4.0
-; VI: v_mul_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|, 4.0
+; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 
 ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
 ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 neg_lo:[1,0] neg_hi:[1,0]
diff --git a/test/CodeGen/AMDGPU/fneg.f16.ll b/test/CodeGen/AMDGPU/fneg.f16.ll
index 16e4fc680bea..59745a9352ce 100644
--- a/test/CodeGen/AMDGPU/fneg.f16.ll
+++ b/test/CodeGen/AMDGPU/fneg.f16.ll
@@ -117,7 +117,7 @@ define amdgpu_kernel void @fneg_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i
 ; CI: v_cvt_f16_f32
 
 ; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
-; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
+; VI: v_mul_f16_sdwa v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
 
 ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,0] neg_hi:[1,0]{{$}}
diff --git a/test/CodeGen/AMDGPU/fptosi.f16.ll b/test/CodeGen/AMDGPU/fptosi.f16.ll
index 50e56e08416a..f310618d8bdb 100644
--- a/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -66,7 +66,7 @@ entry:
 ; VI: v_cvt_f32_f16_sdwa v[[A_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]]
 ; VI: v_cvt_i32_f32_sdwa v[[R_I16_1:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_1]], v[[R_I16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 ; GCN: buffer_store_dword v[[R_V2_I16]]
 ; GCN: s_endpgm
diff --git a/test/CodeGen/AMDGPU/fptoui.f16.ll b/test/CodeGen/AMDGPU/fptoui.f16.ll
index 2afa6111cf17..7641c08e33c3 100644
--- a/test/CodeGen/AMDGPU/fptoui.f16.ll
+++ b/test/CodeGen/AMDGPU/fptoui.f16.ll
@@ -66,7 +66,7 @@ entry:
 ; VI-DAG: v_cvt_f32_f16_sdwa v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]]
 ; VI: v_cvt_i32_f32_sdwa v[[R_I16_0:[0-9]+]], v[[A_F32_0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_1]], v[[R_I16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 ; GCN: buffer_store_dword v[[R_V2_I16]]
 ; GCN: s_endpgm
diff --git a/test/CodeGen/AMDGPU/fsub.f16.ll b/test/CodeGen/AMDGPU/fsub.f16.ll
index 836b480b6a67..fa00c06546db 100644
--- a/test/CodeGen/AMDGPU/fsub.f16.ll
+++ b/test/CodeGen/AMDGPU/fsub.f16.ll
@@ -78,7 +78,7 @@ entry:
 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG:
v_subrev_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_subrev_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] neg_lo:[0,1] neg_hi:[0,1] @@ -146,7 +146,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00 -; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONSTM1]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], -2.0, v[[A_V2_F16]] ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] diff --git a/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/test/CodeGen/AMDGPU/hsa-note-no-func.ll index af63a4f8df76..81d9ed2eba8c 100644 --- a/test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ b/test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -1,6 +1,12 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx600 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI600 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx601 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI601 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI702 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s @@ -15,11 +21,16 @@ ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx903 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX903 %s ; HSA: .hsa_code_object_version 2,1 +; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU" +; HSA-SI601: .hsa_code_object_isa 6,0,1,"AMD","AMDGPU" ; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" ; HSA-CI701: 
.hsa_code_object_isa 7,0,1,"AMD","AMDGPU" ; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU" +; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU" ; HSA-VI800: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU" ; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" ; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU" @@ -28,3 +39,5 @@ ; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU" ; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU" ; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU" +; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" +; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU" diff --git a/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll b/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll index 6e411ce5e017..0c5b8fbda222 100644 --- a/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll +++ b/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll @@ -5,40 +5,40 @@ ; GCN: ; illegal copy v1 to s9 define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() #0 { - %vgpr = call i32 asm sideeffect "; def $0", "=${VGPR1}"() - call void asm sideeffect "; use $0", "${SGPR9}"(i32 %vgpr) + %vgpr = call i32 asm sideeffect "; def $0", "=${v1}"() + call void asm sideeffect "; use $0", "${s9}"(i32 %vgpr) ret void } ; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v2i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:1] to s[10:11] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() #0 { - %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1}"() - call void asm sideeffect "; use $0", "${SGPR10_SGPR11}"(<2 x i32> %vgpr) + %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${v[0:1]}"() + call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr) ret void } ; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v4i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:3] to s[8:11] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() #0 { - %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3}"() - call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11}"(<4 x i32> %vgpr) + %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${v[0:3]}"() + call void asm sideeffect "; use $0", "${s[8:11]}"(<4 x i32> %vgpr) ret void } ; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v8i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:7] to s[8:15] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 { - %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}"() - call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}"(<8 x i32> %vgpr) + %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${v[0:7]}"() + call void asm sideeffect "; use $0", "${s[8:15]}"(<8 x i32> %vgpr) ret void } ; ERR error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:15] to s[16:31] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 { - %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}"() - call void asm sideeffect "; use $0", "${SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23_SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}"(<16 x i32> %vgpr) + %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${v[0:15]}"() + call void asm sideeffect "; use $0", "${s[16:31]}"(<16 x i32> 
%vgpr) ret void } diff --git a/test/CodeGen/AMDGPU/immv216.ll b/test/CodeGen/AMDGPU/immv216.ll index bc951a82becd..cd3502baee7b 100644 --- a/test/CodeGen/AMDGPU/immv216.ll +++ b/test/CodeGen/AMDGPU/immv216.ll @@ -124,7 +124,7 @@ define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST0]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -142,7 +142,7 @@ define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST05]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -160,7 +160,7 @@ define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -0.5, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM05]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -178,7 +178,7 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -196,7 +196,7 @@ define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -214,7 +214,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* ; VI: 
buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -232,7 +232,7 @@ define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -250,7 +250,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 4.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST4]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -268,7 +268,7 @@ define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -4.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM4]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -285,7 +285,7 @@ define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* ; VI: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800 ; VI: buffer_load_dword ; VI-NOT: and -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST05]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}} ; VI: v_or_b32 ; VI: buffer_store_dword @@ -306,7 +306,7 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace ; VI-DAG: buffer_load_dword ; VI-NOT: and ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}} -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: buffer_store_dword 
define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { @@ -325,7 +325,7 @@ define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %o ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -343,7 +343,7 @@ define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -361,7 +361,7 @@ define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 16, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST16]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -379,7 +379,7 @@ define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xffff -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -397,7 +397,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* % ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xfffe -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -415,7 +415,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* % ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -16, [[VAL0]] ; VI-DAG: v_mov_b32_e32 
[[CONSTM16:v[0-9]+]], 0xfff0 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM16]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -433,7 +433,7 @@ define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 63, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST63]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST63]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -451,7 +451,7 @@ define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 64, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST64]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST64]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll index fab1f8d12253..0d20c32a4770 100644 --- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -379,7 +379,7 @@ entry: %idx0 = load volatile i32, i32 addrspace(1)* %gep %idx1 = add i32 %idx0, 1 %val0 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx0 - %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={SGPR4}" () + %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" () %val1 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx1 store volatile i32 %val0, i32 addrspace(1)* %out0 store volatile i32 %val1, i32 addrspace(1)* %out0 diff --git a/test/CodeGen/AMDGPU/inline-asm.ll b/test/CodeGen/AMDGPU/inline-asm.ll index 36441cf778c2..c0f5218efc16 100644 --- a/test/CodeGen/AMDGPU/inline-asm.ll +++ b/test/CodeGen/AMDGPU/inline-asm.ll @@ -193,7 +193,7 @@ entry: ; CHECK: use v[0:1] define amdgpu_kernel void @i64_imm_input_phys_vgpr() { entry: - call void asm sideeffect "; use $0 ", "{VGPR0_VGPR1}"(i64 123456) + call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456) ret void } @@ -202,7 +202,7 @@ entry: ; CHECK: ; use v0 define amdgpu_kernel void @i1_imm_input_phys_vgpr() { entry: - call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 true) + call void asm sideeffect "; use $0 ", "{v0}"(i1 true) ret void } @@ -215,7 +215,7 @@ entry: define amdgpu_kernel void @i1_input_phys_vgpr() { entry: %val = load i1, i1 addrspace(1)* undef - call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 %val) + call void asm sideeffect "; use $0 ", "{v0}"(i1 %val) ret void } @@ -229,7 +229,7 @@ define amdgpu_kernel void @i1_input_phys_vgpr_x2() { entry: %val0 = load volatile i1, i1 addrspace(1)* undef %val1 = load volatile i1, i1 
addrspace(1)* undef - call void asm sideeffect "; use $0 $1 ", "{VGPR0}, {VGPR1}"(i1 %val0, i1 %val1) + call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1) ret void } @@ -240,8 +240,8 @@ entry: ; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1 define amdgpu_kernel void @muliple_def_phys_vgpr() { entry: - %def0 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"() - %def1 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"() + %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"() + %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"() %add = shl i32 %def0, %def1 store i32 %add, i32 addrspace(1)* undef ret void diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index 1edccff3bf15..86fc41a23772 100644 --- a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -261,7 +261,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(<2 x i16> addrspace ; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e70000 ; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]] -; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] @@ -285,7 +285,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out, ; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]] -; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], -15, 16, [[ELT0]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { @@ -345,7 +345,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(<2 x half> addrspac ; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x45000000 ; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]] -; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] @@ -369,7 +369,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_1(<2 x half> addrspace(1)* %out ; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]] -; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], 35, 16, [[ELT0]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) 
#0 { diff --git a/test/CodeGen/AMDGPU/limit-coalesce.mir b/test/CodeGen/AMDGPU/limit-coalesce.mir index a0d2d6c097a2..7d6d8a5891cd 100644 --- a/test/CodeGen/AMDGPU/limit-coalesce.mir +++ b/test/CodeGen/AMDGPU/limit-coalesce.mir @@ -2,13 +2,13 @@ # Check that coalescer does not create wider register tuple than in source -# CHECK: - { id: 2, class: vreg_64 } -# CHECK: - { id: 3, class: vreg_64 } -# CHECK: - { id: 4, class: vreg_64 } -# CHECK: - { id: 5, class: vreg_96 } -# CHECK: - { id: 6, class: vreg_96 } -# CHECK: - { id: 7, class: vreg_128 } -# CHECK: - { id: 8, class: vreg_128 } +# CHECK: - { id: 2, class: vreg_64, preferred-register: '' } +# CHECK: - { id: 3, class: vreg_64, preferred-register: '' } +# CHECK: - { id: 4, class: vreg_64, preferred-register: '' } +# CHECK: - { id: 5, class: vreg_96, preferred-register: '' } +# CHECK: - { id: 6, class: vreg_96, preferred-register: '' } +# CHECK: - { id: 7, class: vreg_128, preferred-register: '' } +# CHECK: - { id: 8, class: vreg_128, preferred-register: '' } # No more registers shall be defined # CHECK-NEXT: liveins: # CHECK: FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %4, diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll new file mode 100644 index 000000000000..873a3f0f368f --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.alignbit(i32, i32, i32) #0 +declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0 + +; GCN-LABEL: {{^}}v_alignbit_b32: +; GCN: v_alignbit_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}} +define amdgpu_kernel void @v_alignbit_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 { + %val = call i32 @llvm.amdgcn.alignbit(i32 %src1, i32 %src2, i32 %src3) #0 + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_alignbyte_b32: +; GCN: v_alignbyte_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}} +define amdgpu_kernel void @v_alignbyte_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 { + %val = call i32 @llvm.amdgcn.alignbyte(i32 %src1, i32 %src2, i32 %src3) #0 + store i32 %val, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll index 3a2b87cd87f3..83bc8b234724 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll @@ -4,18 +4,28 @@ declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8: -; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v5, v1 +; GCN-DAG: v_mov_b32_e32 v4, v0 define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { - %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0 + %tmp1 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0 + %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0 + store i64 %tmp2, i64 addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8_non_immediate: -; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], 
s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_mqsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7] +; GCN-DAG: v_mov_b32_e32 v3, v1 +; GCN-DAG: v_mov_b32_e32 v2, v0 define amdgpu_kernel void @v_mqsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { - %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0 + %tmp3 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0 + %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0 + store i64 %tmp4, i64 addrspace(1)* %out, align 4 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll index a8d03bf6bbac..685b5e0f29c4 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll @@ -3,45 +3,56 @@ declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0 -; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_non_inline_constant: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @v_mqsad_u32_u8_use_non_inline_constant(<4 x i32> addrspace(1)* %out, i64 %src) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 100, <4 x i32> <i32 100, i32 100, i32 100, i32 100>) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 +; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate: +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] +define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 10, i32 20, i32 30, i32 40>) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %b) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 - ret void -} - -; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> <i32 10, i32 20, i32 30, i32 40>) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + 
%tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %b) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_fp_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> <i32 1065353216, i32 0, i32 0, i32 0>) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 1065353216, i32 0, i32 0, i32 0>) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_sgpr_vgpr: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) { %in = load <4 x i32>, <4 x i32> addrspace(1) * %input - - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %in) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %in) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll index be71225c5e06..1f46613a8db0 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll @@ -4,18 +4,28 @@ declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8: -; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v5, v1 +; GCN-DAG: v_mov_b32_e32 v4, v0 define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { - %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0 + %tmp1 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0 + %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0 + store i64 %tmp2, i64 addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8_non_immediate: -; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; 
GCN: v_qsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7] +; GCN-DAG: v_mov_b32_e32 v3, v1 +; GCN-DAG: v_mov_b32_e32 v2, v0 define amdgpu_kernel void @v_qsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { - %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0 + %tmp3 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0 + %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0 + store i64 %tmp4, i64 addrspace(1)* %out, align 4 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll index eec187390169..806723e5136c 100644 --- a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -118,7 +118,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_b( ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] ; VI-FLUSH: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[C_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[B_V2_F16]], v[[C_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-FLUSH-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]] ; VI-FLUSH-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]] ; VI-FLUSH-NOT: v_and_b32 diff --git a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll index a4353d1136e1..8f4b314ffabb 100644 --- a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -82,7 +82,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NOT: and ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]] @@ -110,7 +110,7 @@ entry: ; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400 -; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] @@ -138,7 +138,7 @@ entry: ; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200 -; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST3]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: 
v_max_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] diff --git a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll index 4875d26fc860..1a86286f7136 100644 --- a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -81,7 +81,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_min_f16_sdwa v[[R_F16_1:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_min_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NOT: and ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]] @@ -111,7 +111,7 @@ entry: ; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400 -; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] @@ -139,7 +139,7 @@ entry: ; SI: v_min_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200 -; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST3]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] diff --git a/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll index 77d793201adc..49f00e9447da 100644 --- a/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -608,11 +608,11 @@ ret: ; GCN: ;;#ASMSTART ; GCN: ; use s[0:1] define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { - call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" () #0 - call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" () #0 - call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19}"() #0 - call void asm sideeffect "", "~{VGPR20_VGPR21}"() #0 - call void asm sideeffect "", "~{VGPR22}"() #0 + call void asm sideeffect "", "~{v[0:7]}" () #0 + call void asm sideeffect "", "~{v[8:15]}" () #0 + call void asm sideeffect "", "~{v[16:19]}"() #0 + call void asm sideeffect "", "~{v[20:21]}"() #0 + call void asm sideeffect "", "~{v22}"() #0 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 diff --git a/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll new file mode 100644 index 000000000000..5b2da788a405 --- /dev/null +++ b/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll @@ -0,0 +1,131 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa 
-amdgpu-promote-alloca < %s | FileCheck --check-prefix=OPT %s
+
+; Make sure that array allocas that are loaded and stored as multi-element aggregates are handled
+; correctly. Strictly speaking, the promote-alloca pass shouldn't have to deal with this case, as
+; it is non-canonical, but the pass should handle it gracefully if it does occur.
+; The checks look for the lines that previously caused issues in PromoteAlloca; opt should now
+; leave these unchanged.
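+;
+; For orientation only (a hedged sketch, never checked by FileCheck): the canonical form of such
+; an aggregate copy, as instcombine would normally scalarize it, looks roughly like the following,
+; where @src, %src.elt and %dst.elt are hypothetical names invented for this sketch:
+;   %src.elt = getelementptr [1 x float], [1 x float] addrspace(1)* @src, i32 0, i32 0
+;   %val = load float, float addrspace(1)* %src.elt
+;   %dst.elt = getelementptr [1 x float], [1 x float]* %f1, i32 0, i32 0
+;   store float %val, float* %dst.elt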
+
+; OPT-LABEL: @promote_1d_aggr(
+; OPT: store [1 x float] %tmp3, [1 x float]* %f1
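+; (i.e. the whole-aggregate store of %tmp3 must still be present; the pass is not required to
+; promote or scalarize %f1 here, only to handle the aggregate access gracefully.)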
+
+%Block = type { [1 x float], i32 }
+%gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] }
+
+@block = external addrspace(1) global %Block
+@pv = external addrspace(1) global %gl_PerVertex
+
+define amdgpu_vs void @promote_1d_aggr() #0 {
+ %i = alloca i32
+ %f1 = alloca [1 x float]
+ %tmp = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1
+ %tmp1 = load i32, i32 addrspace(1)* %tmp
+ store i32 %tmp1, i32* %i
+ %tmp2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0
+ %tmp3 = load [1 x float], [1 x float] addrspace(1)* %tmp2
+ store [1 x float] %tmp3, [1 x float]* %f1
+ %tmp4 = load i32, i32* %i
+ %tmp5 = getelementptr [1 x float], [1 x float]* %f1, i32 0, i32 %tmp4
+ %tmp6 = load float, float* %tmp5
+ %tmp7 = alloca <4 x float>
+ %tmp8 = load <4 x float>, <4 x float>* %tmp7
+ %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0
+ %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1
+ %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2
+ %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3
+ %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
+ store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13
+ ret void
+}
+
+
+; OPT-LABEL: @promote_store_aggr(
+; OPT: %tmp6 = load [2 x float], [2 x float]* %f1
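+; (The first-class aggregate load of %f1 is the non-canonical construct under test; the check
+; asserts it survives the pass unchanged.)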
+
+%Block2 = type { i32, [2 x float] }
+@block2 = external addrspace(1) global %Block2
+
+define amdgpu_vs void @promote_store_aggr() #0 {
+ %i = alloca i32
+ %f1 = alloca [2 x float]
+ %tmp = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0
+ %tmp1 = load i32, i32 addrspace(1)* %tmp
+ store i32 %tmp1, i32* %i
+ %tmp2 = load i32, i32* %i
+ %tmp3 = sitofp i32 %tmp2 to float
+ %tmp4 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 0
+ store float %tmp3, float* %tmp4
+ %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 1
+ store float 2.000000e+00, float* %tmp5
+ %tmp6 = load [2 x float], [2 x float]* %f1
+ %tmp7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1
+ store [2 x float] %tmp6, [2 x float] addrspace(1)* %tmp7
+ %tmp8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
+ store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> addrspace(1)* %tmp8
+ ret void
+}
+
+; OPT-LABEL: @promote_load_from_store_aggr(
+; OPT: store [2 x float] %tmp3, [2 x float]* %f1
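+; (As above: the aggregate store must remain, since %f1 is then read back with a dynamic index
+; below.)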
+
+%Block3 = type { [2 x float], i32 }
+@block3 = external addrspace(1) global %Block3
+
+define amdgpu_vs void @promote_load_from_store_aggr() #0 {
+ %i = alloca i32
+ %f1 = alloca [2 x float]
+ %tmp = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1
+ %tmp1 = load i32, i32 addrspace(1)* %tmp
+ store i32 %tmp1, i32* %i
+ %tmp2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0
+ %tmp3 = load [2 x float], [2 x float] addrspace(1)* %tmp2
+ store [2 x float] %tmp3, [2 x float]* %f1
+ %tmp4 = load i32, i32* %i
+ %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 %tmp4
+ %tmp6 = load float, float* %tmp5
+ %tmp7 = alloca <4 x float>
+ %tmp8 = load <4 x float>, <4 x float>* %tmp7
+ %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0
+ %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1
+ %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2
+ %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3
+ %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
+ store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13
+ ret void
+}
+
+; OPT-LABEL: @promote_double_aggr(
+; OPT: store [2 x double] %tmp5, [2 x double]* %s
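+; (The [2 x double] value is assembled with insertvalue from two scalar loads and stored whole;
+; the check asserts this aggregate store is left in place.)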
+
+@tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
+@frag_color = external addrspace(1) global <4 x float>
+
+define amdgpu_ps void @promote_double_aggr() #0 {
+ %s = alloca [2 x double]
+ %tmp = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
+ %tmp1 = load double, double addrspace(1)* %tmp
+ %tmp2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
+ %tmp3 = load double, double addrspace(1)* %tmp2
+ %tmp4 = insertvalue [2 x double] undef, double %tmp1, 0
+ %tmp5 = insertvalue [2 x double] %tmp4, double %tmp3, 1
+ store [2 x double] %tmp5, [2 x double]* %s
+ %tmp6 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
+ %tmp7 = load double, double* %tmp6
+ %tmp8 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
+ %tmp9 = load double, double* %tmp8
+ %tmp10 = fadd double %tmp7, %tmp9
+ %tmp11 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
+ store double %tmp10, double* %tmp11
+ %tmp12 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
+ %tmp13 = load double, double* %tmp12
+ %tmp14 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
+ %tmp15 = load double, double* %tmp14
+ %tmp16 = fadd double %tmp13, %tmp15
+ %tmp17 = fptrunc double %tmp16 to float
+ %tmp18 = insertelement <4 x float> undef, float %tmp17, i32 0
+ %tmp19 = insertelement <4 x float> %tmp18, float %tmp17, i32 1
+ %tmp20 = insertelement <4 x float> %tmp19, float %tmp17, i32 2
+ %tmp21 = insertelement <4 x float> %tmp20, float %tmp17, i32 3
+ store <4 x float> %tmp21, <4 x float> addrspace(1)* @frag_color
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir b/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
new file mode 100644
index 000000000000..1a0d68d81f97
--- /dev/null
+++ b/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
@@ -0,0 +1,69 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=simple-register-coalescing,rename-independent-subregs -o - %s | FileCheck -check-prefix=GCN %s
+---
+
+# GCN-LABEL: name: mac_invalid_operands
+# GCN: undef %18.sub0 = V_MAC_F32_e32 undef %3, undef %9, undef %18.sub0, implicit %exec
+
+name: mac_invalid_operands
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_128 }
+  - { id: 1, class: vreg_128 }
+  - { id: 2, class: sgpr_64 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: vgpr_32 }
+  - { id: 5, class: vgpr_32 }
+  - { id: 6, class: vgpr_32 }
+  - { id: 7, class: sreg_64 }
+  - { id: 8, class: vgpr_32 }
+  - { id: 9, class: vgpr_32 }
+  - { id: 10, class: vreg_64 }
+  - { id: 11, class: vreg_64 }
+  - { id: 12, class: vreg_128 }
+  - { id: 13, class: vreg_128 }
+  - { id: 14, class: vgpr_32 }
+  - { id: 15, class: vreg_64 }
+  - { id: 16, class: vgpr_32 }
+  - { id: 17, class: vreg_128 }
+body: |
+  bb.0:
+    successors: %bb.2, %bb.1
+
+    %7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, 0, implicit %exec
+    %vcc = COPY killed %7
+    S_CBRANCH_VCCZ %bb.2, implicit killed %vcc
+
+  bb.1:
+    successors: %bb.3
+
+    %4 = V_ADD_F32_e32 undef %6, undef %5, implicit %exec
+    undef %12.sub0 = COPY killed %4
+    %17 = COPY killed %12
+    S_BRANCH %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    %8 = V_MAC_F32_e32 undef %3, undef %9, undef %8, implicit %exec
+    undef %13.sub0 = COPY %8
+    %13.sub1 = COPY %8
+    %13.sub2 = COPY killed %8
+    %0 = COPY killed %13
+    %17 = COPY killed %0
+
+  bb.3:
+    %1 = COPY killed %17
+    FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, implicit %exec, implicit %flat_scr
+    %14 = COPY %1.sub1
+    %16 = COPY killed %1.sub0
+    undef %15.sub0 = COPY killed %16
+    %15.sub1 = COPY killed %14
+    FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, implicit %exec, implicit %flat_scr
+    S_ENDPGM
+
+...
diff --git a/test/CodeGen/AMDGPU/scratch-simple.ll b/test/CodeGen/AMDGPU/scratch-simple.ll
index 6ed730ad60f4..abd15f1fb47f 100644
--- a/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -12,8 +12,10 @@
 ; GCN-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
 ; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
-; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], 0x200, [[CLAMP_IDX]]
-; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], 0x400, [[CLAMP_IDX]]
+; GCN-DAG: v_mov_b32_e32 [[C200:v[0-9]+]], 0x200
+; GCN-DAG: v_mov_b32_e32 [[C400:v[0-9]+]], 0x400
+; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[C200]], [[CLAMP_IDX]]
+; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[C400]], [[CLAMP_IDX]]
 ; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
 ; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
diff --git a/test/CodeGen/AMDGPU/sdwa-peephole.ll b/test/CodeGen/AMDGPU/sdwa-peephole.ll
index a319edfc5ace..66e166d283f7 100644
--- a/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ b/test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -74,7 +74,7 @@ entry:
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_LO:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_HI:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL_HI]], v[[DST_MUL_LO]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL_LO]], v[[DST_MUL_HI]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
 entry:
@@ -97,8 +97,8 @@ entry:
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL2:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL3:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL3]], v[[DST_MUL2]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL1]], v[[DST_MUL0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) {
 entry:
@@ -125,10 +125,10 @@ entry:
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL5:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL6:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL7:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL7]], v[[DST_MUL6]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL5]], v[[DST_MUL4]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL3]], v[[DST_MUL2]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL1]], v[[DST_MUL0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL6]], v[[DST_MUL7]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL4]], v[[DST_MUL5]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) {
 entry:
@@ -347,8 +347,8 @@ entry:
 ; NOSDWA-NOT: v_mul_u32_u24_sdwa
 ; SDWA-DAG: v_mov_b32_e32 v[[M321:[0-9]+]], 0x141
 ; SDWA-DAG: v_mov_b32_e32 v[[M123:[0-9]+]], 0x7b
-; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v[[M123]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v[[M321]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[M123]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[M321]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
 entry:
@@ -367,7 +367,7 @@ entry:
 ; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; NOSDWA-NOT: v_mul_u32_u24_sdwa
-; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
 entry:
@@ -408,9 +408,9 @@ store_label:
 ; NOSDWA-NOT: v_and_b32_sdwa
 ; NOSDWA-NOT: v_or_b32_sdwa
-; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
-; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
 ; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
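The sdwa-peephole updates above all follow one pattern: swapping the two sources of a commutative SDWA operation also swaps the matching src0_sel/src1_sel fields, so both orderings select the same halves. A sketch with illustrative registers (v0, v1, v2 are placeholders, not taken from the tests):

    ; two equivalent ways to OR the low half of v1 with all of v2
    v_or_b32_sdwa v0, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
    v_or_b32_sdwa v0, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD

The rewritten checks expect the second ordering, with the WORD-selected operand in src0 and the plain DWORD operand in src1.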
@@ -10,7 +10,7 @@
 ; VI: v_lshlrev_b32_e32
 ; VI: v_lshlrev_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; CI: v_lshlrev_b32_e32
 ; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
diff --git a/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
index 114c97b61bd4..a57e7b13453f 100644
--- a/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
+++ b/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
@@ -25,50 +25,50 @@
 ; SMEM: s_dcache_wb
 ; ALL: s_endpgm
 define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
-  call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" ()
-  call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" ()
-  call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19_VGPR20_VGPR21_VGPR22_VGPR23}" ()
-  call void asm sideeffect "", "~{VGPR24_VGPR25_VGPR26_VGPR27_VGPR28_VGPR29_VGPR30_VGPR31}" ()
-  call void asm sideeffect "", "~{VGPR32_VGPR33_VGPR34_VGPR35_VGPR36_VGPR37_VGPR38_VGPR39}" ()
-  call void asm sideeffect "", "~{VGPR40_VGPR41_VGPR42_VGPR43_VGPR44_VGPR45_VGPR46_VGPR47}" ()
-  call void asm sideeffect "", "~{VGPR48_VGPR49_VGPR50_VGPR51_VGPR52_VGPR53_VGPR54_VGPR55}" ()
-  call void asm sideeffect "", "~{VGPR56_VGPR57_VGPR58_VGPR59_VGPR60_VGPR61_VGPR62_VGPR63}" ()
-  call void asm sideeffect "", "~{VGPR64_VGPR65_VGPR66_VGPR67_VGPR68_VGPR69_VGPR70_VGPR71}" ()
-  call void asm sideeffect "", "~{VGPR72_VGPR73_VGPR74_VGPR75_VGPR76_VGPR77_VGPR78_VGPR79}" ()
-  call void asm sideeffect "", "~{VGPR80_VGPR81_VGPR82_VGPR83_VGPR84_VGPR85_VGPR86_VGPR87}" ()
-  call void asm sideeffect "", "~{VGPR88_VGPR89_VGPR90_VGPR91_VGPR92_VGPR93_VGPR94_VGPR95}" ()
-  call void asm sideeffect "", "~{VGPR96_VGPR97_VGPR98_VGPR99_VGPR100_VGPR101_VGPR102_VGPR103}" ()
-  call void asm sideeffect "", "~{VGPR104_VGPR105_VGPR106_VGPR107_VGPR108_VGPR109_VGPR110_VGPR111}" ()
-  call void asm sideeffect "", "~{VGPR112_VGPR113_VGPR114_VGPR115_VGPR116_VGPR117_VGPR118_VGPR119}" ()
-  call void asm sideeffect "", "~{VGPR120_VGPR121_VGPR122_VGPR123_VGPR124_VGPR125_VGPR126_VGPR127}" ()
-  call void asm sideeffect "", "~{VGPR128_VGPR129_VGPR130_VGPR131_VGPR132_VGPR133_VGPR134_VGPR135}" ()
-  call void asm sideeffect "", "~{VGPR136_VGPR137_VGPR138_VGPR139_VGPR140_VGPR141_VGPR142_VGPR143}" ()
-  call void asm sideeffect "", "~{VGPR144_VGPR145_VGPR146_VGPR147_VGPR148_VGPR149_VGPR150_VGPR151}" ()
-  call void asm sideeffect "", "~{VGPR152_VGPR153_VGPR154_VGPR155_VGPR156_VGPR157_VGPR158_VGPR159}" ()
-  call void asm sideeffect "", "~{VGPR160_VGPR161_VGPR162_VGPR163_VGPR164_VGPR165_VGPR166_VGPR167}" ()
-  call void asm sideeffect "", "~{VGPR168_VGPR169_VGPR170_VGPR171_VGPR172_VGPR173_VGPR174_VGPR175}" ()
-  call void asm sideeffect "", "~{VGPR176_VGPR177_VGPR178_VGPR179_VGPR180_VGPR181_VGPR182_VGPR183}" ()
-  call void asm sideeffect "", "~{VGPR184_VGPR185_VGPR186_VGPR187_VGPR188_VGPR189_VGPR190_VGPR191}" ()
-  call void asm sideeffect "", "~{VGPR192_VGPR193_VGPR194_VGPR195_VGPR196_VGPR197_VGPR198_VGPR199}" ()
-  call void asm sideeffect "", "~{VGPR200_VGPR201_VGPR202_VGPR203_VGPR204_VGPR205_VGPR206_VGPR207}" ()
-  call void asm sideeffect "", "~{VGPR208_VGPR209_VGPR210_VGPR211_VGPR212_VGPR213_VGPR214_VGPR215}" ()
-  call void asm sideeffect "", "~{VGPR216_VGPR217_VGPR218_VGPR219_VGPR220_VGPR221_VGPR222_VGPR223}" ()
-  call void asm sideeffect "", "~{VGPR224_VGPR225_VGPR226_VGPR227_VGPR228_VGPR229_VGPR230_VGPR231}" ()
-  call void asm sideeffect "", "~{VGPR232_VGPR233_VGPR234_VGPR235_VGPR236_VGPR237_VGPR238_VGPR239}" ()
-  call void asm sideeffect "", "~{VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247}" ()
-  call void asm sideeffect "", "~{VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
+  call void asm sideeffect "", "~{v[0:7]}" ()
+  call void asm sideeffect "", "~{v[8:15]}" ()
+  call void asm sideeffect "", "~{v[16:23]}" ()
+  call void asm sideeffect "", "~{v[24:31]}" ()
+  call void asm sideeffect "", "~{v[32:39]}" ()
+  call void asm sideeffect "", "~{v[40:47]}" ()
+  call void asm sideeffect "", "~{v[48:55]}" ()
+  call void asm sideeffect "", "~{v[56:63]}" ()
+  call void asm sideeffect "", "~{v[64:71]}" ()
+  call void asm sideeffect "", "~{v[72:79]}" ()
+  call void asm sideeffect "", "~{v[80:87]}" ()
+  call void asm sideeffect "", "~{v[88:95]}" ()
+  call void asm sideeffect "", "~{v[96:103]}" ()
+  call void asm sideeffect "", "~{v[104:111]}" ()
+  call void asm sideeffect "", "~{v[112:119]}" ()
+  call void asm sideeffect "", "~{v[120:127]}" ()
+  call void asm sideeffect "", "~{v[128:135]}" ()
+  call void asm sideeffect "", "~{v[136:143]}" ()
+  call void asm sideeffect "", "~{v[144:151]}" ()
+  call void asm sideeffect "", "~{v[152:159]}" ()
+  call void asm sideeffect "", "~{v[160:167]}" ()
+  call void asm sideeffect "", "~{v[168:175]}" ()
+  call void asm sideeffect "", "~{v[176:183]}" ()
+  call void asm sideeffect "", "~{v[184:191]}" ()
+  call void asm sideeffect "", "~{v[192:199]}" ()
+  call void asm sideeffect "", "~{v[200:207]}" ()
+  call void asm sideeffect "", "~{v[208:215]}" ()
+  call void asm sideeffect "", "~{v[216:223]}" ()
+  call void asm sideeffect "", "~{v[224:231]}" ()
+  call void asm sideeffect "", "~{v[232:239]}" ()
+  call void asm sideeffect "", "~{v[240:247]}" ()
+  call void asm sideeffect "", "~{v[248:255]}" ()
   store i32 %in, i32 addrspace(1)* %out
   ret void
diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll
index 3f53572ab440..ea8b87f1dee2 100644
--- a/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -79,7 +79,7 @@ define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
 ; CHECK-NEXT: s_endpgm
 define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
   call void @llvm.AMDGPU.kill(float %x)
-  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"()
+  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
   call void @llvm.AMDGPU.kill(float %y)
   ret void
 }
@@ -128,7 +128,7 @@ bb:
     v_nop_e64
     v_nop_e64
     v_nop_e64
-    v_nop_e64", "={VGPR7}"()
+    v_nop_e64", "={v7}"()
   call void @llvm.AMDGPU.kill(float %var)
   br label %exit
@@ -186,11 +186,11 @@ bb:
     v_nop_e64
     v_nop_e64
     v_nop_e64
-    v_nop_e64", "={VGPR7}"()
-  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"()
+    v_nop_e64", "={v7}"()
+  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
   call void @llvm.AMDGPU.kill(float %var)
   store volatile float %live.across, float addrspace(1)* undef
-  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"()
+  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
   br label %exit
 
 exit:
@@ -242,7 +242,7 @@ bb:
     v_nop_e64
     v_nop_e64
     v_nop_e64
-    v_nop_e64", "={VGPR7}"()
+    v_nop_e64", "={v7}"()
   call void @llvm.AMDGPU.kill(float %var)
   %vgpr = load volatile i32, i32 addrspace(1)* undef
   %loop.cond = icmp eq i32 %vgpr, 0
diff --git a/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/test/CodeGen/AMDGPU/sminmax.v2i16.ll
index 5d71ad2c8ba3..a9aac2d8abb7 100644
--- a/test/CodeGen/AMDGPU/sminmax.v2i16.ll
+++ b/test/CodeGen/AMDGPU/sminmax.v2i16.ll
@@ -10,11 +10,11 @@
 ; VI: v_sub_i32_e32
 ; VI-DAG: v_sub_i32_e32
-; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
 ; VI: v_add_i32_e32
 ; VI: v_add_i32_e32
-; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; CI: v_sub_i32_e32
 ; CI-DAG: v_sub_i32_e32
@@ -47,7 +47,7 @@ define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %
 ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; VI: v_add_u16_e32 v{{[0-9]+}}, 2, v{{[0-9]+}}
-; VI: v_add_u16_sdwa v{{[0-9]+}}, [[TWO]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[TWO]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-NOT: v_and_b32
 ; VI: v_or_b32_e32
 define amdgpu_kernel void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
diff --git a/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index c05021a91ff0..a23461a0a514 100644
--- a/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -20,13 +20,13 @@ entry:
   %a = load <1280 x i32>, <1280 x i32> addrspace(1)* %aptr
 
 ; mark most VGPR registers as used to increase register pressure
-  call void asm sideeffect "", "~{VGPR4},~{VGPR8},~{VGPR12},~{VGPR16},~{VGPR20},~{VGPR24},~{VGPR28},~{VGPR32}" ()
-  call void asm sideeffect "", "~{VGPR36},~{VGPR40},~{VGPR44},~{VGPR48},~{VGPR52},~{VGPR56},~{VGPR60},~{VGPR64}" ()
-  call void asm sideeffect "", "~{VGPR68},~{VGPR72},~{VGPR76},~{VGPR80},~{VGPR84},~{VGPR88},~{VGPR92},~{VGPR96}" ()
-  call void asm sideeffect "", "~{VGPR100},~{VGPR104},~{VGPR108},~{VGPR112},~{VGPR116},~{VGPR120},~{VGPR124},~{VGPR128}" ()
-  call void asm sideeffect "", "~{VGPR132},~{VGPR136},~{VGPR140},~{VGPR144},~{VGPR148},~{VGPR152},~{VGPR156},~{VGPR160}" ()
-  call void asm sideeffect "", "~{VGPR164},~{VGPR168},~{VGPR172},~{VGPR176},~{VGPR180},~{VGPR184},~{VGPR188},~{VGPR192}" ()
-  call void asm sideeffect "", "~{VGPR196},~{VGPR200},~{VGPR204},~{VGPR208},~{VGPR212},~{VGPR216},~{VGPR220},~{VGPR224}" ()
+  call void asm sideeffect "", "~{v4},~{v8},~{v12},~{v16},~{v20},~{v24},~{v28},~{v32}" ()
+  call void asm sideeffect "", "~{v36},~{v40},~{v44},~{v48},~{v52},~{v56},~{v60},~{v64}" ()
+  call void asm sideeffect "", "~{v68},~{v72},~{v76},~{v80},~{v84},~{v88},~{v92},~{v96}" ()
+  call void asm sideeffect "", "~{v100},~{v104},~{v108},~{v112},~{v116},~{v120},~{v124},~{v128}" ()
+  call void asm sideeffect "", "~{v132},~{v136},~{v140},~{v144},~{v148},~{v152},~{v156},~{v160}" ()
+  call void asm sideeffect "", "~{v164},~{v168},~{v172},~{v176},~{v180},~{v184},~{v188},~{v192}" ()
+  call void asm sideeffect "", "~{v196},~{v200},~{v204},~{v208},~{v212},~{v216},~{v220},~{v224}" ()
 
   %outptr = getelementptr <1280 x i32>, <1280 x i32> addrspace(1)* %out, i32 %tid
   store <1280 x i32> %a, <1280 x i32> addrspace(1)* %outptr
diff --git a/test/CodeGen/AMDGPU/sub.v2i16.ll b/test/CodeGen/AMDGPU/sub.v2i16.ll
index 6aeff3fc3b6c..ee923e2b8b61 100644
--- a/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -5,7 +5,7 @@
 ; GCN-LABEL: {{^}}v_test_sub_v2i16:
 ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; VI: v_subrev_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_subrev_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -62,7 +62,7 @@ define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out
 ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
 ; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xfffffe38
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xffffff85, v{{[0-9]+}}
 define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -80,7 +80,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %ou
 ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
 ; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3df
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}}
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x34d, v{{[0-9]+}}
 define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -98,7 +98,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)*
 ; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; VI: flat_load_ushort [[LOAD0:v[0-9]+]]
 ; VI: flat_load_ushort [[LOAD1:v[0-9]+]]
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[ONE]], [[LOAD0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], [[ONE]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 1, [[LOAD1]]
 ; VI: v_or_b32_e32
 define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
@@ -137,7 +137,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(<2 x i16> addrspac
 ; VI-NOT: v_subrev_i16
 ; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xffffc080
-; VI: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-NOT: v_subrev_i16
 ; VI: v_or_b32_e32
 define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
@@ -252,7 +252,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)
 ; GFX9: v_pk_sub_i16
 ; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
-; VI: v_subrev_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI: v_subrev_u16_e32
 ; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
diff --git a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
index 3e80fcf85b52..1e08f51dabde 100644
--- a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
+++ b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
@@ -73,14 +73,14 @@ bb11: ; preds = %bb9
 ; CHECK: buffer_store_dwordx4 v{{\[}}[[OUTPUT_LO]]:[[OUTPUT_HI]]{{\]}}
 define amdgpu_kernel void @partially_undef_copy() #0 {
-  %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={VGPR5}"()
-  %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={VGPR6}"()
+  %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={v5}"()
+  %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={v6}"()
 
   %partially.undef.0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
   %partially.undef.1 = insertelement <4 x i32> %partially.undef.0, i32 %tmp1, i32 0
 
   store volatile <4 x i32> %partially.undef.1, <4 x i32> addrspace(1)* undef, align 16
-  tail call void asm sideeffect "v_nop", "v={VGPR5_VGPR6_VGPR7_VGPR8}"(<4 x i32> %partially.undef.0)
+  tail call void asm sideeffect "v_nop", "v={v[5:8]}"(<4 x i32> %partially.undef.0)
   ret void
 }
diff --git a/test/CodeGen/AMDGPU/v_mac_f16.ll b/test/CodeGen/AMDGPU/v_mac_f16.ll
index 3da1a0324042..ce4a69db3506 100644
--- a/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -304,14 +304,14 @@ entry:
 ; GCN: {{buffer|flat}}_load_dword v[[C_V2_F16:[0-9]+]]
 ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
-; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
-; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
-; SI-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
-; SI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
-; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
-; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
+; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
+; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
+; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
+; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
+; SI: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
 ; SI: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]]
+; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
 ; SI-DAG: v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]]
 ; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_LO:[0-9]+]], v[[C_F32_0]]
 ; SI-DAG: v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]]
@@ -320,12 +320,12 @@ entry:
 ; VI-NOT: and
 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
-; VI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
-; VI-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[C_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]]
-; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]]
+; VI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
+; VI-DAG: v_mac_f16_sdwa v[[C_F16_1]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_mac_f16_e32 v[[C_V2_F16]], v[[B_V2_F16]], v[[A_V2_F16]]
+; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[C_F16_1]]
 ; VI-NOT: and
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[A_V2_F16]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[C_V2_F16]]
 ; GCN: {{buffer|flat}}_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
@@ -336,7 +336,9 @@ define amdgpu_kernel void @mac_v2f16(
   <2 x half> addrspace(1)* %c) #0 {
 entry:
   %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  call void @llvm.amdgcn.s.barrier() #2
   %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+  call void @llvm.amdgcn.s.barrier() #2
   %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
 
   %t.val = fmul <2 x half> %a.val, %b.val
@@ -485,7 +487,7 @@ entry:
 ; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
 ; VI-DAG: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
 ; VI-DAG: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
 ; GCN: s_endpgm
@@ -517,7 +519,7 @@ entry:
 ; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
 ; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
 ; VI: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
 ; GCN: s_endpgm
@@ -670,5 +672,8 @@ entry:
   ret void
 }
 
+declare void @llvm.amdgcn.s.barrier() #2
+
 attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
 attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
+attributes #2 = { nounwind convergent }
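One addition in the v_mac_f16.ll hunks above is not an operand swap: mac_v2f16 now separates its three vector loads with workgroup barriers, presumably so the loads cannot be reordered or clustered and the SI/VI check lines see a stable instruction order (an inference, not stated in the patch). Calling the intrinsic requires the declaration and the convergent attribute group added at the end of the file:

    declare void @llvm.amdgcn.s.barrier() #2
    attributes #2 = { nounwind convergent }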