diff options
Diffstat (limited to 'test')
35 files changed, 2546 insertions, 145 deletions
diff --git a/test/CodeGen/AMDGPU/add.ll b/test/CodeGen/AMDGPU/add.ll index 6a108db879cc7..bce924ec4a081 100644 --- a/test/CodeGen/AMDGPU/add.ll +++ b/test/CodeGen/AMDGPU/add.ll @@ -1,11 +1,8 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}s_add_i32: -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - ; GCN: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}} ; GCN: v_mov_b32_e32 v[[V_REG:[0-9]+]], s[[REG]] ; GCN: buffer_store_dword v[[V_REG]], @@ -19,9 +16,6 @@ define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* % } ; FUNC-LABEL: {{^}}s_add_v2i32: -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { @@ -34,11 +28,6 @@ define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> a } ; FUNC-LABEL: {{^}}s_add_v4i32: -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+, 
s[0-9]+, s[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} @@ -53,15 +42,6 @@ define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> a } ; FUNC-LABEL: {{^}}s_add_v8i32: -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT - ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 @@ -78,23 +58,6 @@ entry: } ; FUNC-LABEL: {{^}}s_add_v16i32: -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT - ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 @@ -124,7 +87,7 @@ entry: ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, [[A]], [[B]] ; GFX9: v_add_u32_e32 v{{[0-9]+}}, [[A]], [[B]] define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 %a = load volatile i32, i32 addrspace(1)* %gep @@ -139,7 +102,7 @@ define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* % ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, 0x7b, [[A]] ; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x7b, [[A]] define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 %a = load volatile i32, i32 addrspace(1)* %gep @@ -151,13 +114,6 @@ define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1 ; FUNC-LABEL: {{^}}add64: ; GCN: s_add_u32 ; GCN: s_addc_u32 - -; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] -; 
EG-DAG: ADD_INT {{[* ]*}} -; EG-DAG: ADDC_UINT -; EG-DAG: ADD_INT -; EG-DAG: ADD_INT {{[* ]*}} -; EG-NOT: SUB define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { entry: %add = add i64 %a, %b @@ -172,13 +128,6 @@ entry: ; FUNC-LABEL: {{^}}add64_sgpr_vgpr: ; GCN-NOT: v_addc_u32_e32 s - -; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] -; EG-DAG: ADD_INT {{[* ]*}} -; EG-DAG: ADDC_UINT -; EG-DAG: ADD_INT -; EG-DAG: ADD_INT {{[* ]*}} -; EG-NOT: SUB define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) { entry: %0 = load i64, i64 addrspace(1)* %in @@ -191,13 +140,6 @@ entry: ; FUNC-LABEL: {{^}}add64_in_branch: ; GCN: s_add_u32 ; GCN: s_addc_u32 - -; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] -; EG-DAG: ADD_INT {{[* ]*}} -; EG-DAG: ADDC_UINT -; EG-DAG: ADD_INT -; EG-DAG: ADD_INT {{[* ]*}} -; EG-NOT: SUB define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { entry: %0 = icmp eq i64 %a, 0 @@ -217,7 +159,26 @@ endif: ret void } -declare i32 @llvm.r600.read.tidig.x() #1 +; Make sure the VOP3 form of add is initially selected. 
Otherwise a pair +; of copies from/to VCC would be necessary + +; GCN-LABEL: {{^}}add_select_vop3: +; SI: v_add_i32_e64 v0, s[0:1], s0, v0 +; VI: v_add_u32_e64 v0, s[0:1], s0, v0 +; GFX9: v_add_u32_e32 v0, s0, v0 + +; GCN: ; def vcc +; GCN: ds_write_b32 +; GCN: ; use vcc +define amdgpu_ps void @add_select_vop3(i32 inreg %s, i32 %v) { + %vcc = call i64 asm sideeffect "; def vcc", "={vcc}"() + %sub = add i32 %v, %s + store i32 %sub, i32 addrspace(3)* undef + call void asm sideeffect "; use vcc", "{vcc}"(i64 %vcc) + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll index 5997e27fd815e..e2c7f1c47cf9f 100644 --- a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll +++ b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll @@ -7,6 +7,8 @@ declare void @llvm.amdgcn.s.barrier() #1 ; Function Attrs: nounwind ; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop: +; SI: s_movk_i32 [[K_0X88:s[0-9]+]], 0x +; SI: s_movk_i32 [[K_0X100:s[0-9]+]], 0x100 ; CHECK: BB0_1: ; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]], ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] @@ -14,9 +16,9 @@ declare void @llvm.amdgcn.s.barrier() #1 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR8]] ; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]] -; SI-DAG: v_add_i32_e32 [[VADDR0x88:v[0-9]+]], vcc, 0x88, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x88:v[0-9]+]], vcc, [[K_0X88]], [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x88]] -; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, [[K_0X100]], [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]] ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:2 diff --git 
a/test/CodeGen/AMDGPU/fence-barrier.ll b/test/CodeGen/AMDGPU/fence-barrier.ll index 8f5a06d01fa22..7de4f1796b08a 100644 --- a/test/CodeGen/AMDGPU/fence-barrier.ll +++ b/test/CodeGen/AMDGPU/fence-barrier.ll @@ -54,7 +54,8 @@ define amdgpu_kernel void @test_local(i32 addrspace(1)*) { } ; GCN-LABEL: {{^}}test_global -; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}} +; GCN: s_movk_i32 [[K:s[0-9]+]], 0x888 +; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, [[K]], v{{[0-9]+}} ; GCN: flat_store_dword ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GCN-NEXT: s_barrier diff --git a/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir b/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir new file mode 100644 index 0000000000000..ab544665efb41 --- /dev/null +++ b/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir @@ -0,0 +1,230 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s + +--- + +# First operand is FI is in a VGPR, other operand is a VGPR +name: shrink_vgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: shrink_vgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = COPY $vgpr0 + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... 
+ +--- + +# First operand is a VGPR, other operand FI is in a VGPR +name: shrink_vgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: shrink_vgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is FI in a VGPR, other operand is an SGPR +name: shrink_vgpr_fi_sgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: shrink_vgpr_fi_sgpr_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $sgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]] + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:sreg_32_xm0 = COPY $sgpr0 + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... 
+ +--- + +# First operand is an SGPR, other operand FI is in a VGPR +name: shrink_sgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: shrink_sgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]] + %0:sreg_32_xm0 = COPY $sgpr0 + %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is FI is in an SGPR, other operand is a VGPR +name: shrink_sgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: shrink_sgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $vgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:sreg_32_xm0 = S_MOV_B32 %stack.0 + %1:vgpr_32 = COPY $vgpr0 + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... 
+ +--- + +# First operand is a VGPR, other operand FI is in an SGPR +name: shrink_vgpr_sgpr_fi_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16} +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: shrink_vgpr_sgpr_fi_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = COPY $vgpr0 + %1:sreg_32_xm0 = S_MOV_B32 %stack.0 + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is FI is in a VGPR, other operand is an inline imm in a VGPR +name: shrink_vgpr_imm_fi_vgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + + ; GCN-LABEL: name: shrink_vgpr_imm_fi_vgpr_v_add_i32_e64_no_carry_out_use + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 16, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... 
+ +--- + +# First operand is an inline imm in a VGPR, other operand FI is in a VGPR +name: shrink_vgpr_imm_vgpr_fi_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + + ; GCN-LABEL: name: shrink_vgpr_imm_vgpr_fi_v_add_i32_e64_no_carry_out_use + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 16, [[V_MOV_B32_e32_]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]] + %0:vgpr_32 = V_MOV_B32_e32 16, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is FI in a VGPR, other operand is a literal constant in a VGPR +name: shrink_vgpr_k_fi_vgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + + ; GCN-LABEL: name: shrink_vgpr_k_fi_vgpr_v_add_i32_e64_no_carry_out_use + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 1234, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... 
+ +--- + +# First operand is a literal constant in a VGPR, other operand FI is in a VGPR +name: shrink_vgpr_k_vgpr_fi_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + + ; GCN-LABEL: name: shrink_vgpr_k_vgpr_fi_v_add_i32_e64_no_carry_out_use + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 %stack.0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... diff --git a/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir b/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir index 847c2b720cd4e..15c453f36f631 100644 --- a/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir +++ b/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir @@ -250,8 +250,8 @@ body: | ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] + ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec @@ -269,8 +269,8 @@ body: | ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 - ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = 
V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] + ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec @@ -288,8 +288,8 @@ body: | ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] + ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec @@ -307,8 +307,8 @@ body: | ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 - ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] + ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec @@ -590,3 +590,59 @@ body: | S_ENDPGM implicit %2 ... 
+ +--- +name: shrink_add_kill_flags_src0 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: shrink_add_kill_flags_src0 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 killed [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 killed %1, %0, implicit $exec + S_ENDPGM implicit %2 +... + +--- +name: shrink_add_kill_flags_src1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: shrink_add_kill_flags_src1 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], killed [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %1, killed %0, implicit $exec + S_ENDPGM implicit %2 +... 
+ +--- +name: shrink_addc_kill_flags_src2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vcc + ; GCN-LABEL: name: shrink_addc_kill_flags_src2 + ; GCN: liveins: $vgpr0, $vcc + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $vcc + ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[V_MOV_B32_e32_]], [[COPY]], [[COPY1]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADDC_U32_e64_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + %2:sreg_64_xexec = COPY $vcc + %3:vgpr_32, %4:sreg_64_xexec = V_ADDC_U32_e64 %1, %0, %2, implicit $exec + S_ENDPGM implicit %3 +... diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll index 18abf607aea5a..77d518f503ab7 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll @@ -15,7 +15,8 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) } ; VI-LABEL: {{^}}dpp_test1: -; VI: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}} +; VI-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}} +; VI-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0 ; VI-NEXT: s_nop 0 ; VI-NEXT: s_nop 0 diff --git a/test/CodeGen/AMDGPU/r600.add.ll b/test/CodeGen/AMDGPU/r600.add.ll new file mode 100644 index 0000000000000..73eea3ef21774 --- /dev/null +++ b/test/CodeGen/AMDGPU/r600.add.ll @@ -0,0 +1,167 @@ +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}s_add_i32: +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) 
#0 { + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = add i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_add_v2i32: +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1)* %in + %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr + %result = add <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_add_v4i32: +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1)* %in + %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr + %result = add <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_add_v8i32: +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) { +entry: + %0 = add <8 x i32> %a, %b + store <8 x i32> %0, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_add_v16i32: +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; 
EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) { +entry: + %0 = add <16 x i32> %a, %b + store <16 x i32> %0, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_add_i32: +define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 + %a = load volatile i32, i32 addrspace(1)* %gep + %b = load volatile i32, i32 addrspace(1)* %b_ptr + %result = add i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_add_imm_i32: +define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 + %a = load volatile i32, i32 addrspace(1)* %gep + %result = add i32 %a, 123 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}add64: +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] +; EG-DAG: ADD_INT {{[* ]*}} +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}} +; EG-NOT: SUB +define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %add = add i64 %a, %b + store i64 %add, i64 addrspace(1)* %out + ret void +} + +; The v_addc_u32 and v_add_i32 instructions can't read SGPRs, because they +; use VCC. The test is designed so that %a will be stored in an SGPR and +; %0 will be stored in a VGPR, so the compiler will be forced to copy %a +; to a VGPR before doing the add. 
+ +; FUNC-LABEL: {{^}}add64_sgpr_vgpr: +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] +; EG-DAG: ADD_INT {{[* ]*}} +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}} +; EG-NOT: SUB +define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) { +entry: + %0 = load i64, i64 addrspace(1)* %in + %1 = add i64 %a, %0 + store i64 %1, i64 addrspace(1)* %out + ret void +} + +; Test i64 add inside a branch. +; FUNC-LABEL: {{^}}add64_in_branch: +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] +; EG-DAG: ADD_INT {{[* ]*}} +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}} +; EG-NOT: SUB +define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64, i64 addrspace(1)* %in + br label %endif + +else: + %2 = add i64 %a, %b + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.tidig.x() #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/r600.sub.ll b/test/CodeGen/AMDGPU/r600.sub.ll new file mode 100644 index 0000000000000..2ded4f64328d4 --- /dev/null +++ b/test/CodeGen/AMDGPU/r600.sub.ll @@ -0,0 +1,152 @@ +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s + +declare i32 @llvm.r600.read.tidig.x() readnone + +; FUNC-LABEL: {{^}}s_sub_i32: +define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %result = sub i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_sub_imm_i32: +define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) { + %result = sub i32 1234, %a + store i32 %result, i32 addrspace(1)* %out + ret 
void +} + +; FUNC-LABEL: {{^}}test_sub_i32: +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = sub i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_imm_i32: +; EG: SUB_INT +define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %a = load i32, i32 addrspace(1)* %in + %result = sub i32 123, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_v2i32: +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1) * %in + %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr + %result = sub <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_v4i32: +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1) * %in + %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr + %result = sub <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> 
addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_i16: +define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1 + %a = load volatile i16, i16 addrspace(1)* %gep + %b = load volatile i16, i16 addrspace(1)* %b_ptr + %result = sub i16 %a, %b + store i16 %result, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_v2i16: +define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1 + %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr + %result = sub <2 x i16> %a, %b + store <2 x i16> %result, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_v4i16: +define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1 + %a = load <4 x i16>, <4 x i16> addrspace(1) * %gep + %b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr + %result = sub <4 x i16> %a, %b + store <4 x i16> %result, <4 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_sub_i64: +; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY +; EG-DAG: SUB_INT {{[* ]*}} +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT {{[* ]*}} +define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind { + %result = sub i64 %a, %b + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}v_sub_i64: +; EG: 
MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY +; EG-DAG: SUB_INT {{[* ]*}} +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT {{[* ]*}} +define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() readnone + %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid + %a = load i64, i64 addrspace(1)* %a_ptr + %b = load i64, i64 addrspace(1)* %b_ptr + %result = sub i64 %a, %b + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}v_test_sub_v2i64: +define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) { + %tid = call i32 @llvm.r600.read.tidig.x() readnone + %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid + %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr + %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr + %result = sub <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_test_sub_v4i64: +define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) { + %tid = call i32 @llvm.r600.read.tidig.x() readnone + %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid + %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr + %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr + %result = sub <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll index 9b46962108c28..cf42ee9b39ba0 100644 --- 
a/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -458,7 +458,7 @@ bb7: ; preds = %bb3 } ; GCN-LABEL: {{^}}phi_visit_order: -; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 1, v{{[0-9]+}} +; GCN: v_add_i32_e64 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 1, v{{[0-9]+}} define amdgpu_kernel void @phi_visit_order() { bb: br label %bb1 diff --git a/test/CodeGen/AMDGPU/sub.ll b/test/CodeGen/AMDGPU/sub.ll index 4bd346dc586d1..485b374454de6 100644 --- a/test/CodeGen/AMDGPU/sub.ll +++ b/test/CodeGen/AMDGPU/sub.ll @@ -1,11 +1,10 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s -declare i32 @llvm.r600.read.tidig.x() readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable -; FUNC-LABEL: {{^}}s_sub_i32: +; GCN-LABEL: {{^}}s_sub_i32: ; GCN: s_load_dwordx2 ; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}} ; GCN: s_sub_i32 s{{[0-9]+}}, s[[A]], 
s[[B]] @@ -15,7 +14,7 @@ define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { ret void } -; FUNC-LABEL: {{^}}s_sub_imm_i32: +; GCN-LABEL: {{^}}s_sub_imm_i32: ; GCN: s_load_dword [[A:s[0-9]+]] ; GCN: s_sub_i32 s{{[0-9]+}}, 0x4d2, [[A]] define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) { @@ -24,9 +23,7 @@ define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) { ret void } -; FUNC-LABEL: {{^}}test_sub_i32: -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - +; GCN-LABEL: {{^}}test_sub_i32: ; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} ; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -38,9 +35,7 @@ define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1) ret void } -; FUNC-LABEL: {{^}}test_sub_imm_i32: -; EG: SUB_INT - +; GCN-LABEL: {{^}}test_sub_imm_i32: ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0x7b, v{{[0-9]+}} ; GFX9: v_sub_u32_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}} define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -50,10 +45,7 @@ define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspac ret void } -; FUNC-LABEL: {{^}}test_sub_v2i32: -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - +; GCN-LABEL: {{^}}test_sub_v2i32: ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} @@ -68,12 +60,7 @@ define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32 ret void } -; FUNC-LABEL: {{^}}test_sub_v4i32: -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], 
T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - +; GCN-LABEL: {{^}}test_sub_v4i32: ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} @@ -92,11 +79,11 @@ define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32 ret void } -; FUNC-LABEL: {{^}}test_sub_i16: +; GCN-LABEL: {{^}}test_sub_i16: ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, ; GFX89: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1 %a = load volatile i16, i16 addrspace(1)* %gep @@ -106,13 +93,13 @@ define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1) ret void } -; FUNC-LABEL: {{^}}test_sub_v2i16: +; GCN-LABEL: {{^}}test_sub_v2i16: ; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}} ; GFX9: v_pk_sub_i16 define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1 %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep @@ -122,7 +109,7 @@ define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16 ret void } -; FUNC-LABEL: {{^}}test_sub_v4i16: +; GCN-LABEL: {{^}}test_sub_v4i16: ; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}} ; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} @@ -131,7 +118,7 @@ define amdgpu_kernel void 
@test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16 ; GFX9: v_pk_sub_i16 ; GFX9: v_pk_sub_i16 define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1 %a = load <4 x i16>, <4 x i16> addrspace(1) * %gep @@ -141,22 +128,16 @@ define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16 ret void } -; FUNC-LABEL: {{^}}s_sub_i64: +; GCN-LABEL: {{^}}s_sub_i64: ; GCN: s_sub_u32 ; GCN: s_subb_u32 - -; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY -; EG-DAG: SUB_INT {{[* ]*}} -; EG-DAG: SUBB_UINT -; EG-DAG: SUB_INT -; EG-DAG: SUB_INT {{[* ]*}} define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind { %result = sub i64 %a, %b store i64 %result, i64 addrspace(1)* %out, align 8 ret void } -; FUNC-LABEL: {{^}}v_sub_i64: +; GCN-LABEL: {{^}}v_sub_i64: ; SI: v_sub_i32_e32 ; SI: v_subb_u32_e32 @@ -165,14 +146,8 @@ define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 ; GFX9: v_sub_co_u32_e32 ; GFX9: v_subb_co_u32_e32 - -; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY -; EG-DAG: SUB_INT {{[* ]*}} -; EG-DAG: SUBB_UINT -; EG-DAG: SUB_INT -; EG-DAG: SUB_INT {{[* ]*}} define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid %a = load i64, i64 addrspace(1)* %a_ptr @@ -182,7 +157,7 @@ define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspa ret void } -; FUNC-LABEL: {{^}}v_test_sub_v2i64: +; 
GCN-LABEL: {{^}}v_test_sub_v2i64: ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, ; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc, ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, @@ -198,7 +173,7 @@ define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspa ; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, ; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) { - %tid = call i32 @llvm.r600.read.tidig.x() readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr @@ -208,7 +183,7 @@ define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i ret void } -; FUNC-LABEL: {{^}}v_test_sub_v4i64: +; GCN-LABEL: {{^}}v_test_sub_v4i64: ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, ; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc, ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, @@ -236,7 +211,7 @@ define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i ; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, ; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) { - %tid = call i32 @llvm.r600.read.tidig.x() readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr @@ -245,3 +220,22 @@ define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i store <4 x i64> %result, <4 x i64> addrspace(1)* %out ret void } + +; Make sure the VOP3 form of sub is initially selected. 
Otherwise a pair +; of copies from/to VCC would be necessary + +; GCN-LABEL: {{^}}sub_select_vop3: +; SI: v_subrev_i32_e64 v0, s[0:1], s0, v0 +; VI: v_subrev_u32_e64 v0, s[0:1], s0, v0 +; GFX9: v_subrev_u32_e32 v0, s0, v0 + +; GCN: ; def vcc +; GCN: ds_write_b32 +; GCN: ; use vcc +define amdgpu_ps void @sub_select_vop3(i32 inreg %s, i32 %v) { + %vcc = call i64 asm sideeffect "; def vcc", "={vcc}"() + %sub = sub i32 %v, %s + store i32 %sub, i32 addrspace(3)* undef + call void asm sideeffect "; use vcc", "{vcc}"(i64 %vcc) + ret void +} diff --git a/test/CodeGen/ARM/tail-call-scheduling.ll b/test/CodeGen/ARM/tail-call-scheduling.ll new file mode 100644 index 0000000000000..591da10256ba4 --- /dev/null +++ b/test/CodeGen/ARM/tail-call-scheduling.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s | FileCheck %s +target triple = "armv6kz-unknown-unknown-gnueabihf" + +; Make sure this doesn't crash, and we actually emit a tail call. +; Unfortunately, this test is sort of fragile... the original issue only +; shows up if scheduling happens in a very specific order. But including +; it anyway just to demonstrate the issue.
+; CHECK: pop {r4, lr} + +@e = external local_unnamed_addr constant [0 x i32 (i32, i32)*], align 4 + +; Function Attrs: nounwind sspstrong +define i32 @AVI_ChunkRead_p_chk(i32 %g) nounwind sspstrong "target-cpu"="arm1176jzf-s" { +entry: + %b = alloca i8, align 1 + %tobool = icmp eq i32 %g, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %add = add nsw i32 %g, 1 + %arrayidx = getelementptr inbounds [0 x i32 (i32, i32)*], [0 x i32 (i32, i32)*]* @e, i32 0, i32 %add + %0 = load i32 (i32, i32)*, i32 (i32, i32)** %arrayidx, align 4 + %call = tail call i32 %0(i32 0, i32 0) #3 + br label %return + +if.end: ; preds = %entry + call void @c(i8* nonnull %b) + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ 0, %if.end ] + ret i32 %retval.0 +} + +declare void @c(i8*) diff --git a/test/CodeGen/AVR/mul.ll b/test/CodeGen/AVR/hardware-mul.ll index 2f169347c46e0..650697857b76b 100644 --- a/test/CodeGen/AVR/mul.ll +++ b/test/CodeGen/AVR/hardware-mul.ll @@ -1,5 +1,7 @@ ; RUN: llc -mattr=mul,movw < %s -march=avr | FileCheck %s +; Tests lowering of multiplication to hardware instructions. 
+ define i8 @mult8(i8 %a, i8 %b) { ; CHECK-LABEL: mult8: ; CHECK: muls r22, r24 diff --git a/test/CodeGen/AVR/smul-with-overflow.ll b/test/CodeGen/AVR/smul-with-overflow.ll index 745e93005cc2f..9eb2c7411dee8 100644 --- a/test/CodeGen/AVR/smul-with-overflow.ll +++ b/test/CodeGen/AVR/smul-with-overflow.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=avr | FileCheck %s +; RUN: llc -mattr=avr6 < %s -march=avr | FileCheck %s define i1 @signed_multiplication_did_overflow(i8, i8) unnamed_addr { ; CHECK-LABEL: signed_multiplication_did_overflow: diff --git a/test/CodeGen/AVR/software-mul.ll b/test/CodeGen/AVR/software-mul.ll new file mode 100644 index 0000000000000..9a4d28127eb87 --- /dev/null +++ b/test/CodeGen/AVR/software-mul.ll @@ -0,0 +1,28 @@ +; RUN: llc -mattr=avr6,-mul < %s -march=avr | FileCheck %s +; RUN: llc -mcpu=attiny85 < %s -march=avr | FileCheck %s +; RUN: llc -mcpu=ata5272 < %s -march=avr | FileCheck %s +; RUN: llc -mcpu=attiny861a < %s -march=avr | FileCheck %s +; RUN: llc -mcpu=at90usb82 < %s -march=avr | FileCheck %s + +; Tests lowering of multiplication to compiler support routines. 
+ +; CHECK-LABEL: mul8: +define i8 @mul8(i8 %a, i8 %b) { +; CHECK: mov r25, r24 +; CHECK: mov r24, r22 +; CHECK: mov r22, r25 +; CHECK: call __mulqi3 + %mul = mul i8 %b, %a + ret i8 %mul +} + +; CHECK-LABEL: mul16: +define i16 @mul16(i16 %a, i16 %b) { +; CHECK: movw r18, r24 +; CHECK: movw r24, r22 +; CHECK: movw r22, r18 +; CHECK: call __mulhi3 + %mul = mul nsw i16 %b, %a + ret i16 %mul +} + diff --git a/test/CodeGen/AVR/umul-with-overflow.ll b/test/CodeGen/AVR/umul-with-overflow.ll index aa8b10a313d38..c6457552dea88 100644 --- a/test/CodeGen/AVR/umul-with-overflow.ll +++ b/test/CodeGen/AVR/umul-with-overflow.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=avr | FileCheck %s +; RUN: llc -mattr=avr6 < %s -march=avr | FileCheck %s define i1 @unsigned_multiplication_did_overflow(i8, i8) unnamed_addr { ; CHECK-LABEL: unsigned_multiplication_did_overflow: diff --git a/test/CodeGen/Mips/Fast-ISel/icmpbr1.ll b/test/CodeGen/Mips/Fast-ISel/icmpbr1.ll index ef8e1c2b0140c..e44ab36532c5b 100644 --- a/test/CodeGen/Mips/Fast-ISel/icmpbr1.ll +++ b/test/CodeGen/Mips/Fast-ISel/icmpbr1.ll @@ -17,7 +17,8 @@ bb0: bb1: ; CHECK: # %bb.1: # %bb1 ; CHECK-NEXT: lw $[[REG2:[0-9]+]], [[SPILL]]($sp) # 4-byte Folded Reload -; CHECK-NEXT: bgtz $[[REG2]], $BB0_3 +; CHECK-NEXT: andi $[[REG3:[0-9]+]], $[[REG2]], 1 +; CHECK-NEXT: bgtz $[[REG3]], $BB0_3 br i1 %2, label %bb2, label %bb3 bb2: ; CHECK: $BB0_3: # %bb2 diff --git a/test/CodeGen/Mips/Fast-ISel/pr40325.ll b/test/CodeGen/Mips/Fast-ISel/pr40325.ll new file mode 100644 index 0000000000000..a9ce70fe8afc5 --- /dev/null +++ b/test/CodeGen/Mips/Fast-ISel/pr40325.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mipsel -relocation-model=pic -O0 -mcpu=mips32 < %s | FileCheck %s + +define void @test(i32 %x, i1* %p) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: move $1, $4 +; CHECK-NEXT: andi $4, $4, 1 +; CHECK-NEXT: sb $4, 0($5) +; CHECK-NEXT: andi $1, $1, 1 +; 
CHECK-NEXT: bgtz $1, $BB0_1 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %foo +; CHECK-NEXT: jr $ra +; CHECK-NEXT: nop + %y = and i32 %x, 1 + %c = icmp eq i32 %y, 1 + store i1 %c, i1* %p + br i1 %c, label %foo, label %foo + +foo: + ret void +} diff --git a/test/CodeGen/Mips/abiflags32.ll b/test/CodeGen/Mips/abiflags32.ll index 39e2a90151e3e..65201ec03814d 100644 --- a/test/CodeGen/Mips/abiflags32.ll +++ b/test/CodeGen/Mips/abiflags32.ll @@ -1,6 +1,12 @@ ; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck %s ; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -mattr=fp64 %s -o - | FileCheck -check-prefix=CHECK-64 %s ; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips64 -target-abi n32 %s -o - | FileCheck -check-prefix=CHECK-64n %s +; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 \ +; RUN: -mattr=soft-float %s -o - | FileCheck -check-prefix=SOFT %s +; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32r6 \ +; RUN: -mattr=soft-float %s -o - | FileCheck -check-prefix=SOFT %s +; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips64 \ +; RUN: -mattr=soft-float -target-abi n64 %s -o - | FileCheck -check-prefix=SOFT %s ; CHECK: .nan legacy ; We don't emit '.module fp=32' for compatibility with binutils 2.24 which @@ -15,3 +21,5 @@ ; We don't emit '.module fp=64' for compatibility with binutils 2.24 which ; doesn't accept .module. 
; CHECK-64n-NOT: .module fp=64 + +; SOFT: .module softfloat diff --git a/test/CodeGen/Mips/llvm-ir/fptosi.ll b/test/CodeGen/Mips/llvm-ir/fptosi.ll new file mode 100644 index 0000000000000..03a0de7466452 --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/fptosi.ll @@ -0,0 +1,418 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r2 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+fp64 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32R2-FP64 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+soft-float -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32R2-SF +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r3 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32R3R5 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r5 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32R3R5 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r6 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32R6 +; RUN: llc < %s -mtriple=mips64-linux-gnu -mcpu=mips3 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M64 +; RUN: llc < %s -mtriple=mips64-linux-gnu -mcpu=mips64 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M64 +; RUN: llc < %s -mtriple=mips64-linux-gnu -mcpu=mips64r2 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M64 +; RUN: llc < %s -mtriple=mips64-linux-gnu -mcpu=mips64r6 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M64R6 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+micromips -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=MMR2-FP32 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+micromips,fp64 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=MMR2-FP64 +; RUN: llc < %s 
-mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+micromips,soft-float -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=MMR2-SF +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r6 -mattr=+micromips -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=MMR6 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r6 -mattr=+micromips,soft-float -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=MMR6-SF + +; Test that fptosi can be matched for MIPS targets for various FPU +; configurations + +define i32 @test1(float %t) { +; M32-LABEL: test1: +; M32: # %bb.0: # %entry +; M32-NEXT: trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S +; M32-NEXT: # <MCOperand Reg:147> +; M32-NEXT: # <MCOperand Reg:159>> +; M32-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR +; M32-NEXT: # <MCOperand Reg:19>> +; M32-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M32-NEXT: # <MCOperand Reg:321> +; M32-NEXT: # <MCOperand Reg:147>> +; +; M32R2-FP64-LABEL: test1: +; M32R2-FP64: # %bb.0: # %entry +; M32R2-FP64-NEXT: trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S +; M32R2-FP64-NEXT: # <MCOperand Reg:147> +; M32R2-FP64-NEXT: # <MCOperand Reg:159>> +; M32R2-FP64-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR +; M32R2-FP64-NEXT: # <MCOperand Reg:19>> +; M32R2-FP64-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M32R2-FP64-NEXT: # <MCOperand Reg:321> +; M32R2-FP64-NEXT: # <MCOperand Reg:147>> +; +; M32R2-SF-LABEL: test1: +; M32R2-SF: # %bb.0: # %entry +; M32R2-SF-NEXT: addiu $sp, $sp, -24 # <MCInst #{{[0-9]+}} ADDiu +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Imm:-24>> +; M32R2-SF-NEXT: .cfi_def_cfa_offset 24 +; M32R2-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; M32R2-SF-NEXT: # <MCInst #{{[0-9]+}} SW +; M32R2-SF-NEXT: # <MCOperand Reg:19> +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Imm:20>> +; M32R2-SF-NEXT: .cfi_offset 31, -4 +; M32R2-SF-NEXT: jal __fixsfsi # <MCInst #{{[0-9]+}} JAL +; M32R2-SF-NEXT: # 
<MCOperand Expr:(__fixsfsi)>> +; M32R2-SF-NEXT: nop # <MCInst #{{[0-9]+}} SLL +; M32R2-SF-NEXT: # <MCOperand Reg:21> +; M32R2-SF-NEXT: # <MCOperand Reg:21> +; M32R2-SF-NEXT: # <MCOperand Imm:0>> +; M32R2-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; M32R2-SF-NEXT: # <MCInst #{{[0-9]+}} LW +; M32R2-SF-NEXT: # <MCOperand Reg:19> +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Imm:20>> +; M32R2-SF-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR +; M32R2-SF-NEXT: # <MCOperand Reg:19>> +; M32R2-SF-NEXT: addiu $sp, $sp, 24 # <MCInst #{{[0-9]+}} ADDiu +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Imm:24>> +; +; M32R3R5-LABEL: test1: +; M32R3R5: # %bb.0: # %entry +; M32R3R5-NEXT: trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S +; M32R3R5-NEXT: # <MCOperand Reg:147> +; M32R3R5-NEXT: # <MCOperand Reg:159>> +; M32R3R5-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR +; M32R3R5-NEXT: # <MCOperand Reg:19>> +; M32R3R5-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M32R3R5-NEXT: # <MCOperand Reg:321> +; M32R3R5-NEXT: # <MCOperand Reg:147>> +; +; M32R6-LABEL: test1: +; M32R6: # %bb.0: # %entry +; M32R6-NEXT: trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S +; M32R6-NEXT: # <MCOperand Reg:147> +; M32R6-NEXT: # <MCOperand Reg:159>> +; M32R6-NEXT: jr $ra # <MCInst #{{[0-9]+}} JALR +; M32R6-NEXT: # <MCOperand Reg:21> +; M32R6-NEXT: # <MCOperand Reg:19>> +; M32R6-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M32R6-NEXT: # <MCOperand Reg:321> +; M32R6-NEXT: # <MCOperand Reg:147>> +; +; M64-LABEL: test1: +; M64: # %bb.0: # %entry +; M64-NEXT: trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S +; M64-NEXT: # <MCOperand Reg:147> +; M64-NEXT: # <MCOperand Reg:159>> +; M64-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR +; M64-NEXT: # <MCOperand Reg:301>> +; M64-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M64-NEXT: # <MCOperand Reg:321> +; M64-NEXT: # <MCOperand Reg:147>> +; +; M64R6-LABEL: test1: +; M64R6: # %bb.0: 
# %entry +; M64R6-NEXT: trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S +; M64R6-NEXT: # <MCOperand Reg:147> +; M64R6-NEXT: # <MCOperand Reg:159>> +; M64R6-NEXT: jr $ra # <MCInst #{{[0-9]+}} JALR64 +; M64R6-NEXT: # <MCOperand Reg:355> +; M64R6-NEXT: # <MCOperand Reg:301>> +; M64R6-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M64R6-NEXT: # <MCOperand Reg:321> +; M64R6-NEXT: # <MCOperand Reg:147>> +; +; MMR2-FP32-LABEL: test1: +; MMR2-FP32: # %bb.0: # %entry +; MMR2-FP32-NEXT: trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S_MM +; MMR2-FP32-NEXT: # <MCOperand Reg:147> +; MMR2-FP32-NEXT: # <MCOperand Reg:159>> +; MMR2-FP32-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR_MM +; MMR2-FP32-NEXT: # <MCOperand Reg:19>> +; MMR2-FP32-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM +; MMR2-FP32-NEXT: # <MCOperand Reg:321> +; MMR2-FP32-NEXT: # <MCOperand Reg:147>> +; +; MMR2-FP64-LABEL: test1: +; MMR2-FP64: # %bb.0: # %entry +; MMR2-FP64-NEXT: trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S_MM +; MMR2-FP64-NEXT: # <MCOperand Reg:147> +; MMR2-FP64-NEXT: # <MCOperand Reg:159>> +; MMR2-FP64-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR_MM +; MMR2-FP64-NEXT: # <MCOperand Reg:19>> +; MMR2-FP64-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM +; MMR2-FP64-NEXT: # <MCOperand Reg:321> +; MMR2-FP64-NEXT: # <MCOperand Reg:147>> +; +; MMR2-SF-LABEL: test1: +; MMR2-SF: # %bb.0: # %entry +; MMR2-SF-NEXT: addiusp -24 # <MCInst #{{[0-9]+}} ADDIUSP_MM +; MMR2-SF-NEXT: # <MCOperand Imm:-24>> +; MMR2-SF-NEXT: .cfi_def_cfa_offset 24 +; MMR2-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MMR2-SF-NEXT: # <MCInst #{{[0-9]+}} SWSP_MM +; MMR2-SF-NEXT: # <MCOperand Reg:19> +; MMR2-SF-NEXT: # <MCOperand Reg:20> +; MMR2-SF-NEXT: # <MCOperand Imm:20>> +; MMR2-SF-NEXT: .cfi_offset 31, -4 +; MMR2-SF-NEXT: jal __fixsfsi # <MCInst #{{[0-9]+}} JAL_MM +; MMR2-SF-NEXT: # <MCOperand Expr:(__fixsfsi)>> +; MMR2-SF-NEXT: nop # <MCInst #{{[0-9]+}} SLL +; MMR2-SF-NEXT: # <MCOperand Reg:21> +; MMR2-SF-NEXT: # 
<MCOperand Reg:21> +; MMR2-SF-NEXT: # <MCOperand Imm:0>> +; MMR2-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MMR2-SF-NEXT: # <MCInst #{{[0-9]+}} LWSP_MM +; MMR2-SF-NEXT: # <MCOperand Reg:19> +; MMR2-SF-NEXT: # <MCOperand Reg:20> +; MMR2-SF-NEXT: # <MCOperand Imm:20>> +; MMR2-SF-NEXT: addiusp 24 # <MCInst #{{[0-9]+}} ADDIUSP_MM +; MMR2-SF-NEXT: # <MCOperand Imm:24>> +; MMR2-SF-NEXT: jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM +; MMR2-SF-NEXT: # <MCOperand Reg:19>> +; +; MMR6-LABEL: test1: +; MMR6: # %bb.0: # %entry +; MMR6-NEXT: trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S_MMR6 +; MMR6-NEXT: # <MCOperand Reg:147> +; MMR6-NEXT: # <MCOperand Reg:159>> +; MMR6-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM +; MMR6-NEXT: # <MCOperand Reg:321> +; MMR6-NEXT: # <MCOperand Reg:147>> +; MMR6-NEXT: jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM +; MMR6-NEXT: # <MCOperand Reg:19>> +; +; MMR6-SF-LABEL: test1: +; MMR6-SF: # %bb.0: # %entry +; MMR6-SF-NEXT: addiu $sp, $sp, -24 # <MCInst #{{[0-9]+}} ADDiu +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Imm:-24>> +; MMR6-SF-NEXT: .cfi_def_cfa_offset 24 +; MMR6-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MMR6-SF-NEXT: # <MCInst #{{[0-9]+}} SW +; MMR6-SF-NEXT: # <MCOperand Reg:19> +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Imm:20>> +; MMR6-SF-NEXT: .cfi_offset 31, -4 +; MMR6-SF-NEXT: jalr __fixsfsi # <MCInst #{{[0-9]+}} JALRC16_MMR6 +; MMR6-SF-NEXT: # <MCOperand Expr:(__fixsfsi)>> +; MMR6-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MMR6-SF-NEXT: # <MCInst #{{[0-9]+}} LW +; MMR6-SF-NEXT: # <MCOperand Reg:19> +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Imm:20>> +; MMR6-SF-NEXT: addiu $sp, $sp, 24 # <MCInst #{{[0-9]+}} ADDiu +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Imm:24>> +; MMR6-SF-NEXT: jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM +; MMR6-SF-NEXT: # 
<MCOperand Reg:19>> +entry: + %conv = fptosi float %t to i32 + ret i32 %conv +} + +define i32 @test2(double %t) { +; M32-LABEL: test2: +; M32: # %bb.0: # %entry +; M32-NEXT: trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D32 +; M32-NEXT: # <MCOperand Reg:147> +; M32-NEXT: # <MCOperand Reg:133>> +; M32-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR +; M32-NEXT: # <MCOperand Reg:19>> +; M32-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M32-NEXT: # <MCOperand Reg:321> +; M32-NEXT: # <MCOperand Reg:147>> +; +; M32R2-FP64-LABEL: test2: +; M32R2-FP64: # %bb.0: # %entry +; M32R2-FP64-NEXT: trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D64 +; M32R2-FP64-NEXT: # <MCOperand Reg:147> +; M32R2-FP64-NEXT: # <MCOperand Reg:373>> +; M32R2-FP64-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR +; M32R2-FP64-NEXT: # <MCOperand Reg:19>> +; M32R2-FP64-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M32R2-FP64-NEXT: # <MCOperand Reg:321> +; M32R2-FP64-NEXT: # <MCOperand Reg:147>> +; +; M32R2-SF-LABEL: test2: +; M32R2-SF: # %bb.0: # %entry +; M32R2-SF-NEXT: addiu $sp, $sp, -24 # <MCInst #{{[0-9]+}} ADDiu +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Imm:-24>> +; M32R2-SF-NEXT: .cfi_def_cfa_offset 24 +; M32R2-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; M32R2-SF-NEXT: # <MCInst #{{[0-9]+}} SW +; M32R2-SF-NEXT: # <MCOperand Reg:19> +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Imm:20>> +; M32R2-SF-NEXT: .cfi_offset 31, -4 +; M32R2-SF-NEXT: jal __fixdfsi # <MCInst #{{[0-9]+}} JAL +; M32R2-SF-NEXT: # <MCOperand Expr:(__fixdfsi)>> +; M32R2-SF-NEXT: nop # <MCInst #{{[0-9]+}} SLL +; M32R2-SF-NEXT: # <MCOperand Reg:21> +; M32R2-SF-NEXT: # <MCOperand Reg:21> +; M32R2-SF-NEXT: # <MCOperand Imm:0>> +; M32R2-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; M32R2-SF-NEXT: # <MCInst #{{[0-9]+}} LW +; M32R2-SF-NEXT: # <MCOperand Reg:19> +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Imm:20>> +; 
M32R2-SF-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR +; M32R2-SF-NEXT: # <MCOperand Reg:19>> +; M32R2-SF-NEXT: addiu $sp, $sp, 24 # <MCInst #{{[0-9]+}} ADDiu +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Reg:20> +; M32R2-SF-NEXT: # <MCOperand Imm:24>> +; +; M32R3R5-LABEL: test2: +; M32R3R5: # %bb.0: # %entry +; M32R3R5-NEXT: trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D32 +; M32R3R5-NEXT: # <MCOperand Reg:147> +; M32R3R5-NEXT: # <MCOperand Reg:133>> +; M32R3R5-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR +; M32R3R5-NEXT: # <MCOperand Reg:19>> +; M32R3R5-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M32R3R5-NEXT: # <MCOperand Reg:321> +; M32R3R5-NEXT: # <MCOperand Reg:147>> +; +; M32R6-LABEL: test2: +; M32R6: # %bb.0: # %entry +; M32R6-NEXT: trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D64 +; M32R6-NEXT: # <MCOperand Reg:147> +; M32R6-NEXT: # <MCOperand Reg:373>> +; M32R6-NEXT: jr $ra # <MCInst #{{[0-9]+}} JALR +; M32R6-NEXT: # <MCOperand Reg:21> +; M32R6-NEXT: # <MCOperand Reg:19>> +; M32R6-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M32R6-NEXT: # <MCOperand Reg:321> +; M32R6-NEXT: # <MCOperand Reg:147>> +; +; M64-LABEL: test2: +; M64: # %bb.0: # %entry +; M64-NEXT: trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D64 +; M64-NEXT: # <MCOperand Reg:147> +; M64-NEXT: # <MCOperand Reg:373>> +; M64-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR +; M64-NEXT: # <MCOperand Reg:301>> +; M64-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M64-NEXT: # <MCOperand Reg:321> +; M64-NEXT: # <MCOperand Reg:147>> +; +; M64R6-LABEL: test2: +; M64R6: # %bb.0: # %entry +; M64R6-NEXT: trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D64 +; M64R6-NEXT: # <MCOperand Reg:147> +; M64R6-NEXT: # <MCOperand Reg:373>> +; M64R6-NEXT: jr $ra # <MCInst #{{[0-9]+}} JALR64 +; M64R6-NEXT: # <MCOperand Reg:355> +; M64R6-NEXT: # <MCOperand Reg:301>> +; M64R6-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1 +; M64R6-NEXT: # <MCOperand Reg:321> +; M64R6-NEXT: # <MCOperand Reg:147>> 
+; +; MMR2-FP32-LABEL: test2: +; MMR2-FP32: # %bb.0: # %entry +; MMR2-FP32-NEXT: trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_MM +; MMR2-FP32-NEXT: # <MCOperand Reg:147> +; MMR2-FP32-NEXT: # <MCOperand Reg:133>> +; MMR2-FP32-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR_MM +; MMR2-FP32-NEXT: # <MCOperand Reg:19>> +; MMR2-FP32-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM +; MMR2-FP32-NEXT: # <MCOperand Reg:321> +; MMR2-FP32-NEXT: # <MCOperand Reg:147>> +; +; MMR2-FP64-LABEL: test2: +; MMR2-FP64: # %bb.0: # %entry +; MMR2-FP64-NEXT: cvt.w.d $f0, $f12 # <MCInst #{{[0-9]+}} CVT_W_D64_MM +; MMR2-FP64-NEXT: # <MCOperand Reg:147> +; MMR2-FP64-NEXT: # <MCOperand Reg:373>> +; MMR2-FP64-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR_MM +; MMR2-FP64-NEXT: # <MCOperand Reg:19>> +; MMR2-FP64-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM +; MMR2-FP64-NEXT: # <MCOperand Reg:321> +; MMR2-FP64-NEXT: # <MCOperand Reg:147>> +; +; MMR2-SF-LABEL: test2: +; MMR2-SF: # %bb.0: # %entry +; MMR2-SF-NEXT: addiusp -24 # <MCInst #{{[0-9]+}} ADDIUSP_MM +; MMR2-SF-NEXT: # <MCOperand Imm:-24>> +; MMR2-SF-NEXT: .cfi_def_cfa_offset 24 +; MMR2-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MMR2-SF-NEXT: # <MCInst #{{[0-9]+}} SWSP_MM +; MMR2-SF-NEXT: # <MCOperand Reg:19> +; MMR2-SF-NEXT: # <MCOperand Reg:20> +; MMR2-SF-NEXT: # <MCOperand Imm:20>> +; MMR2-SF-NEXT: .cfi_offset 31, -4 +; MMR2-SF-NEXT: jal __fixdfsi # <MCInst #{{[0-9]+}} JAL_MM +; MMR2-SF-NEXT: # <MCOperand Expr:(__fixdfsi)>> +; MMR2-SF-NEXT: nop # <MCInst #{{[0-9]+}} SLL +; MMR2-SF-NEXT: # <MCOperand Reg:21> +; MMR2-SF-NEXT: # <MCOperand Reg:21> +; MMR2-SF-NEXT: # <MCOperand Imm:0>> +; MMR2-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MMR2-SF-NEXT: # <MCInst #{{[0-9]+}} LWSP_MM +; MMR2-SF-NEXT: # <MCOperand Reg:19> +; MMR2-SF-NEXT: # <MCOperand Reg:20> +; MMR2-SF-NEXT: # <MCOperand Imm:20>> +; MMR2-SF-NEXT: addiusp 24 # <MCInst #{{[0-9]+}} ADDIUSP_MM +; MMR2-SF-NEXT: # <MCOperand Imm:24>> +; MMR2-SF-NEXT: jrc $ra # <MCInst 
#{{[0-9]+}} JRC16_MM +; MMR2-SF-NEXT: # <MCOperand Reg:19>> +; +; MMR6-LABEL: test2: +; MMR6: # %bb.0: # %entry +; MMR6-NEXT: trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D_MMR6 +; MMR6-NEXT: # <MCOperand Reg:147> +; MMR6-NEXT: # <MCOperand Reg:373>> +; MMR6-NEXT: mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM +; MMR6-NEXT: # <MCOperand Reg:321> +; MMR6-NEXT: # <MCOperand Reg:147>> +; MMR6-NEXT: jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM +; MMR6-NEXT: # <MCOperand Reg:19>> +; +; MMR6-SF-LABEL: test2: +; MMR6-SF: # %bb.0: # %entry +; MMR6-SF-NEXT: addiu $sp, $sp, -24 # <MCInst #{{[0-9]+}} ADDiu +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Imm:-24>> +; MMR6-SF-NEXT: .cfi_def_cfa_offset 24 +; MMR6-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MMR6-SF-NEXT: # <MCInst #{{[0-9]+}} SW +; MMR6-SF-NEXT: # <MCOperand Reg:19> +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Imm:20>> +; MMR6-SF-NEXT: .cfi_offset 31, -4 +; MMR6-SF-NEXT: jalr __fixdfsi # <MCInst #{{[0-9]+}} JALRC16_MMR6 +; MMR6-SF-NEXT: # <MCOperand Expr:(__fixdfsi)>> +; MMR6-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MMR6-SF-NEXT: # <MCInst #{{[0-9]+}} LW +; MMR6-SF-NEXT: # <MCOperand Reg:19> +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Imm:20>> +; MMR6-SF-NEXT: addiu $sp, $sp, 24 # <MCInst #{{[0-9]+}} ADDiu +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Reg:20> +; MMR6-SF-NEXT: # <MCOperand Imm:24>> +; MMR6-SF-NEXT: jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM +; MMR6-SF-NEXT: # <MCOperand Reg:19>> +entry: + %conv = fptosi double %t to i32 + ret i32 %conv +} diff --git a/test/CodeGen/Mips/micromips-pseudo-mtlohi-expand.ll b/test/CodeGen/Mips/micromips-pseudo-mtlohi-expand.ll new file mode 100644 index 0000000000000..3f86bd24f34ff --- /dev/null +++ b/test/CodeGen/Mips/micromips-pseudo-mtlohi-expand.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r2 -mattr=+micromips -asm-show-inst < %s |\ +; RUN: FileCheck %s -check-prefixes=MMR2 +; RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r2 -mattr=+dsp,+micromips -asm-show-inst < %s |\ +; RUN: FileCheck %s -check-prefixes=MMR2-DSP + +define i64 @test(i32 signext %a, i32 signext %b) { +; MMR2-LABEL: test: +; MMR2: # %bb.0: # %entry +; MMR2-NEXT: li16 $2, 0 # <MCInst #{{[0-9]+}} LI16_MM +; MMR2-NEXT: # <MCOperand Reg:321> +; MMR2-NEXT: # <MCOperand Imm:0>> +; MMR2-NEXT: li16 $3, 1 # <MCInst #{{[0-9]+}} LI16_MM +; MMR2-NEXT: # <MCOperand Reg:322> +; MMR2-NEXT: # <MCOperand Imm:1>> +; MMR2-NEXT: mtlo $3 # <MCInst #{{[0-9]+}} MTLO_MM +; MMR2-NEXT: # <MCOperand Reg:322>> +; MMR2-NEXT: mthi $2 # <MCInst #{{[0-9]+}} MTHI_MM +; MMR2-NEXT: # <MCOperand Reg:321>> +; MMR2-NEXT: madd $4, $5 # <MCInst #{{[0-9]+}} MADD +; MMR2-NEXT: # <MCOperand Reg:22> +; MMR2-NEXT: # <MCOperand Reg:23>> +; MMR2-NEXT: mflo16 $2 # <MCInst #{{[0-9]+}} MFLO16_MM +; MMR2-NEXT: # <MCOperand Reg:321>> +; MMR2-NEXT: mfhi16 $3 # <MCInst #{{[0-9]+}} MFHI16_MM +; MMR2-NEXT: # <MCOperand Reg:322>> +; MMR2-NEXT: jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM +; MMR2-NEXT: # <MCOperand Reg:19>> +; +; MMR2-DSP-LABEL: test: +; MMR2-DSP: # %bb.0: # %entry +; MMR2-DSP-NEXT: li16 $2, 0 # <MCInst #{{[0-9]+}} LI16_MM +; MMR2-DSP-NEXT: # <MCOperand Reg:321> +; MMR2-DSP-NEXT: # <MCOperand Imm:0>> +; MMR2-DSP-NEXT: li16 $3, 1 # <MCInst #{{[0-9]+}} LI16_MM +; MMR2-DSP-NEXT: # <MCOperand Reg:322> +; MMR2-DSP-NEXT: # <MCOperand Imm:1>> +; MMR2-DSP-NEXT: mtlo $3, $ac0 # <MCInst #{{[0-9]+}} MTLO_DSP +; MMR2-DSP-NEXT: # <MCOperand Reg:291> +; MMR2-DSP-NEXT: # <MCOperand Reg:322>> +; MMR2-DSP-NEXT: mthi $2, $ac0 # <MCInst #{{[0-9]+}} MTHI_DSP +; MMR2-DSP-NEXT: # <MCOperand Reg:253> +; MMR2-DSP-NEXT: # <MCOperand Reg:321>> +; MMR2-DSP-NEXT: madd $ac0, $4, $5 # <MCInst #{{[0-9]+}} MADD_DSP +; MMR2-DSP-NEXT: # <MCOperand Reg:26> +; MMR2-DSP-NEXT: # 
<MCOperand Reg:22> +; MMR2-DSP-NEXT: # <MCOperand Reg:23> +; MMR2-DSP-NEXT: # <MCOperand Reg:26>> +; MMR2-DSP-NEXT: mflo $2, $ac0 # <MCInst #{{[0-9]+}} MFLO_DSP +; MMR2-DSP-NEXT: # <MCOperand Reg:321> +; MMR2-DSP-NEXT: # <MCOperand Reg:26>> +; MMR2-DSP-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR_MM +; MMR2-DSP-NEXT: # <MCOperand Reg:19>> +; MMR2-DSP-NEXT: mfhi $3, $ac0 # <MCInst #{{[0-9]+}} MFHI_DSP +; MMR2-DSP-NEXT: # <MCOperand Reg:322> +; MMR2-DSP-NEXT: # <MCOperand Reg:26>> +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %mul = mul nsw i64 %conv, %conv1 + %add = add nsw i64 %mul, 1 + ret i64 %add +} diff --git a/test/CodeGen/Mips/pseudo-jump-fill.ll b/test/CodeGen/Mips/pseudo-jump-fill.ll new file mode 100644 index 0000000000000..31f077d57a933 --- /dev/null +++ b/test/CodeGen/Mips/pseudo-jump-fill.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mipsel-linux-gnu -mattr=+micromips -relocation-model=pic < %s | FileCheck %s + +; Test that the delay slot filler correctly handles indirect branches for +; microMIPS in regard to incorrectly using 16bit instructions in delay slots of +; 32bit instructions. 
+ +define i32 @test(i32 signext %x, i32 signext %c) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui $2, %hi(_gp_disp) +; CHECK-NEXT: addiu $2, $2, %lo(_gp_disp) +; CHECK-NEXT: addiur2 $5, $5, -1 +; CHECK-NEXT: sltiu $1, $5, 4 +; CHECK-NEXT: beqz $1, $BB0_3 +; CHECK-NEXT: addu $3, $2, $25 +; CHECK-NEXT: $BB0_1: # %entry +; CHECK-NEXT: li16 $2, 0 +; CHECK-NEXT: sll16 $5, $5, 2 +; CHECK-NEXT: lw $6, %got($JTI0_0)($3) +; CHECK-NEXT: addu16 $5, $5, $6 +; CHECK-NEXT: lw $5, %lo($JTI0_0)($5) +; CHECK-NEXT: addu16 $3, $5, $3 +; CHECK-NEXT: jr $3 +; CHECK-NEXT: nop +; CHECK-NEXT: $BB0_2: # %sw.bb2 +; CHECK-NEXT: addiur2 $2, $4, 1 +; CHECK-NEXT: jrc $ra +; CHECK-NEXT: $BB0_3: +; CHECK-NEXT: move $2, $4 +; CHECK-NEXT: jrc $ra +; CHECK-NEXT: $BB0_4: # %sw.bb3 +; CHECK-NEXT: addius5 $4, 2 +; CHECK-NEXT: move $2, $4 +; CHECK-NEXT: jrc $ra +; CHECK-NEXT: $BB0_5: # %sw.bb5 +; CHECK-NEXT: addius5 $4, 3 +; CHECK-NEXT: move $2, $4 +; CHECK-NEXT: $BB0_6: # %for.cond.cleanup +; CHECK-NEXT: jrc $ra +entry: + switch i32 %c, label %sw.epilog [ + i32 4, label %sw.bb5 + i32 1, label %for.cond.cleanup + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb2: + %add = add nsw i32 %x, 1 + br label %sw.epilog + +sw.bb3: + %add4 = add nsw i32 %x, 2 + br label %sw.epilog + +sw.bb5: + %add6 = add nsw i32 %x, 3 + br label %sw.epilog + +sw.epilog: + %a.0 = phi i32 [ %add6, %sw.bb5 ], [ %add4, %sw.bb3 ], [ %add, %sw.bb2 ], [ %x, %entry ] + br label %for.cond.cleanup + +for.cond.cleanup: + %a.028 = phi i32 [ %a.0, %sw.epilog ], [ 0, %entry ] + ret i32 %a.028 +} diff --git a/test/CodeGen/PowerPC/ppc32-pic-large.ll b/test/CodeGen/PowerPC/ppc32-pic-large.ll index d6e491ea2734c..272138e5121bf 100644 --- a/test/CodeGen/PowerPC/ppc32-pic-large.ll +++ b/test/CodeGen/PowerPC/ppc32-pic-large.ll @@ -1,5 +1,9 @@ ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -relocation-model=pic | FileCheck -check-prefix=LARGE-BSS %s ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu 
-mattr=+secure-plt -relocation-model=pic | FileCheck -check-prefix=LARGE-SECUREPLT %s +; RUN: llc < %s -mtriple=powerpc-unknown-netbsd -mattr=+secure-plt -relocation-model=pic | FileCheck -check-prefix=LARGE-SECUREPLT %s +; RUN: llc < %s -mtriple=powerpc-unknown-netbsd -relocation-model=pic | FileCheck -check-prefix=LARGE-SECUREPLT %s +; RUN: llc < %s -mtriple=powerpc-unknown-openbsd -mattr=+secure-plt -relocation-model=pic | FileCheck -check-prefix=LARGE-SECUREPLT %s +; RUN: llc < %s -mtriple=powerpc-unknown-openbsd -relocation-model=pic | FileCheck -check-prefix=LARGE-SECUREPLT %s @bar = common global i32 0, align 4 declare i32 @call_foo(i32, ...) diff --git a/test/CodeGen/SPARC/fp128.ll b/test/CodeGen/SPARC/fp128.ll index 535f0ef60c404..21a9cdf77e0de 100644 --- a/test/CodeGen/SPARC/fp128.ll +++ b/test/CodeGen/SPARC/fp128.ll @@ -53,6 +53,29 @@ entry: ret void } +; CHECK-LABEL: f128_spill_large: +; CHECK: sethi 4, %g1 +; CHECK: sethi 4, %g1 +; CHECK-NEXT: add %g1, %sp, %g1 +; CHECK-NEXT: std %f{{.+}}, [%g1] +; CHECK: sethi 4, %g1 +; CHECK-NEXT: add %g1, %sp, %g1 +; CHECK-NEXT: std %f{{.+}}, [%g1+8] +; CHECK: sethi 4, %g1 +; CHECK-NEXT: add %g1, %sp, %g1 +; CHECK-NEXT: ldd [%g1], %f{{.+}} +; CHECK: sethi 4, %g1 +; CHECK-NEXT: add %g1, %sp, %g1 +; CHECK-NEXT: ldd [%g1+8], %f{{.+}} + +define void @f128_spill_large(<251 x fp128>* noalias sret %scalar.result, <251 x fp128>* byval %a) { +entry: + %0 = load <251 x fp128>, <251 x fp128>* %a, align 8 + call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() + store <251 x fp128> %0, <251 x fp128>* %scalar.result, align 8 + ret void +} + ; CHECK-LABEL: f128_compare: ; HARD: fcmpq ; HARD-NEXT: nop diff --git a/test/CodeGen/WebAssembly/varargs.ll b/test/CodeGen/WebAssembly/varargs.ll index 1a73716c2a67c..5a8df4cd2fec4 100644 --- 
a/test/CodeGen/WebAssembly/varargs.ll +++ b/test/CodeGen/WebAssembly/varargs.ll @@ -163,6 +163,32 @@ define void @nonlegal_fixed(fp128 %x, ...) nounwind { ret void } +; Test that an fp128 argument is properly aligned and allocated +; within a vararg buffer. + +; CHECK-LABEL: call_fp128_alignment: +; CHECK: global.get $push7=, __stack_pointer +; CHECK-NEXT: i32.const $push8=, 32 +; CHECK-NEXT: i32.sub $push12=, $pop7, $pop8 +; CHECK-NEXT: local.tee $push11=, $1=, $pop12 +; CHECK-NEXT: global.set __stack_pointer@GLOBAL, $pop11 +; CHECK-NEXT: i32.const $push0=, 24 +; CHECK-NEXT: i32.add $push1=, $1, $pop0 +; CHECK-NEXT: i64.const $push2=, -9223372036854775808 +; CHECK-NEXT: i64.store 0($pop1), $pop2 +; CHECK-NEXT: i32.const $push3=, 16 +; CHECK-NEXT: i32.add $push4=, $1, $pop3 +; CHECK-NEXT: i64.const $push5=, 1 +; CHECK-NEXT: i64.store 0($pop4), $pop5 +; CHECK-NEXT: i32.const $push6=, 7 +; CHECK-NEXT: i32.store 0($1), $pop6 +; CHECK-NEXT: call callee@FUNCTION, $1 +define void @call_fp128_alignment(i8* %p) { +entry: + call void (...) 
@callee(i8 7, fp128 0xL00000000000000018000000000000000) + ret void +} + declare void @llvm.va_start(i8*) declare void @llvm.va_end(i8*) declare void @llvm.va_copy(i8*, i8*) diff --git a/test/CodeGen/X86/PR40322.ll b/test/CodeGen/X86/PR40322.ll new file mode 100644 index 0000000000000..22bf1822c65af --- /dev/null +++ b/test/CodeGen/X86/PR40322.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-windows-gnu | FileCheck %s --check-prefix=CHECK-MINGW-X86 + +%struct.as = type { i32* } + +@_ZZ2amiE2au = internal unnamed_addr global %struct.as zeroinitializer, align 4 +@_ZGVZ2amiE2au = internal global i64 0, align 8 +@_ZTIi = external constant i8* + +define void @_Z2ami(i32) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-MINGW-X86-LABEL: _Z2ami: +; CHECK-MINGW-X86: # %bb.0: # %entry +; CHECK-MINGW-X86-NEXT: pushl %edi +; CHECK-MINGW-X86-NEXT: .cfi_def_cfa_offset 8 +; CHECK-MINGW-X86-NEXT: pushl %esi +; CHECK-MINGW-X86-NEXT: .cfi_def_cfa_offset 12 +; CHECK-MINGW-X86-NEXT: .cfi_offset %esi, -12 +; CHECK-MINGW-X86-NEXT: .cfi_offset %edi, -8 +; CHECK-MINGW-X86-NEXT: movb __ZGVZ2amiE2au, %al +; CHECK-MINGW-X86-NEXT: testb %al, %al +; CHECK-MINGW-X86-NEXT: jne LBB0_4 +; CHECK-MINGW-X86-NEXT: # %bb.1: # %init.check +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $__ZGVZ2amiE2au +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll ___cxa_guard_acquire +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: testl %eax, %eax +; CHECK-MINGW-X86-NEXT: je LBB0_4 +; CHECK-MINGW-X86-NEXT: # %bb.2: # %init +; CHECK-MINGW-X86-NEXT: Ltmp0: +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $4 +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll __Znwj +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; 
CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: Ltmp1: +; CHECK-MINGW-X86-NEXT: # %bb.3: # %invoke.cont +; CHECK-MINGW-X86-NEXT: movl %eax, __ZZ2amiE2au +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $__ZGVZ2amiE2au +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll ___cxa_guard_release +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: LBB0_4: # %init.end +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $4 +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll __Znwj +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: movl %eax, %esi +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $4 +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll ___cxa_allocate_exception +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: movl $0, (%eax) +; CHECK-MINGW-X86-NEXT: Ltmp3: +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x0c +; CHECK-MINGW-X86-NEXT: movl .refptr.__ZTIi, %ecx +; CHECK-MINGW-X86-NEXT: pushl $0 +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: pushl %ecx +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: pushl %eax +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll ___cxa_throw +; CHECK-MINGW-X86-NEXT: addl $12, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -12 +; CHECK-MINGW-X86-NEXT: Ltmp4: +; CHECK-MINGW-X86-NEXT: # %bb.8: # %unreachable +; CHECK-MINGW-X86-NEXT: LBB0_5: # %lpad +; CHECK-MINGW-X86-NEXT: Ltmp2: +; CHECK-MINGW-X86-NEXT: movl %eax, %edi +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $__ZGVZ2amiE2au +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; 
CHECK-MINGW-X86-NEXT: calll ___cxa_guard_abort +; CHECK-MINGW-X86-NEXT: jmp LBB0_7 +; CHECK-MINGW-X86-NEXT: LBB0_6: # %lpad1 +; CHECK-MINGW-X86-NEXT: .cfi_def_cfa_offset 12 +; CHECK-MINGW-X86-NEXT: Ltmp5: +; CHECK-MINGW-X86-NEXT: movl %eax, %edi +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl %esi +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll __ZdlPv +; CHECK-MINGW-X86-NEXT: LBB0_7: # %eh.resume +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl %edi +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll __Unwind_Resume +; CHECK-MINGW-X86-NEXT: Lfunc_end0: +entry: + %1 = load atomic i8, i8* bitcast (i64* @_ZGVZ2amiE2au to i8*) acquire, align 8 + %guard.uninitialized = icmp eq i8 %1, 0 + br i1 %guard.uninitialized, label %init.check, label %init.end + +init.check: ; preds = %entry + %2 = tail call i32 @__cxa_guard_acquire(i64* nonnull @_ZGVZ2amiE2au) + %tobool = icmp eq i32 %2, 0 + br i1 %tobool, label %init.end, label %init + +init: ; preds = %init.check + %call.i3 = invoke i8* @_Znwj(i32 4) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %init + store i8* %call.i3, i8** bitcast (%struct.as* @_ZZ2amiE2au to i8**), align 4 + tail call void @__cxa_guard_release(i64* nonnull @_ZGVZ2amiE2au) + br label %init.end + +init.end: ; preds = %init.check, %invoke.cont, %entry + %call.i = tail call i8* @_Znwj(i32 4) + %exception = tail call i8* @__cxa_allocate_exception(i32 4) + %3 = bitcast i8* %exception to i32* + store i32 0, i32* %3, align 16 + invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) + to label %unreachable unwind label %lpad1 + +lpad: ; preds = %init + %4 = landingpad { i8*, i32 } + cleanup + %5 = extractvalue { i8*, i32 } %4, 0 + %6 = extractvalue { i8*, i32 } %4, 1 + tail call void 
@__cxa_guard_abort(i64* nonnull @_ZGVZ2amiE2au) #1 + br label %eh.resume + +lpad1: ; preds = %init.end + %7 = landingpad { i8*, i32 } + cleanup + %8 = extractvalue { i8*, i32 } %7, 0 + %9 = extractvalue { i8*, i32 } %7, 1 + tail call void @_ZdlPv(i8* nonnull %call.i) + br label %eh.resume + +eh.resume: ; preds = %lpad1, %lpad + %exn.slot.0 = phi i8* [ %8, %lpad1 ], [ %5, %lpad ] + %ehselector.slot.0 = phi i32 [ %9, %lpad1 ], [ %6, %lpad ] + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0 + %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1 + resume { i8*, i32 } %lpad.val2 + +unreachable: ; preds = %init.end + unreachable +} + +declare i32 @__cxa_guard_acquire(i64*) +declare i32 @__gxx_personality_v0(...) +declare void @__cxa_guard_abort(i64*) +declare void @__cxa_guard_release(i64*) +declare i8* @__cxa_allocate_exception(i32) +declare void @__cxa_throw(i8*, i8*, i8*) +declare noalias nonnull i8* @_Znwj(i32) +declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() +declare void @_ZdlPv(i8*) diff --git a/test/CodeGen/X86/fast-isel-nontemporal.ll b/test/CodeGen/X86/fast-isel-nontemporal.ll index db1ebfe6060f8..37e380b2b48c9 100644 --- a/test/CodeGen/X86/fast-isel-nontemporal.ll +++ b/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -300,10 +300,20 @@ entry: } define <16 x i8> @test_load_nt16xi8(<16 x i8>* nocapture %ptr) { -; SSE-LABEL: test_load_nt16xi8: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt16xi8: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt16xi8: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt16xi8: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt16xi8: ; AVX: # %bb.0: # %entry @@ -320,10 +330,20 @@ entry: } define <8 x i16> @test_load_nt8xi16(<8 x 
i16>* nocapture %ptr) { -; SSE-LABEL: test_load_nt8xi16: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt8xi16: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt8xi16: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt8xi16: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt8xi16: ; AVX: # %bb.0: # %entry @@ -340,10 +360,20 @@ entry: } define <4 x i32> @test_load_nt4xi32(<4 x i32>* nocapture %ptr) { -; SSE-LABEL: test_load_nt4xi32: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt4xi32: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt4xi32: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt4xi32: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt4xi32: ; AVX: # %bb.0: # %entry @@ -360,10 +390,20 @@ entry: } define <2 x i64> @test_load_nt2xi64(<2 x i64>* nocapture %ptr) { -; SSE-LABEL: test_load_nt2xi64: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt2xi64: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt2xi64: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt2xi64: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt2xi64: ; AVX: # %bb.0: # %entry diff --git a/test/CodeGen/X86/regalloc-copy-hints.mir b/test/CodeGen/X86/regalloc-copy-hints.mir new file mode 100644 index 0000000000000..6287066e64fe0 --- /dev/null +++ 
b/test/CodeGen/X86/regalloc-copy-hints.mir @@ -0,0 +1,805 @@ +# RUN: llc -mtriple=i386-unknown-unknown -mcpu=i486 %s -o - -run-pass greedy \ +# RUN: -debug-only=regalloc 2>&1 | FileCheck %s +# REQUIRES: asserts + +--- | + %0 = type { %1 } + %1 = type { %2, %23, %23*, %27*, %28*, %29, %33*, %34, %42, i8, i32, i32, i32 } + %2 = type { %3, %6, %14, %14, i8, i8*, i8*, %16 } + %3 = type { i32 (...)**, %4*, %5* } + %4 = type { i32 (...)**, %3* } + %5 = type { i32 (...)** } + %6 = type { %7 } + %7 = type { %8, i32, %12 } + %8 = type { %9**, %9**, %9**, %10 } + %9 = type { i32, i32, i32, i8* } + %10 = type { %11 } + %11 = type { %9** } + %12 = type { %13 } + %13 = type { i32 } + %14 = type { i32, %15* } + %15 = type { i32, i32, i8* } + %16 = type { %17 } + %17 = type { %18*, %20, %22 } + %18 = type { %19* } + %19 = type <{ %18, %19*, %18*, i8, [3 x i8] }> + %20 = type { %21 } + %21 = type { %18 } + %22 = type { %13 } + %23 = type { %24 } + %24 = type { %18*, %25, %26 } + %25 = type { %21 } + %26 = type { %13 } + %27 = type { i32 (...)** } + %28 = type { i32 (...)** } + %29 = type { %30 } + %30 = type { %18*, %31, %32 } + %31 = type { %21 } + %32 = type { %13 } + %33 = type { i32 (...)** } + %34 = type { %35 } + %35 = type { %36 } + %36 = type { %37, i32, %41 } + %37 = type { %38**, %38**, %38**, %39 } + %38 = type { %42, i32 } + %39 = type { %40 } + %40 = type { %38** } + %41 = type { %13 } + %42 = type { %43 } + %43 = type { %18*, %44, %45 } + %44 = type { %21 } + %45 = type { %13 } + %46 = type { %47, %48 } + %47 = type <{ %18, %19*, %18*, i8 }> + %48 = type { %49 } + %49 = type { i32, %50 } + %50 = type { { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 } } + + define void @fun(%0* %arg) local_unnamed_addr #0 align 2 personality i32 (...)* @__gxx_personality_v0 { + bb: + %tmp = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1 + %tmp1 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0 + br i1 
undef, label %bb5, label %bb6 + + bb5: ; preds = %bb + unreachable + + bb6: ; preds = %bb + %tmp8 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 8, i32 0, i32 1, i32 0, i32 0 + br i1 undef, label %bb10, label %bb9 + + bb9: ; preds = %bb6 + unreachable + + bb10: ; preds = %bb6 + store %18* %tmp8, %18** undef + br i1 undef, label %bb14, label %bb13 + + bb13: ; preds = %bb10 + unreachable + + bb14: ; preds = %bb10 + br i1 undef, label %bb17, label %bb18 + + bb17: ; preds = %bb14 + unreachable + + bb18: ; preds = %bb14 + br i1 undef, label %bb20, label %bb19 + + bb19: ; preds = %bb18 + unreachable + + bb20: ; preds = %bb18 + br i1 undef, label %bb25, label %bb24 + + bb24: ; preds = %bb20 + unreachable + + bb25: ; preds = %bb20 + br i1 undef, label %bb29, label %bb30 + + bb29: ; preds = %bb25 + unreachable + + bb30: ; preds = %bb25 + br i1 undef, label %bb38, label %bb31 + + bb31: ; preds = %bb30 + %tmp32 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0 + br i1 undef, label %bb34, label %bb35 + + bb34: ; preds = %bb31 + unreachable + + bb35: ; preds = %bb31 + br i1 undef, label %bb40, label %bb36 + + bb36: ; preds = %bb35 + unreachable + + bb38: ; preds = %bb30 + %tmp391 = bitcast %18* %tmp1 to %19** + br label %bb40 + + bb40: ; preds = %bb35, %bb38 + %tmp41 = phi %18* [ %tmp1, %bb38 ], [ null, %bb35 ] + %tmp42 = phi %19** [ %tmp391, %bb38 ], [ %tmp32, %bb35 ] + br i1 undef, label %bb43, label %bb48 + + bb43: ; preds = %bb40 + %tmp44 = tail call i8* @_Znwj() + store %18* %tmp41, %18** undef + %tmp46 = bitcast %19** %tmp42 to i8** + store i8* %tmp44, i8** %tmp46 + %0 = bitcast i8* %tmp44 to %46* + tail call void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() + br label %bb48 + + bb48: ; preds = %bb43, %bb40 + %tmp49 = phi %46* [ %0, %bb43 ], [ undef, %bb40 ] + %tmp50 = getelementptr inbounds %46, %46* %tmp49, i32 0, i32 1, i32 0, i32 1, i32 4, i32 0 + store i32 ptrtoint (i1 (%0*)* 
@_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv to i32), i32* %tmp50 + br i1 undef, label %bb52, label %bb53 + + bb52: ; preds = %bb48 + unreachable + + bb53: ; preds = %bb48 + br i1 undef, label %bb55, label %bb54 + + bb54: ; preds = %bb53 + unreachable + + bb55: ; preds = %bb53 + br i1 undef, label %bb59, label %bb58 + + bb58: ; preds = %bb55 + unreachable + + bb59: ; preds = %bb55 + br i1 undef, label %bb62, label %bb61 + + bb61: ; preds = %bb59 + unreachable + + bb62: ; preds = %bb59 + br i1 undef, label %bb64, label %bb65 + + bb64: ; preds = %bb62 + unreachable + + bb65: ; preds = %bb62 + %tmp66 = icmp eq %46* null, null + br i1 %tmp66, label %bb72, label %bb67 + + bb67: ; preds = %bb65 + %tmp68 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0 + br i1 undef, label %bb70, label %bb74 + + bb70: ; preds = %bb67 + unreachable + + bb72: ; preds = %bb65 + %tmp732 = bitcast %18* %tmp1 to %19** + br label %bb74 + + bb74: ; preds = %bb67, %bb72 + %tmp75 = phi %18* [ %tmp1, %bb72 ], [ null, %bb67 ] + %tmp76 = phi %19** [ %tmp732, %bb72 ], [ %tmp68, %bb67 ] + %tmp77 = tail call i8* @_Znwj() + store %18* %tmp75, %18** undef + %tmp79 = bitcast %19** %tmp76 to i8** + store i8* %tmp77, i8** %tmp79 + %1 = bitcast i8* %tmp77 to %46* + tail call void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() + %tmp81 = getelementptr inbounds %46, %46* %1, i32 0, i32 1, i32 0, i32 1, i32 2, i32 0 + store i32 ptrtoint (i1 (%0*)* @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv to i32), i32* %tmp81 + store %18* %tmp8, %18** undef + %2 = bitcast %0* %arg to i8* + %sunkaddr = getelementptr i8, i8* %2, i32 140 + %3 = bitcast i8* %sunkaddr to %18** + %tmp85 = load %18*, %18** %3 + %tmp864 = bitcast %18* %tmp85 to %19** + %tmp87 = load %19*, %19** %tmp864 + %tmp88 = icmp eq %19* %tmp87, null + br i1 %tmp88, label %bb90, label %bb89 + + bb89: ; preds = %bb74 + unreachable + + 
bb90: ; preds = %bb74 + br i1 undef, label %bb94, label %bb92 + + bb92: ; preds = %bb90 + br i1 undef, label %bb96, label %bb97 + + bb94: ; preds = %bb90 + unreachable + + bb96: ; preds = %bb92 + unreachable + + bb97: ; preds = %bb92 + br i1 undef, label %bb101, label %bb102 + + bb101: ; preds = %bb97 + unreachable + + bb102: ; preds = %bb97 + br i1 undef, label %bb104, label %bb103 + + bb103: ; preds = %bb102 + unreachable + + bb104: ; preds = %bb102 + br i1 undef, label %bb109, label %bb108 + + bb108: ; preds = %bb104 + unreachable + + bb109: ; preds = %bb104 + br i1 undef, label %bb111, label %bb112 + + bb111: ; preds = %bb109 + unreachable + + bb112: ; preds = %bb109 + br i1 undef, label %bb118, label %bb117 + + bb117: ; preds = %bb112 + unreachable + + bb118: ; preds = %bb112 + br i1 undef, label %bb120, label %bb121 + + bb120: ; preds = %bb118 + unreachable + + bb121: ; preds = %bb118 + br i1 undef, label %bb124, label %bb125 + + bb124: ; preds = %bb121 + unreachable + + bb125: ; preds = %bb121 + %4 = bitcast %18* %tmp1 to %46** + %tmp126 = load %46*, %46** %4 + %tmp127 = icmp eq %46* %tmp126, null + br i1 %tmp127, label %bb135, label %bb128 + + bb128: ; preds = %bb125 + br label %bb129 + + bb129: ; preds = %bb131, %bb128 + %tmp130 = icmp ugt i32 undef, 95406324 + br i1 %tmp130, label %bb131, label %bb133 + + bb131: ; preds = %bb129 + br label %bb129 + + bb133: ; preds = %bb129 + unreachable + + bb135: ; preds = %bb125 + br i1 undef, label %bb137, label %bb138 + + bb137: ; preds = %bb135 + unreachable + + bb138: ; preds = %bb135 + unreachable + } + + declare zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv(%0*) #0 + + declare zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv(%0*) #0 align 2 + + declare i32 @__gxx_personality_v0(...) 
#0 + + declare noalias nonnull i8* @_Znwj() local_unnamed_addr #0 + + declare void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() local_unnamed_addr #0 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { "target-cpu"="i486" } + attributes #1 = { nounwind } + +... +--- +# A physreg should always only be hinted once per getRegAllocationHints() query. +# CHECK: hints: $ebx $edi +# CHECK-NOT: hints: $ebx $edi $ebx $edi +name: fun +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gr32 } + - { id: 1, class: gr32 } + - { id: 2, class: gr32 } + - { id: 3, class: gr32 } + - { id: 4, class: gr32 } + - { id: 5, class: gr32 } + - { id: 6, class: gr32 } + - { id: 7, class: gr32 } + - { id: 8, class: gr32 } + - { id: 9, class: gr32 } + - { id: 10, class: gr32 } + - { id: 11, class: gr32 } + - { id: 12, class: gr32 } + - { id: 13, class: gr32_abcd } + - { id: 14, class: gr8 } + - { id: 15, class: gr32_abcd } + - { id: 16, class: gr8 } + - { id: 17, class: gr32 } + - { id: 18, class: gr32_abcd } + - { id: 19, class: gr8 } + - { id: 20, class: gr32_abcd } + - { id: 21, class: gr8 } + - { id: 22, class: gr32_abcd } + - { id: 23, class: gr8 } + - { id: 24, class: gr32_abcd } + - { id: 25, class: gr8 } + - { id: 26, class: gr32_abcd } + - { id: 27, class: gr8 } + - { id: 28, class: gr32_abcd } + - { id: 29, class: gr8 } + - { id: 30, class: gr32_abcd } + - { id: 31, class: gr8 } + - { id: 32, class: gr32_abcd } + - { id: 33, class: gr8 } + - { id: 34, class: gr32 } + - { id: 35, class: gr32_abcd } + - { id: 36, class: gr8 } + - { id: 37, class: gr32 } + - { id: 38, class: gr32 } + - { id: 39, class: gr32_abcd } + - { id: 40, class: gr8 } + - { id: 41, class: gr32_abcd } + - { id: 42, class: gr8 } + - { id: 43, class: gr32_abcd } + - { id: 44, class: gr8 } + - { id: 45, class: gr32_abcd } + - { id: 46, class: gr8 } + - { id: 47, class: gr32_abcd } + - { id: 48, class: gr8 } + - { id: 
49, class: gr8 } + - { id: 50, class: gr32_abcd } + - { id: 51, class: gr8 } + - { id: 52, class: gr32 } + - { id: 53, class: gr32 } + - { id: 54, class: gr32 } + - { id: 55, class: gr32 } + - { id: 56, class: gr32_abcd } + - { id: 57, class: gr8 } + - { id: 58, class: gr32_abcd } + - { id: 59, class: gr8 } + - { id: 60, class: gr32_abcd } + - { id: 61, class: gr8 } + - { id: 62, class: gr32_abcd } + - { id: 63, class: gr8 } + - { id: 64, class: gr32_abcd } + - { id: 65, class: gr8 } + - { id: 66, class: gr32_abcd } + - { id: 67, class: gr8 } + - { id: 68, class: gr32_abcd } + - { id: 69, class: gr8 } + - { id: 70, class: gr32_abcd } + - { id: 71, class: gr8 } + - { id: 72, class: gr32_abcd } + - { id: 73, class: gr8 } + - { id: 74, class: gr32 } + - { id: 75, class: gr32 } + - { id: 76, class: gr32_abcd } + - { id: 77, class: gr8 } + - { id: 78, class: gr32_abcd } + - { id: 79, class: gr32 } + - { id: 80, class: gr32 } + - { id: 81, class: gr32_abcd } + - { id: 82, class: gr32 } +frameInfo: + maxAlignment: 4 + hasCalls: true +fixedStack: + - { id: 0, size: 4, alignment: 4, stack-id: 0, isImmutable: true } +body: | + bb.0.bb: + successors: %bb.1(0x00000001), %bb.2(0x7fffffff) + + %13:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %13.sub_8bit, %13.sub_8bit, implicit-def $eflags + JNE_1 %bb.2, implicit killed $eflags + JMP_1 %bb.1 + + bb.1.bb5: + successors: + + + bb.2.bb6: + successors: %bb.4(0x7fffffff), %bb.3(0x00000001) + + %15:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %15.sub_8bit, %15.sub_8bit, implicit-def $eflags + JNE_1 %bb.4, implicit killed $eflags + JMP_1 %bb.3 + + bb.3.bb9: + successors: + + + bb.4.bb10: + successors: %bb.6(0x7fffffff), %bb.5(0x00000001) + + %12:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) + %1:gr32 = LEA32r %12, 1, $noreg, 144, $noreg + MOV32mr undef %17:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `%18** undef`) + %18:gr32_abcd = MOV32r0 implicit-def dead $eflags + 
TEST8rr %18.sub_8bit, %18.sub_8bit, implicit-def $eflags + JNE_1 %bb.6, implicit killed $eflags + JMP_1 %bb.5 + + bb.5.bb13: + successors: + + + bb.6.bb14: + successors: %bb.7(0x00000001), %bb.8(0x7fffffff) + + %20:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %20.sub_8bit, %20.sub_8bit, implicit-def $eflags + JNE_1 %bb.8, implicit killed $eflags + JMP_1 %bb.7 + + bb.7.bb17: + successors: + + + bb.8.bb18: + successors: %bb.10(0x7fffffff), %bb.9(0x00000001) + + %22:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %22.sub_8bit, %22.sub_8bit, implicit-def $eflags + JNE_1 %bb.10, implicit killed $eflags + JMP_1 %bb.9 + + bb.9.bb19: + successors: + + + bb.10.bb20: + successors: %bb.12(0x7fffffff), %bb.11(0x00000001) + + %24:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %24.sub_8bit, %24.sub_8bit, implicit-def $eflags + JNE_1 %bb.12, implicit killed $eflags + JMP_1 %bb.11 + + bb.11.bb24: + successors: + + + bb.12.bb25: + successors: %bb.13(0x00000001), %bb.14(0x7fffffff) + + %26:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %26.sub_8bit, %26.sub_8bit, implicit-def $eflags + JNE_1 %bb.14, implicit killed $eflags + JMP_1 %bb.13 + + bb.13.bb29: + successors: + + + bb.14.bb30: + %0:gr32 = LEA32r %12, 1, $noreg, 80, $noreg + %28:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %28.sub_8bit, %28.sub_8bit, implicit-def $eflags + JNE_1 %bb.20, implicit killed $eflags + JMP_1 %bb.15 + + bb.15.bb31: + successors: %bb.16(0x00000001), %bb.17(0x7fffffff) + + %78:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %78.sub_8bit, %78.sub_8bit, implicit-def $eflags + JNE_1 %bb.17, implicit killed $eflags + JMP_1 %bb.16 + + bb.16.bb34: + successors: + + + bb.17.bb35: + successors: %bb.18(0x7fffffff), %bb.19(0x00000001) + + TEST8rr %78.sub_8bit, %78.sub_8bit, implicit-def $eflags + JE_1 %bb.19, implicit killed $eflags + + bb.18: + %79:gr32 = LEA32r %12, 1, $noreg, 80, $noreg + JMP_1 %bb.21 + + bb.19.bb36: + successors: + + + bb.20.bb38: + 
%78:gr32_abcd = COPY %0 + %79:gr32 = COPY %0 + + bb.21.bb40: + successors: %bb.22, %bb.23 + + %35:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %35.sub_8bit, %35.sub_8bit, implicit-def $eflags + %80:gr32 = IMPLICIT_DEF + JNE_1 %bb.23, implicit killed $eflags + JMP_1 %bb.22 + + bb.22.bb43: + ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + CALLpcrel32 @_Znwj, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, implicit-def $eax + ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + %80:gr32 = COPY killed $eax + MOV32mr undef %38:gr32, 1, $noreg, 0, $noreg, %78 :: (store 4 into `%18** undef`) + MOV32mr %79, 1, $noreg, 0, $noreg, %80 :: (store 4 into %ir.tmp46) + ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + CALLpcrel32 @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp + ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + + bb.23.bb48: + successors: %bb.24(0x00000001), %bb.25(0x7fffffff) + + MOV32mi %80, 1, $noreg, 52, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv :: (store 4 into %ir.tmp50) + %39:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %39.sub_8bit, %39.sub_8bit, implicit-def $eflags + JNE_1 %bb.25, implicit killed $eflags + JMP_1 %bb.24 + + bb.24.bb52: + successors: + + + bb.25.bb53: + successors: %bb.27(0x7fffffff), %bb.26(0x00000001) + + %41:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %41.sub_8bit, %41.sub_8bit, implicit-def $eflags + JNE_1 %bb.27, implicit killed $eflags + JMP_1 %bb.26 + + bb.26.bb54: + successors: + + + bb.27.bb55: + successors: 
%bb.29(0x7fffffff), %bb.28(0x00000001) + + %43:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %43.sub_8bit, %43.sub_8bit, implicit-def $eflags + JNE_1 %bb.29, implicit killed $eflags + JMP_1 %bb.28 + + bb.28.bb58: + successors: + + + bb.29.bb59: + successors: %bb.31(0x7fffffff), %bb.30(0x00000001) + + %45:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %45.sub_8bit, %45.sub_8bit, implicit-def $eflags + JNE_1 %bb.31, implicit killed $eflags + JMP_1 %bb.30 + + bb.30.bb61: + successors: + + + bb.31.bb62: + successors: %bb.32(0x00000001), %bb.33(0x7fffffff) + + %47:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %47.sub_8bit, %47.sub_8bit, implicit-def $eflags + JNE_1 %bb.33, implicit killed $eflags + JMP_1 %bb.32 + + bb.32.bb64: + successors: + + + bb.33.bb65: + successors: %bb.37(0x30000000), %bb.34(0x50000000) + + %49:gr8 = MOV8ri 1 + TEST8rr %49, %49, implicit-def $eflags + JNE_1 %bb.37, implicit killed $eflags + JMP_1 %bb.34 + + bb.34.bb67: + successors: %bb.36(0x00000001), %bb.35(0x7fffffff) + + %81:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %81.sub_8bit, %81.sub_8bit, implicit-def $eflags + JE_1 %bb.36, implicit killed $eflags + + bb.35: + %82:gr32 = LEA32r %12, 1, $noreg, 80, $noreg + JMP_1 %bb.38 + + bb.36.bb70: + successors: + + + bb.37.bb72: + %81:gr32_abcd = COPY %0 + %82:gr32 = COPY %0 + + bb.38.bb74: + successors: %bb.40(0x7fffffff), %bb.39(0x00000001) + + ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + CALLpcrel32 @_Znwj, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, implicit-def $eax + ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + %52:gr32 = COPY killed $eax + MOV32mr undef %53:gr32, 1, $noreg, 0, $noreg, %81 :: (store 4 into `%18** undef`) + MOV32mr %82, 1, $noreg, 0, $noreg, %52 :: (store 4 into %ir.tmp79) + 
ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + CALLpcrel32 @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp + ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + MOV32mi %52, 1, $noreg, 36, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv :: (store 4 into %ir.tmp81) + MOV32mr undef %54:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `%18** undef`) + %55:gr32 = MOV32rm %12, 1, $noreg, 140, $noreg :: (load 4 from %ir.3) + CMP32mi8 %55, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 4 from %ir.tmp864) + JE_1 %bb.40, implicit killed $eflags + JMP_1 %bb.39 + + bb.39.bb89: + successors: + + + bb.40.bb90: + successors: %bb.42(0x00000001), %bb.41(0x7fffffff) + + %56:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %56.sub_8bit, %56.sub_8bit, implicit-def $eflags + JNE_1 %bb.42, implicit killed $eflags + JMP_1 %bb.41 + + bb.41.bb92: + successors: %bb.43(0x00000001), %bb.44(0x7fffffff) + + %58:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %58.sub_8bit, %58.sub_8bit, implicit-def $eflags + JNE_1 %bb.43, implicit killed $eflags + JMP_1 %bb.44 + + bb.42.bb94: + successors: + + + bb.43.bb96: + successors: + + + bb.44.bb97: + successors: %bb.45(0x00000001), %bb.46(0x7fffffff) + + %60:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %60.sub_8bit, %60.sub_8bit, implicit-def $eflags + JNE_1 %bb.46, implicit killed $eflags + JMP_1 %bb.45 + + bb.45.bb101: + successors: + + + bb.46.bb102: + successors: %bb.48(0x7fffffff), %bb.47(0x00000001) + + %62:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %62.sub_8bit, %62.sub_8bit, implicit-def $eflags + JNE_1 %bb.48, implicit killed $eflags + JMP_1 %bb.47 + + bb.47.bb103: + successors: + + + bb.48.bb104: + 
successors: %bb.50(0x7fffffff), %bb.49(0x00000001) + + %64:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %64.sub_8bit, %64.sub_8bit, implicit-def $eflags + JNE_1 %bb.50, implicit killed $eflags + JMP_1 %bb.49 + + bb.49.bb108: + successors: + + + bb.50.bb109: + successors: %bb.51(0x00000001), %bb.52(0x7fffffff) + + %66:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %66.sub_8bit, %66.sub_8bit, implicit-def $eflags + JNE_1 %bb.52, implicit killed $eflags + JMP_1 %bb.51 + + bb.51.bb111: + successors: + + + bb.52.bb112: + successors: %bb.54(0x7fffffff), %bb.53(0x00000001) + + %68:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %68.sub_8bit, %68.sub_8bit, implicit-def $eflags + JNE_1 %bb.54, implicit killed $eflags + JMP_1 %bb.53 + + bb.53.bb117: + successors: + + + bb.54.bb118: + successors: %bb.55(0x00000001), %bb.56(0x7fffffff) + + %70:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %70.sub_8bit, %70.sub_8bit, implicit-def $eflags + JNE_1 %bb.56, implicit killed $eflags + JMP_1 %bb.55 + + bb.55.bb120: + successors: + + + bb.56.bb121: + successors: %bb.57(0x00000001), %bb.58(0x7fffffff) + + %72:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %72.sub_8bit, %72.sub_8bit, implicit-def $eflags + JNE_1 %bb.58, implicit killed $eflags + JMP_1 %bb.57 + + bb.57.bb124: + successors: + + + bb.58.bb125: + successors: %bb.62(0x00000001), %bb.59(0x7fffffff) + + CMP32mi8 %0, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 4 from %ir.4) + JE_1 %bb.62, implicit killed $eflags + JMP_1 %bb.59 + + bb.59.bb128: + + bb.60.bb129: + successors: %bb.60(0x7fffffff), %bb.61(0x00000001) + + CMP32ri undef %75:gr32, 95406325, implicit-def $eflags + JB_1 %bb.61, implicit killed $eflags + JMP_1 %bb.60 + + bb.61.bb133: + successors: + + + bb.62.bb135: + successors: %bb.63, %bb.64 + + %76:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %76.sub_8bit, %76.sub_8bit, implicit-def $eflags + JNE_1 %bb.64, implicit killed $eflags + JMP_1 %bb.63 + + 
bb.63.bb137: + successors: + + + bb.64.bb138: + +... diff --git a/test/MC/PowerPC/ppc64-localentry-symbols.s b/test/MC/PowerPC/ppc64-localentry-symbols.s new file mode 100644 index 0000000000000..f1d5c5d0ab1ac --- /dev/null +++ b/test/MC/PowerPC/ppc64-localentry-symbols.s @@ -0,0 +1,34 @@ +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-freebsd13.0 %s -o %t +# RUN: llvm-objdump -t %t | FileCheck %s + +# CHECK: 0000000000000000 gw F .text 00000000 0x60 __impl_foo +# CHECK: 0000000000000000 g F .text 00000000 0x60 foo +# CHECK: 0000000000000000 gw F .text 00000000 0x60 foo@FBSD_1.1 +# CHECK: 0000000000000008 g F .text 00000000 0x60 func +# CHECK: 0000000000000008 gw F .text 00000000 0x60 weak_func + +.text +.abiversion 2 + +.globl foo +.type foo,@function +foo: + nop + nop + .localentry foo, 8 + +.symver __impl_foo, foo@FBSD_1.1 +.weak __impl_foo +.set __impl_foo, foo + +.globl func +# Mimick FreeBSD weak function/reference +.weak weak_func +.equ weak_func, func + +.p2align 2 +.type func,@function +func: + nop + nop + .localentry func, 8 diff --git a/test/MC/WebAssembly/null-output.s b/test/MC/WebAssembly/null-output.s new file mode 100644 index 0000000000000..a25d095e0cbef --- /dev/null +++ b/test/MC/WebAssembly/null-output.s @@ -0,0 +1,10 @@ +# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -o /dev/null < %s + + .text + .section .text.main,"",@ + .type main,@function +main: + .functype main (i32, i32) -> (i32) + end_function +.Lfunc_end0: + .size main, .Lfunc_end0-main diff --git a/test/tools/llvm-dlltool/coff-weak-exports.def b/test/tools/llvm-dlltool/coff-weak-exports.def index dbc59be8ae189..60f835233a555 100644 --- a/test/tools/llvm-dlltool/coff-weak-exports.def +++ b/test/tools/llvm-dlltool/coff-weak-exports.def @@ -1,5 +1,6 @@ ; RUN: llvm-dlltool -m i386:x86-64 --input-def %s --output-lib %t.a ; RUN: llvm-nm %t.a | FileCheck %s +; RUN: llvm-readobj %t.a | FileCheck -check-prefix=ARCH %s LIBRARY test.dll EXPORTS @@ -26,3 +27,5 @@ ImpLibName3 
= kernel32.Sleep ; CHECK-NEXT: W __imp_ImpLibName2 ; CHECK: T ImpLibName3 ; CHECK-NEXT: T __imp_ImpLibName3 + +; ARCH-NOT: unknown arch diff --git a/test/tools/llvm-objdump/AMDGPU/source-lines.ll b/test/tools/llvm-objdump/AMDGPU/source-lines.ll index 748f04754e4b3..4a4203d2a52e0 100644 --- a/test/tools/llvm-objdump/AMDGPU/source-lines.ll +++ b/test/tools/llvm-objdump/AMDGPU/source-lines.ll @@ -12,7 +12,7 @@ ; LINE: v_mov_b32_e32 v{{[0-9]+}}, 0x888 ; LINE: ; {{.*}}source-lines.cl:3 ; LINE: ; {{.*}}source-lines.cl:4 -; LINE: v_add_u32_e32 +; LINE: v_add_u32_e64 ; LINE: ; {{.*}}source-lines.cl:5 ; LINE: flat_store_dword ; Epilogue. @@ -28,7 +28,7 @@ ; SOURCE: v_mov_b32_e32 v{{[0-9]+}}, 0x888 ; SOURCE: ; int var1 = 0x888; ; SOURCE: ; int var2 = var0 + var1; -; SOURCE: v_add_u32_e32 +; SOURCE: v_add_u32_e64 ; SOURCE: ; *Out = var2; ; SOURCE: flat_store_dword ; Epilogue. diff --git a/test/tools/llvm-objdump/PowerPC/branch-offset.s b/test/tools/llvm-objdump/PowerPC/branch-offset.s new file mode 100644 index 0000000000000..b0b3f05f9cdb5 --- /dev/null +++ b/test/tools/llvm-objdump/PowerPC/branch-offset.s @@ -0,0 +1,43 @@ +# RUN: llvm-mc -triple=powerpc64le-unknown-linux -filetype=obj %s -o %t.o +# RUN: llvm-objdump -d %t.o | FileCheck %s + +# RUN: llvm-mc -triple=powerpc64-unknown-linux -filetype=obj %s -o %t.o +# RUN: llvm-objdump -d %t.o | FileCheck %s + +# RUN: llvm-mc -triple=powerpc-unknown-linux -filetype=obj %s -o %t.o +# RUN: llvm-objdump -d %t.o | FileCheck %s + +# CHECK: 0000000000000000 callee_back: +# CHECK: 18: {{.*}} bl .-24 +# CHECK: 20: {{.*}} bl .+16 +# CHECK: 0000000000000030 callee_forward: + + .text + .global caller + .type caller,@function + .type callee_forward,@function + .type callee_back,@function + + .p2align 4 +callee_back: + li 3, 55 + blr + + .p2align 4 +caller: +.Lgep: + addis 2, 12, .TOC.-.Lgep@ha + addi 2, 2, .TOC.-.Lgep@l +.Llep: + .localentry caller, .Llep-.Lgep + bl callee_back + mr 31, 3 + bl callee_forward + add 3, 3, 31 + blr + + 
.p2align 4 +callee_forward: + li 3, 66 + blr + diff --git a/test/tools/llvm-objdump/PowerPC/lit.local.cfg b/test/tools/llvm-objdump/PowerPC/lit.local.cfg new file mode 100644 index 0000000000000..b77510721e100 --- /dev/null +++ b/test/tools/llvm-objdump/PowerPC/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'PowerPC' in config.root.targets: + config.unsupported = True diff --git a/test/tools/llvm-objdump/eh_frame-coff.test b/test/tools/llvm-objdump/eh_frame-coff.test index 74eceeec4f31a..cb481939dcc83 100644 --- a/test/tools/llvm-objdump/eh_frame-coff.test +++ b/test/tools/llvm-objdump/eh_frame-coff.test @@ -8,7 +8,7 @@ # CHECK: Code alignment factor: 1 # CHECK: Data alignment factor: -4 # CHECK: Return address column: 8 -# CHECK: Personality Address: 004025d7 +# CHECK: Personality Address: 00000000004025d7 # CHECK: Augmentation data: 00 D7 25 40 00 00 00 # CHECK: DW_CFA_def_cfa: reg4 +4 @@ -17,7 +17,7 @@ # CHECK: DW_CFA_nop: # CHECK: 00000020 0000001c 00000024 FDE cie=00000024 pc=00401410...00401488 -# CHECK: LSDA Address: 00406000 +# CHECK: LSDA Address: 0000000000406000 # CHECK: DW_CFA_advance_loc: 1 # CHECK: DW_CFA_def_cfa_offset: +8 # CHECK: DW_CFA_offset: reg5 -8 diff --git a/test/tools/llvm-objdump/elf-symbol-visibility.test b/test/tools/llvm-objdump/elf-symbol-visibility.test new file mode 100644 index 0000000000000..da7f6d285166a --- /dev/null +++ b/test/tools/llvm-objdump/elf-symbol-visibility.test @@ -0,0 +1,37 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump --syms %t | FileCheck %s + +# CHECK: SYMBOL TABLE: +# CHECK-NEXT: .text 00000000 default +# CHECK-NEXT: .text 00000000 .internal internal +# CHECK-NEXT: .text 00000000 .hidden hidden +# CHECK-NEXT: .text 00000000 .protected protected +# CHECK-NEXT: .text 00000000 0x20 mips_pic + +!ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_MIPS + Flags: [ EF_MIPS_ABI_O32, EF_MIPS_ARCH_32 ] +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] 
+Symbols: + Local: + - Name: default + Section: .text + - Name: internal + Visibility: STV_INTERNAL + Section: .text + - Name: hidden + Visibility: STV_HIDDEN + Section: .text + - Name: protected + Visibility: STV_PROTECTED + Section: .text + - Name: mips_pic + Other: [ STO_MIPS_PIC ] + Section: .text |