Diffstat (limited to 'test/CodeGen/R600')
78 files changed, 1143 insertions, 262 deletions
diff --git a/test/CodeGen/R600/README b/test/CodeGen/R600/README new file mode 100644 index 0000000000000..96998bba28f24 --- /dev/null +++ b/test/CodeGen/R600/README @@ -0,0 +1,21 @@ ++==============================================================================+ +| How to organize the lit tests | ++==============================================================================+ + +- If you write a test for matching a single DAG opcode or intrinsic, it should + go in a file called {opcode_name,intrinsic_name}.ll (e.g. fadd.ll) + +- If you write a test that matches several DAG opcodes and checks for a single + ISA instruction, then that test should go in a file called {ISA_name}.ll (e.g. + bfi_int.ll + +- For all other tests, use your best judgement for organizing tests and naming + the files. + ++==============================================================================+ +| Naming conventions | ++==============================================================================+ + +- Use dash '-' and not underscore '_' to separate words in file names, unless + the file is named after a DAG opcode or ISA instruction that has an + underscore '_' in its name. diff --git a/test/CodeGen/R600/add.v4i32.ll b/test/CodeGen/R600/add.ll index ac4a87417bde9..185998b260959 100644 --- a/test/CodeGen/R600/add.v4i32.ll +++ b/test/CodeGen/R600/add.ll @@ -1,9 +1,9 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/alu-split.ll b/test/CodeGen/R600/alu-split.ll index afefcd9f78b08..48496f6febf6f 100644 --- a/test/CodeGen/R600/alu-split.ll +++ b/test/CodeGen/R600/alu-split.ll @@ -4,6 +4,7 @@ ;CHECK: ALU ;CHECK: ALU ;CHECK-NOT: ALU +;CHECK: CF_END define void @main() #0 { main_body: diff --git a/test/CodeGen/R600/and.v4i32.ll b/test/CodeGen/R600/and.ll index 662085e2d673c..166af2d8d128a 100644 --- a/test/CodeGen/R600/and.v4i32.ll +++ b/test/CodeGen/R600/and.ll @@ -1,9 +1,9 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/bfe_uint.ll b/test/CodeGen/R600/bfe_uint.ll new file mode 100644 index 0000000000000..92570c3152998 --- /dev/null +++ b/test/CodeGen/R600/bfe_uint.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @bfe_def +; CHECK: 
BFE_UINT +define void @bfe_def(i32 addrspace(1)* %out, i32 %x) { +entry: + %0 = lshr i32 %x, 5 + %1 = and i32 %0, 15 ; 0xf + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; This program could be implemented using a BFE_UINT instruction, however +; since the lshr constant + number of bits in the mask is >= 32, it can also be +; implmented with a LSHR instruction, which is better, because LSHR has less +; operands and requires less constants. + +; CHECK: @bfe_shift +; CHECK-NOT: BFE_UINT +define void @bfe_shift(i32 addrspace(1)* %out, i32 %x) { +entry: + %0 = lshr i32 %x, 16 + %1 = and i32 %0, 65535 ; 0xffff + store i32 %1, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll new file mode 100644 index 0000000000000..4244dcf3c77bd --- /dev/null +++ b/test/CodeGen/R600/bfi_int.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s + +; BFI_INT Definition pattern from ISA docs +; (y & x) | (z & ~x) +; +; R600-CHECK: @bfi_def +; R600-CHECK: BFI_INT +; SI-CHECK: @bfi_def +; SI-CHECK: V_BFI_B32 +define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { +entry: + %0 = xor i32 %x, -1 + %1 = and i32 %z, %0 + %2 = and i32 %y, %x + %3 = or i32 %1, %2 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; SHA-256 Ch function +; z ^ (x & (y ^ z)) +; R600-CHECK: @bfi_sha256_ch +; R600-CHECK: BFI_INT +; SI-CHECK: @bfi_sha256_ch +; SI-CHECK: V_BFI_B32 +define void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { +entry: + %0 = xor i32 %y, %z + %1 = and i32 %x, %0 + %2 = xor i32 %z, %1 + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; SHA-256 Ma function +; ((x & z) | (y & (x | z))) +; R600-CHECK: @bfi_sha256_ma +; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV.x}}, {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; SI-CHECK: V_XOR_B32_e32 [[DST:VGPR[0-9]+]], {{VGPR[0-9]+, VGPR[0-9]+}} +; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{VGPR[0-9]+, VGPR[0-9]+}} + +define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { +entry: + %0 = and i32 %x, %z + %1 = or i32 %x, %z + %2 = and i32 %y, %1 + %3 = or i32 %0, %2 + store i32 %3, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/call_fs.ll b/test/CodeGen/R600/call_fs.ll new file mode 100644 index 0000000000000..e152bf6d559d6 --- /dev/null +++ b/test/CodeGen/R600/call_fs.ll @@ -0,0 +1,15 @@ + +; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600-CHECK %s + +; EG-CHECK: @call_fs +; EG-CHECK: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84] +; R600-CHECK: @call_fs +; R600-CHECK:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89] + + +define void @call_fs() #0 { + ret void +} + +attributes #0 = { "ShaderType"="1" } ; Vertex Shader diff --git a/test/CodeGen/R600/cf_end.ll b/test/CodeGen/R600/cf_end.ll new file mode 100644 index 0000000000000..138004df6df95 --- /dev/null +++ b/test/CodeGen/R600/cf_end.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG-CHECK %s +; RUN: llc < %s -march=r600 
-mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM-CHECK %s + +; EG-CHECK: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x80] +; CM-CHECK: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x88] +define void @eop() { + ret void +} diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll index fd958b3659613..6607c1218b56d 100644 --- a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll +++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll @@ -8,7 +8,7 @@ ; CHECK: @sint -; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { entry: @@ -22,7 +22,7 @@ entry: } ;CHECK: @uint -;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { entry: diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll index 09baee7a1dcd3..012c17b8fe4b5 100644 --- a/test/CodeGen/R600/disconnected-predset-break-bug.ll +++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll @@ -6,7 +6,7 @@ ; CHECK: @loop_ge ; CHECK: LOOP_START_DX10 -; CHECK: PRED_SET +; CHECK: ALU_PUSH_BEFORE ; CHECK-NEXT: JUMP ; CHECK-NEXT: LOOP_BREAK define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind { diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll new file mode 100644 index 0000000000000..f460f13d53e0e --- /dev/null +++ b/test/CodeGen/R600/elf.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=r600 -mcpu=SI -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=SI -o - | FileCheck --check-prefix=CONFIG-CHECK %s + +; ELF-CHECK: Format: ELF32 +; ELF-CHECK: Name: .AMDGPU.config +; ELF-CHECK: Type: SHT_PROGBITS + +; CONFIG-CHECK: .section .AMDGPU.config +; CONFIG-CHECK-NEXT: .long 45096 +; CONFIG-CHECK-NEXT: .long 0 +define void @test(i32 %p) #0 { + %i = add i32 %p, 2 + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } ; Pixel Shader diff --git a/test/CodeGen/R600/elf.r600.ll b/test/CodeGen/R600/elf.r600.ll new file mode 100644 index 0000000000000..0590efb0915f7 --- /dev/null +++ b/test/CodeGen/R600/elf.r600.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG-CHECK %s + +; ELF-CHECK: Format: ELF32 +; ELF-CHECK: Name: .AMDGPU.config + +; CONFIG-CHECK: .section .AMDGPU.config +; CONFIG-CHECK-NEXT: .long 166100 +; CONFIG-CHECK-NEXT: .long 258 +; CONFIG-CHECK-NEXT: .long 165900 +; CONFIG-CHECK-NEXT: .long 0 +define void @test(float addrspace(1)* %out, i32 %p) { + %i = add i32 %p, 2 + %r = bitcast i32 %i to float + store float %r, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll index 0407533eaa5f4..85f2882289fa5 100644 --- a/test/CodeGen/R600/fabs.ll +++ b/test/CodeGen/R600/fabs.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: MOV 
T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}} +;CHECK: MOV * T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll index d7d1b6572c41d..9a672329e75ce 100644 --- a/test/CodeGen/R600/fadd.ll +++ b/test/CodeGen/R600/fadd.ll @@ -1,8 +1,9 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: @fadd_f32 +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { +define void @fadd_f32() { %r0 = call float @llvm.R600.load.input(i32 0) %r1 = call float @llvm.R600.load.input(i32 1) %r2 = fadd float %r0, %r1 @@ -14,3 +15,17 @@ declare float @llvm.R600.load.input(i32) readnone declare void @llvm.AMDGPU.store.output(float, i32) +; CHECK: @fadd_v4f32 +; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fadd <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fadd.v4f32.ll b/test/CodeGen/R600/fadd.v4f32.ll deleted file mode 100644 index 85dbfd52cbb39..0000000000000 --- a/test/CodeGen/R600/fadd.v4f32.ll +++ /dev/null @@ -1,15 +0,0 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { - %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 - %a = load <4 x float> addrspace(1) * %in - %b = load <4 x float> addrspace(1) * %b_ptr - %result = fadd <4 x float> %a, %b - store <4 x float> %result, <4 x float> addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll index a94cfb5cf2fe2..7373a214790ee 100644 --- a/test/CodeGen/R600/fcmp-cnd.ll +++ b/test/CodeGen/R600/fcmp-cnd.ll @@ -2,7 +2,7 @@ ;Not checking arguments 2 and 3 to CNDE, because they may change between ;registers and literal.x depending on what the optimizer does. 
-;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { entry: diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll index 37f621d23958b..dc3a779dd6093 100644 --- a/test/CodeGen/R600/fcmp.ll +++ b/test/CodeGen/R600/fcmp.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ; CHECK: @fcmp_sext -; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) { entry: @@ -19,7 +19,8 @@ entry: ; SET* + FP_TO_SINT ; CHECK: @fcmp_br -; CHECK: SET{{[N]*}}E_DX10 T{{[0-9]+\.[XYZW], [a-zA-Z0-9, .]+}}(5.0 +; CHECK: SET{{[N]*}}E_DX10 * T{{[0-9]+\.[XYZW],}} +; CHECK-NEXT {{[0-9]+(5.0}} define void @fcmp_br(i32 addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.ll index 79e677f541f53..2e68e36be4d8f 100644 --- a/test/CodeGen/R600/fdiv.v4f32.ll +++ b/test/CodeGen/R600/fdiv.ll @@ -1,13 +1,13 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll index 845330f28419b..877d69a65b434 100644 --- a/test/CodeGen/R600/floor.ll +++ b/test/CodeGen/R600/floor.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: FLOOR T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll index a3d4d0ff0db71..62001edc3aa55 100644 --- a/test/CodeGen/R600/fmad.ll +++ b/test/CodeGen/R600/fmad.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: MULADD_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MULADD_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll index 3708f0b9eed21..8b704e56484b1 100644 --- a/test/CodeGen/R600/fmax.ll +++ b/test/CodeGen/R600/fmax.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: MAX T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 
T[0-9]+\.[XYZW]}} +;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll index 19d59ab3061ef..5e34b7c8902eb 100644 --- a/test/CodeGen/R600/fmin.ll +++ b/test/CodeGen/R600/fmin.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: MIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll index eb1d523c0bb44..c29294632dc08 100644 --- a/test/CodeGen/R600/fmul.ll +++ b/test/CodeGen/R600/fmul.ll @@ -1,8 +1,9 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: @fmul_f32 +; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { +define void @fmul_f32() { %r0 = call float @llvm.R600.load.input(i32 0) %r1 = call float @llvm.R600.load.input(i32 1) %r2 = fmul float %r0, %r1 @@ -14,3 +15,17 @@ declare float @llvm.R600.load.input(i32) readnone declare void @llvm.AMDGPU.store.output(float, i32) +; CHECK: @fmul_v4f32 +; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fmul <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fmul.v4f32.ll b/test/CodeGen/R600/fmul.v4f32.ll index 6d44a0c5c7822..74a58f74026ab 100644 --- a/test/CodeGen/R600/fmul.v4f32.ll +++ b/test/CodeGen/R600/fmul.v4f32.ll @@ -1,9 +1,9 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll new file mode 100644 index 0000000000000..f5716e1d47e61 --- /dev/null +++ b/test/CodeGen/R600/fp_to_sint.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @fp_to_sint_v4i32 +; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %value = load <4 x float> addrspace(1) * %in + %result = fptosi <4 x float> %value to <4 x i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + 
ret void +} diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll new file mode 100644 index 0000000000000..1c3c0c62cf50f --- /dev/null +++ b/test/CodeGen/R600/fp_to_uint.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @fp_to_uint_v4i32 +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %value = load <4 x float> addrspace(1) * %in + %result = fptoui <4 x float> %value to <4 x i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll index 591aa52676a41..f784cde46cd24 100644 --- a/test/CodeGen/R600/fsub.ll +++ b/test/CodeGen/R600/fsub.ll @@ -1,8 +1,9 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; CHECK: @fsub_f32 +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} -define void @test() { +define void @fsub_f32() { %r0 = call float @llvm.R600.load.input(i32 0) %r1 = call float @llvm.R600.load.input(i32 1) %r2 = fsub float %r0, %r1 @@ -14,3 +15,17 @@ declare float @llvm.R600.load.input(i32) readnone declare void @llvm.AMDGPU.store.output(float, i32) +; CHECK: @fsub_v4f32 +; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fsub <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fsub.v4f32.ll b/test/CodeGen/R600/fsub.v4f32.ll deleted file mode 100644 index 612a57e4b6099..0000000000000 --- a/test/CodeGen/R600/fsub.v4f32.ll +++ /dev/null @@ -1,15 +0,0 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { - %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 - %a = load <4 x float> addrspace(1) * %in - %b = load <4 x float> addrspace(1) * %b_ptr - %result = fsub <4 x float> %a, %b - store <4 x float> %result, <4 x float> addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/i8_to_double_to_float.ll b/test/CodeGen/R600/i8-to-double-to-float.ll index 39f33227fa4b2..604746627666e 100644 --- a/test/CodeGen/R600/i8_to_double_to_float.ll +++ b/test/CodeGen/R600/i8-to-double-to-float.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) { %1 = load i8 addrspace(1)* %in diff --git 
a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll index 71705a64f50e5..e3005fe82da15 100644 --- a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll +++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll @@ -3,7 +3,7 @@ ;Test that a select with reversed True/False values is correctly lowered ;to a SETNE_INT. There should only be one SETNE_INT instruction. -;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: SETNE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK-NOT: SETNE_INT define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll new file mode 100644 index 0000000000000..979efb00e7bd3 --- /dev/null +++ b/test/CodeGen/R600/imm.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s + +; Use a 64-bit value with lo bits that can be represented as an inline constant +; CHECK: @i64_imm_inline_lo +; CHECK: S_MOV_B32 [[LO:SGPR[0-9]+]], 5 +; CHECK: V_MOV_B32_e32 [[LO_VGPR:VGPR[0-9]+]], [[LO]] +; CHECK: BUFFER_STORE_DWORDX2 [[LO_VGPR]]_ +define void @i64_imm_inline_lo(i64 addrspace(1) *%out) { +entry: + store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005 + ret void +} + +; Use a 64-bit value with hi bits that can be represented as an inline constant +; CHECK: @i64_imm_inline_hi +; CHECK: S_MOV_B32 [[HI:SGPR[0-9]+]], 5 +; CHECK: V_MOV_B32_e32 [[HI_VGPR:VGPR[0-9]+]], [[HI]] +; CHECK: BUFFER_STORE_DWORDX2 {{VGPR[0-9]+}}_[[HI_VGPR]] +define void @i64_imm_inline_hi(i64 addrspace(1) *%out) { +entry: + store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678 + ret void +} diff --git a/test/CodeGen/R600/jump_address.ll b/test/CodeGen/R600/jump-address.ll index cd35bffb1304e..ae9c8bba4fd67 100644 --- a/test/CodeGen/R600/jump_address.ll +++ b/test/CodeGen/R600/jump-address.ll @@ -1,6 +1,8 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -; CHECK: JUMP @4 +; CHECK: JUMP @3 +; CHECK: EXPORT +; CHECK-NOT: EXPORT define void @main() #0 { main_body: diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll index e69f64e0e142a..21e5d4c4de9a6 100644 --- a/test/CodeGen/R600/literals.ll +++ b/test/CodeGen/R600/literals.ll @@ -7,7 +7,8 @@ ; ADD_INT literal.x REG, 5 ; CHECK: @i32_literal -; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5 +; CHECK: ADD_INT * {{[A-Z0-9,. ]*}}literal.x +; CHECK-NEXT: 5 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { entry: %0 = add i32 5, %in @@ -22,7 +23,8 @@ entry: ; ADD literal.x REG, 5.0 ; CHECK: @float_literal -; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0 +; CHECK: ADD * {{[A-Z0-9,. 
]*}}literal.x +; CHECK-NEXT: 1084227584(5.0 define void @float_literal(float addrspace(1)* %out, float %in) { entry: %0 = fadd float 5.0, %in @@ -30,3 +32,168 @@ entry: ret void } +; CHECK: @main +; CHECK: -2147483648 +; CHECK-NEXT-NOT: -2147483648 + +define void @main() #0 { +main_body: + %0 = call float @llvm.R600.load.input(i32 4) + %1 = call float @llvm.R600.load.input(i32 5) + %2 = call float @llvm.R600.load.input(i32 6) + %3 = call float @llvm.R600.load.input(i32 7) + %4 = call float @llvm.R600.load.input(i32 8) + %5 = call float @llvm.R600.load.input(i32 9) + %6 = call float @llvm.R600.load.input(i32 10) + %7 = call float @llvm.R600.load.input(i32 11) + %8 = call float @llvm.R600.load.input(i32 12) + %9 = call float @llvm.R600.load.input(i32 13) + %10 = call float @llvm.R600.load.input(i32 14) + %11 = call float @llvm.R600.load.input(i32 15) + %12 = load <4 x float> addrspace(8)* null + %13 = extractelement <4 x float> %12, i32 0 + %14 = fsub float -0.000000e+00, %13 + %15 = fadd float %0, %14 + %16 = load <4 x float> addrspace(8)* null + %17 = extractelement <4 x float> %16, i32 1 + %18 = fsub float -0.000000e+00, %17 + %19 = fadd float %1, %18 + %20 = load <4 x float> addrspace(8)* null + %21 = extractelement <4 x float> %20, i32 2 + %22 = fsub float -0.000000e+00, %21 + %23 = fadd float %2, %22 + %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %25 = extractelement <4 x float> %24, i32 0 + %26 = fmul float %25, %0 + %27 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %28 = extractelement <4 x float> %27, i32 1 + %29 = fmul float %28, %0 + %30 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %31 = extractelement <4 x float> %30, i32 2 + %32 = fmul float %31, %0 + %33 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %34 = extractelement <4 x float> %33, i32 3 + %35 = fmul float %34, %0 + %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %37 = extractelement <4 x float> %36, i32 0 + %38 = fmul float %37, %1 + %39 = fadd float %38, %26 + %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %41 = extractelement <4 x float> %40, i32 1 + %42 = fmul float %41, %1 + %43 = fadd float %42, %29 + %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %45 = extractelement <4 x float> %44, i32 2 + %46 = fmul float %45, %1 + %47 = fadd float %46, %32 + %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %49 = extractelement <4 x float> %48, i32 3 + %50 = fmul float %49, %1 + %51 = fadd float %50, %35 + %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %53 = extractelement <4 x float> %52, i32 0 + %54 = fmul float %53, %2 + %55 = fadd float %54, %39 + %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %57 = extractelement <4 x float> %56, i32 1 + %58 = fmul float %57, %2 + %59 = fadd float %58, %43 + %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %61 = extractelement <4 x float> %60, i32 2 + %62 = fmul float %61, %2 + %63 = fadd float %62, %47 + %64 = load <4 x float> 
addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %65 = extractelement <4 x float> %64, i32 3 + %66 = fmul float %65, %2 + %67 = fadd float %66, %51 + %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %69 = extractelement <4 x float> %68, i32 0 + %70 = fmul float %69, %3 + %71 = fadd float %70, %55 + %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %73 = extractelement <4 x float> %72, i32 1 + %74 = fmul float %73, %3 + %75 = fadd float %74, %59 + %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %77 = extractelement <4 x float> %76, i32 2 + %78 = fmul float %77, %3 + %79 = fadd float %78, %63 + %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %81 = extractelement <4 x float> %80, i32 3 + %82 = fmul float %81, %3 + %83 = fadd float %82, %67 + %84 = insertelement <4 x float> undef, float %15, i32 0 + %85 = insertelement <4 x float> %84, float %19, i32 1 + %86 = insertelement <4 x float> %85, float %23, i32 2 + %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 3 + %88 = insertelement <4 x float> undef, float %15, i32 0 + %89 = insertelement <4 x float> %88, float %19, i32 1 + %90 = insertelement <4 x float> %89, float %23, i32 2 + %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 3 + %92 = call float @llvm.AMDGPU.dp4(<4 x float> %87, <4 x float> %91) + %93 = call float @fabs(float %92) + %94 = call float @llvm.AMDGPU.rsq(float %93) + %95 = fmul float %15, %94 + %96 = fmul float %19, %94 + %97 = fmul float %23, %94 + %98 = insertelement <4 x float> undef, float %4, i32 0 + %99 = insertelement <4 x float> %98, float %5, i32 1 + %100 = insertelement <4 x float> %99, float %6, i32 2 + %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 3 + %102 = insertelement <4 x float> undef, float %4, i32 0 + %103 = insertelement <4 x float> %102, float %5, i32 1 + %104 = insertelement <4 x float> %103, float %6, i32 2 + %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3 + %106 = call float @llvm.AMDGPU.dp4(<4 x float> %101, <4 x float> %105) + %107 = call float @fabs(float %106) + %108 = call float @llvm.AMDGPU.rsq(float %107) + %109 = fmul float %4, %108 + %110 = fmul float %5, %108 + %111 = fmul float %6, %108 + %112 = insertelement <4 x float> undef, float %95, i32 0 + %113 = insertelement <4 x float> %112, float %96, i32 1 + %114 = insertelement <4 x float> %113, float %97, i32 2 + %115 = insertelement <4 x float> %114, float 0.000000e+00, i32 3 + %116 = insertelement <4 x float> undef, float %109, i32 0 + %117 = insertelement <4 x float> %116, float %110, i32 1 + %118 = insertelement <4 x float> %117, float %111, i32 2 + %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3 + %120 = call float @llvm.AMDGPU.dp4(<4 x float> %115, <4 x float> %119) + %121 = fsub float -0.000000e+00, %120 + %122 = fcmp uge float 0.000000e+00, %121 + %123 = select i1 %122, float 0.000000e+00, float %121 + %124 = insertelement <4 x float> undef, float %8, i32 0 + %125 = insertelement <4 x float> %124, float %9, i32 1 + %126 = insertelement <4 x float> %125, float 5.000000e-01, i32 2 + %127 = insertelement <4 x float> %126, float 1.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %127, i32 60, i32 1) + %128 = insertelement <4 x float> undef, float %71, i32 0 + %129 = insertelement 
<4 x float> %128, float %75, i32 1 + %130 = insertelement <4 x float> %129, float %79, i32 2 + %131 = insertelement <4 x float> %130, float %83, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %131, i32 0, i32 2) + %132 = insertelement <4 x float> undef, float %123, i32 0 + %133 = insertelement <4 x float> %132, float %96, i32 1 + %134 = insertelement <4 x float> %133, float %97, i32 2 + %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %135, i32 1, i32 2) + ret void +} + +; Function Attrs: readnone +declare float @llvm.R600.load.input(i32) #1 + +; Function Attrs: readnone +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 + +; Function Attrs: readonly +declare float @fabs(float) #2 + +; Function Attrs: readnone +declare float @llvm.AMDGPU.rsq(float) #1 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" } +attributes #1 = { readnone } +attributes #2 = { readonly } diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll index 693eb27457c2c..cc0732b3fffd3 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.mul.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll index fac957f7eeec2..ff22a69196774 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: TRUNC T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: TRUNC * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll index bf0cdaa2fa3a1..e45722c3fa679 100644 --- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll +++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s ;CHECK: S_MOV_B32 ;CHECK-NEXT: V_INTERP_MOV_F32 diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll index c724395b98c22..de06354a56467 100644 --- a/test/CodeGen/R600/llvm.SI.sample.ll +++ b/test/CodeGen/R600/llvm.SI.sample.ll @@ -1,21 +1,21 @@ -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -;CHECK: IMAGE_SAMPLE -;CHECK: IMAGE_SAMPLE -;CHECK: IMAGE_SAMPLE -;CHECK: IMAGE_SAMPLE -;CHECK: IMAGE_SAMPLE -;CHECK: IMAGE_SAMPLE_C -;CHECK: IMAGE_SAMPLE_C -;CHECK: IMAGE_SAMPLE_C -;CHECK: IMAGE_SAMPLE -;CHECK: IMAGE_SAMPLE -;CHECK: IMAGE_SAMPLE_C -;CHECK: IMAGE_SAMPLE_C -;CHECK: IMAGE_SAMPLE_C -;CHECK: IMAGE_SAMPLE -;CHECK: IMAGE_SAMPLE -;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8 +;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5 +;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9 +;CHECK: IMAGE_SAMPLE_C 
{{VGPR[0-9]+_VGPR[0-9]+}}, 6 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12 +;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7 +;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11 +;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0 @@ -34,54 +34,88 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1 %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2 %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3 - %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v1, + %res1 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v1, <8 x i32> undef, <4 x i32> undef, i32 1) - %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v2, + %res2 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v2, <8 x i32> undef, <4 x i32> undef, i32 2) - %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v3, + %res3 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v3, <8 x i32> undef, <4 x i32> undef, i32 3) - %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v4, + %res4 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v4, <8 x i32> undef, <4 x i32> undef, i32 4) - %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v5, + %res5 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v5, <8 x i32> undef, <4 x i32> undef, i32 5) - %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v6, + %res6 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v6, <8 x i32> undef, <4 x i32> undef, i32 6) - %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v7, + %res7 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v7, <8 x i32> undef, <4 x i32> undef, i32 7) - %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v8, + %res8 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v8, <8 x i32> undef, <4 x i32> undef, i32 8) - %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v9, + %res9 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v9, <8 x i32> undef, <4 x i32> undef, i32 9) - %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v10, + %res10 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v10, <8 x i32> undef, <4 x i32> undef, i32 10) - %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v11, + %res11 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v11, <8 x i32> undef, <4 x i32> undef, i32 11) - %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v12, + %res12 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v12, <8 x i32> undef, <4 x i32> undef, i32 12) - %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v13, + %res13 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v13, <8 x i32> undef, <4 x i32> undef, i32 13) - %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v14, + %res14 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v14, <8 x i32> undef, <4 x i32> undef, i32 14) - %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v15, + %res15 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v15, <8 x i32> undef, <4 x i32> undef, i32 15) - %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v16, + %res16 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v16, <8 x i32> undef, <4 x i32> undef, i32 16) %e1 = extractelement <4 x float> %res1, i32 0 - %e2 = extractelement <4 x 
float> %res2, i32 0 - %e3 = extractelement <4 x float> %res3, i32 0 - %e4 = extractelement <4 x float> %res4, i32 0 - %e5 = extractelement <4 x float> %res5, i32 0 - %e6 = extractelement <4 x float> %res6, i32 0 - %e7 = extractelement <4 x float> %res7, i32 0 - %e8 = extractelement <4 x float> %res8, i32 0 - %e9 = extractelement <4 x float> %res9, i32 0 - %e10 = extractelement <4 x float> %res10, i32 0 - %e11 = extractelement <4 x float> %res11, i32 0 - %e12 = extractelement <4 x float> %res12, i32 0 - %e13 = extractelement <4 x float> %res13, i32 0 - %e14 = extractelement <4 x float> %res14, i32 0 - %e15 = extractelement <4 x float> %res15, i32 0 - %e16 = extractelement <4 x float> %res16, i32 0 + %e2 = extractelement <4 x float> %res2, i32 1 + %e3 = extractelement <4 x float> %res3, i32 2 + %e4 = extractelement <4 x float> %res4, i32 3 + %t0 = extractelement <4 x float> %res5, i32 0 + %t1 = extractelement <4 x float> %res5, i32 1 + %e5 = fadd float %t0, %t1 + %t2 = extractelement <4 x float> %res6, i32 0 + %t3 = extractelement <4 x float> %res6, i32 2 + %e6 = fadd float %t2, %t3 + %t4 = extractelement <4 x float> %res7, i32 0 + %t5 = extractelement <4 x float> %res7, i32 3 + %e7 = fadd float %t4, %t5 + %t6 = extractelement <4 x float> %res8, i32 1 + %t7 = extractelement <4 x float> %res8, i32 2 + %e8 = fadd float %t6, %t7 + %t8 = extractelement <4 x float> %res9, i32 1 + %t9 = extractelement <4 x float> %res9, i32 3 + %e9 = fadd float %t8, %t9 + %t10 = extractelement <4 x float> %res10, i32 2 + %t11 = extractelement <4 x float> %res10, i32 3 + %e10 = fadd float %t10, %t11 + %t12 = extractelement <4 x float> %res11, i32 0 + %t13 = extractelement <4 x float> %res11, i32 1 + %t14 = extractelement <4 x float> %res11, i32 2 + %t15 = fadd float %t12, %t13 + %e11 = fadd float %t14, %t15 + %t16 = extractelement <4 x float> %res12, i32 0 + %t17 = extractelement <4 x float> %res12, i32 1 + %t18 = extractelement <4 x float> %res12, i32 3 + %t19 = fadd float %t16, %t17 + %e12 = fadd float %t18, %t19 + %t20 = extractelement <4 x float> %res13, i32 0 + %t21 = extractelement <4 x float> %res13, i32 2 + %t22 = extractelement <4 x float> %res13, i32 3 + %t23 = fadd float %t20, %t21 + %e13 = fadd float %t22, %t23 + %t24 = extractelement <4 x float> %res14, i32 1 + %t25 = extractelement <4 x float> %res14, i32 2 + %t26 = extractelement <4 x float> %res14, i32 3 + %t27 = fadd float %t24, %t25 + %e14 = fadd float %t26, %t27 + %t28 = extractelement <4 x float> %res15, i32 0 + %t29 = extractelement <4 x float> %res15, i32 1 + %t30 = extractelement <4 x float> %res15, i32 2 + %t31 = extractelement <4 x float> %res15, i32 3 + %t32 = fadd float %t28, %t29 + %t33 = fadd float %t30, %t31 + %e15 = fadd float %t32, %t33 + %e16 = extractelement <4 x float> %res16, i32 3 %s1 = fadd float %e1, %e2 %s2 = fadd float %s1, %e3 %s3 = fadd float %s2, %e4 @@ -101,6 +135,6 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { ret void } -declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone +declare <4 x float> @llvm.SI.sample.(<4 x i32>, <8 x i32>, <4 x i32>, i32) readnone declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll index dc120bfb00c29..9b28167070426 100644 --- a/test/CodeGen/R600/llvm.cos.ll +++ b/test/CodeGen/R600/llvm.cos.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: COS T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: COS * 
T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll index b4ce9f429f168..91b7742829069 100644 --- a/test/CodeGen/R600/llvm.pow.ll +++ b/test/CodeGen/R600/llvm.pow.ll @@ -1,8 +1,8 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NEXT: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll index 5cd6998c93705..803dc2d6debc0 100644 --- a/test/CodeGen/R600/llvm.sin.ll +++ b/test/CodeGen/R600/llvm.sin.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: SIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/load.constant_addrspace.f32.ll b/test/CodeGen/R600/load.constant_addrspace.f32.ll deleted file mode 100644 index 93627283bb94c..0000000000000 --- a/test/CodeGen/R600/load.constant_addrspace.f32.ll +++ /dev/null @@ -1,9 +0,0 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -;CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}} - -define void @test(float addrspace(1)* %out, float addrspace(2)* %in) { - %1 = load float addrspace(2)* %in - store float %1, float addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/load.i8.ll b/test/CodeGen/R600/load.i8.ll deleted file mode 100644 index b070dcd520490..0000000000000 --- a/test/CodeGen/R600/load.i8.ll +++ /dev/null @@ -1,10 +0,0 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -;CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} - -define void @test(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { - %1 = load i8 addrspace(1)* %in - %2 = zext i8 %1 to i32 - store i32 %2, i32 addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll new file mode 100644 index 0000000000000..b03245ae87b3c --- /dev/null +++ b/test/CodeGen/R600/load.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; Load an i8 value from the global address space. +; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { + %1 = load i8 addrspace(1)* %in + %2 = zext i8 %1 to i32 + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; Load a f32 value from the constant address space. 
+; CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) { + %1 = load float addrspace(2)* %in + store float %1, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll new file mode 100644 index 0000000000000..8a5458b898095 --- /dev/null +++ b/test/CodeGen/R600/loop-address.ll @@ -0,0 +1,41 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: TEX +;CHECK: ALU_PUSH +;CHECK: JUMP @4 +;CHECK: ELSE @16 +;CHECK: TEX +;CHECK: LOOP_START_DX10 @15 +;CHECK: LOOP_BREAK @14 +;CHECK: POP @16 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" +target triple = "r600--" + +define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) #0 { +entry: + %cmp5 = icmp sgt i32 %iterations, 0 + br i1 %cmp5, label %for.body, label %for.end + +for.body: ; preds = %for.body, %entry + %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ] + %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %i.07 = add nsw i32 %i.07.in, -1 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06 + store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4 + %add = add nsw i32 %ai.06, 1 + %exitcond = icmp eq i32 %add, %iterations + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +attributes #0 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } + +!opencl.kernels = !{!0, !1, !2, !3} + +!0 = metadata !{void (i32 addrspace(1)*, i32)* @loop_ge} +!1 = metadata !{null} +!2 = metadata !{null} +!3 = metadata !{null} diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll index 423adb9da9008..fb698da62719e 100644 --- a/test/CodeGen/R600/lshl.ll +++ b/test/CodeGen/R600/lshl.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s ;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0 diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll index 551eac1d76bf9..e0ed3ac078660 100644 --- a/test/CodeGen/R600/lshr.ll +++ b/test/CodeGen/R600/lshr.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s ;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll new file mode 100644 index 0000000000000..7278e9039840c --- /dev/null +++ b/test/CodeGen/R600/mul.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; mul24 and mad24 are affected +;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = mul <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll index 
28744e00c3cf3..bc17a597873ee 100644 --- a/test/CodeGen/R600/mulhu.ll +++ b/test/CodeGen/R600/mulhu.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s ;CHECK: V_MOV_B32_e32 VGPR1, -1431655765 ;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0 diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll new file mode 100644 index 0000000000000..b0dbb021e8226 --- /dev/null +++ b/test/CodeGen/R600/or.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @or_v4i32 +; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) { + %result = or <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll index eb8b052b6f724..0d3eeef263072 100644 --- a/test/CodeGen/R600/predicates.ll +++ b/test/CodeGen/R600/predicates.ll @@ -4,8 +4,8 @@ ; when it is legal to do so. ; CHECK: @simple_if -; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, -; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred, +; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel define void @simple_if(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 @@ -22,9 +22,9 @@ ENDIF: } ; CHECK: @simple_if_else -; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, -; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel -; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred, +; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel +; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel define void @simple_if_else(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 @@ -46,11 +46,11 @@ ENDIF: ; CHECK: @nested_if ; CHECK: ALU_PUSH_BEFORE -; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec ; CHECK: JUMP -; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, -; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel ; CHECK: POP +; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Exec +; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred, +; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel define void @nested_if(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 @@ -73,12 +73,12 @@ ENDIF: ; CHECK: @nested_if_else ; CHECK: ALU_PUSH_BEFORE -; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec ; CHECK: JUMP -; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, -; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel -; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel ; CHECK: POP +; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Exec +; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred, +; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel +; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll new file mode 100644 index 0000000000000..37c3d9d7d6d11 --- /dev/null +++ 
b/test/CodeGen/R600/pv.ll @@ -0,0 +1,244 @@ +; RUN: llc < %s -march=r600 | FileCheck %s + +;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED) +;CHECK-NEXT: CNDGE T{{[0-9].[XYZW]}}, PV.x + +define void @main() #0 { +main_body: + %0 = call float @llvm.R600.load.input(i32 4) + %1 = call float @llvm.R600.load.input(i32 5) + %2 = call float @llvm.R600.load.input(i32 6) + %3 = call float @llvm.R600.load.input(i32 7) + %4 = call float @llvm.R600.load.input(i32 8) + %5 = call float @llvm.R600.load.input(i32 9) + %6 = call float @llvm.R600.load.input(i32 10) + %7 = call float @llvm.R600.load.input(i32 11) + %8 = call float @llvm.R600.load.input(i32 12) + %9 = call float @llvm.R600.load.input(i32 13) + %10 = call float @llvm.R600.load.input(i32 14) + %11 = call float @llvm.R600.load.input(i32 15) + %12 = call float @llvm.R600.load.input(i32 16) + %13 = call float @llvm.R600.load.input(i32 17) + %14 = call float @llvm.R600.load.input(i32 18) + %15 = call float @llvm.R600.load.input(i32 19) + %16 = call float @llvm.R600.load.input(i32 20) + %17 = call float @llvm.R600.load.input(i32 21) + %18 = call float @llvm.R600.load.input(i32 22) + %19 = call float @llvm.R600.load.input(i32 23) + %20 = call float @llvm.R600.load.input(i32 24) + %21 = call float @llvm.R600.load.input(i32 25) + %22 = call float @llvm.R600.load.input(i32 26) + %23 = call float @llvm.R600.load.input(i32 27) + %24 = call float @llvm.R600.load.input(i32 28) + %25 = call float @llvm.R600.load.input(i32 29) + %26 = call float @llvm.R600.load.input(i32 30) + %27 = call float @llvm.R600.load.input(i32 31) + %28 = load <4 x float> addrspace(8)* null + %29 = extractelement <4 x float> %28, i32 0 + %30 = fmul float %0, %29 + %31 = load <4 x float> addrspace(8)* null + %32 = extractelement <4 x float> %31, i32 1 + %33 = fmul float %0, %32 + %34 = load <4 x float> addrspace(8)* null + %35 = extractelement <4 x float> %34, i32 2 + %36 = fmul float %0, %35 + %37 = load <4 x float> addrspace(8)* null + %38 = extractelement <4 x float> %37, i32 3 + %39 = fmul float %0, %38 + %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %41 = extractelement <4 x float> %40, i32 0 + %42 = fmul float %1, %41 + %43 = fadd float %42, %30 + %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %45 = extractelement <4 x float> %44, i32 1 + %46 = fmul float %1, %45 + %47 = fadd float %46, %33 + %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %49 = extractelement <4 x float> %48, i32 2 + %50 = fmul float %1, %49 + %51 = fadd float %50, %36 + %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %53 = extractelement <4 x float> %52, i32 3 + %54 = fmul float %1, %53 + %55 = fadd float %54, %39 + %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %57 = extractelement <4 x float> %56, i32 0 + %58 = fmul float %2, %57 + %59 = fadd float %58, %43 + %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %61 = extractelement <4 x float> %60, i32 1 + %62 = fmul float %2, %61 + %63 = fadd float %62, %47 + %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %65 = extractelement <4 x float> %64, i32 2 + %66 = fmul float %2, %65 + %67 = fadd float %66, %51 + %68 = load <4 x float> addrspace(8)* 
getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %69 = extractelement <4 x float> %68, i32 3 + %70 = fmul float %2, %69 + %71 = fadd float %70, %55 + %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %73 = extractelement <4 x float> %72, i32 0 + %74 = fmul float %3, %73 + %75 = fadd float %74, %59 + %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %77 = extractelement <4 x float> %76, i32 1 + %78 = fmul float %3, %77 + %79 = fadd float %78, %63 + %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %81 = extractelement <4 x float> %80, i32 2 + %82 = fmul float %3, %81 + %83 = fadd float %82, %67 + %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %85 = extractelement <4 x float> %84, i32 3 + %86 = fmul float %3, %85 + %87 = fadd float %86, %71 + %88 = insertelement <4 x float> undef, float %4, i32 0 + %89 = insertelement <4 x float> %88, float %5, i32 1 + %90 = insertelement <4 x float> %89, float %6, i32 2 + %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 3 + %92 = insertelement <4 x float> undef, float %4, i32 0 + %93 = insertelement <4 x float> %92, float %5, i32 1 + %94 = insertelement <4 x float> %93, float %6, i32 2 + %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3 + %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95) + %97 = call float @fabs(float %96) + %98 = call float @llvm.AMDGPU.rsq(float %97) + %99 = fmul float %4, %98 + %100 = fmul float %5, %98 + %101 = fmul float %6, %98 + %102 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %103 = extractelement <4 x float> %102, i32 0 + %104 = fmul float %103, %8 + %105 = fadd float %104, %20 + %106 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %107 = extractelement <4 x float> %106, i32 1 + %108 = fmul float %107, %9 + %109 = fadd float %108, %21 + %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %111 = extractelement <4 x float> %110, i32 2 + %112 = fmul float %111, %10 + %113 = fadd float %112, %22 + %114 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00) + %115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00) + %116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00) + %117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) + %118 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %119 = extractelement <4 x float> %118, i32 0 + %120 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %121 = extractelement <4 x float> %120, i32 1 + %122 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %123 = extractelement <4 x float> %122, i32 2 + %124 = insertelement <4 x float> undef, float %99, i32 0 + %125 = insertelement <4 x float> %124, float %100, i32 1 + %126 = insertelement <4 x float> %125, float %101, i32 2 + %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 3 + %128 = insertelement <4 x float> undef, float %119, i32 0 + %129 = insertelement <4 x float> %128, float 
%121, i32 1 + %130 = insertelement <4 x float> %129, float %123, i32 2 + %131 = insertelement <4 x float> %130, float 0.000000e+00, i32 3 + %132 = call float @llvm.AMDGPU.dp4(<4 x float> %127, <4 x float> %131) + %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %134 = extractelement <4 x float> %133, i32 0 + %135 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %136 = extractelement <4 x float> %135, i32 1 + %137 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %138 = extractelement <4 x float> %137, i32 2 + %139 = insertelement <4 x float> undef, float %99, i32 0 + %140 = insertelement <4 x float> %139, float %100, i32 1 + %141 = insertelement <4 x float> %140, float %101, i32 2 + %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 3 + %143 = insertelement <4 x float> undef, float %134, i32 0 + %144 = insertelement <4 x float> %143, float %136, i32 1 + %145 = insertelement <4 x float> %144, float %138, i32 2 + %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 3 + %147 = call float @llvm.AMDGPU.dp4(<4 x float> %142, <4 x float> %146) + %148 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %149 = extractelement <4 x float> %148, i32 0 + %150 = fmul float %149, %8 + %151 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %152 = extractelement <4 x float> %151, i32 1 + %153 = fmul float %152, %9 + %154 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %155 = extractelement <4 x float> %154, i32 2 + %156 = fmul float %155, %10 + %157 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %158 = extractelement <4 x float> %157, i32 0 + %159 = fmul float %158, %12 + %160 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %161 = extractelement <4 x float> %160, i32 1 + %162 = fmul float %161, %13 + %163 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %164 = extractelement <4 x float> %163, i32 2 + %165 = fmul float %164, %14 + %166 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %167 = extractelement <4 x float> %166, i32 0 + %168 = fmul float %167, %16 + %169 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %170 = extractelement <4 x float> %169, i32 1 + %171 = fmul float %170, %17 + %172 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %173 = extractelement <4 x float> %172, i32 2 + %174 = fmul float %173, %18 + %175 = fcmp uge float %132, 0.000000e+00 + %176 = select i1 %175, float %132, float 0.000000e+00 + %177 = fcmp uge float %147, 0.000000e+00 + %178 = select i1 %177, float %147, float 0.000000e+00 + %179 = call float @llvm.pow.f32(float %178, float %24) + %180 = fcmp ult float %132, 0.000000e+00 + %181 = select i1 %180, float 0.000000e+00, float %179 + %182 = fadd float %150, %105 + %183 = fadd float %153, %109 + %184 = fadd float %156, %113 + %185 = fmul float %176, %159 + %186 = fadd float %185, %182 + %187 = fmul float %176, %162 + %188 = fadd float %187, %183 + %189 = fmul float %176, %165 + %190 = 
fadd float %189, %184 + %191 = fmul float %181, %168 + %192 = fadd float %191, %186 + %193 = fmul float %181, %171 + %194 = fadd float %193, %188 + %195 = fmul float %181, %174 + %196 = fadd float %195, %190 + %197 = call float @llvm.AMDIL.clamp.(float %192, float 0.000000e+00, float 1.000000e+00) + %198 = call float @llvm.AMDIL.clamp.(float %194, float 0.000000e+00, float 1.000000e+00) + %199 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00) + %200 = insertelement <4 x float> undef, float %75, i32 0 + %201 = insertelement <4 x float> %200, float %79, i32 1 + %202 = insertelement <4 x float> %201, float %83, i32 2 + %203 = insertelement <4 x float> %202, float %87, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %203, i32 60, i32 1) + %204 = insertelement <4 x float> undef, float %197, i32 0 + %205 = insertelement <4 x float> %204, float %198, i32 1 + %206 = insertelement <4 x float> %205, float %199, i32 2 + %207 = insertelement <4 x float> %206, float %117, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %207, i32 0, i32 2) + ret void +} + +; Function Attrs: readnone +declare float @llvm.R600.load.input(i32) #1 + +; Function Attrs: readnone +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 + +; Function Attrs: readonly +declare float @fabs(float) #2 + +; Function Attrs: readnone +declare float @llvm.AMDGPU.rsq(float) #1 + +; Function Attrs: readnone +declare float @llvm.AMDIL.clamp.(float, float, float) #1 + +; Function Attrs: nounwind readonly +declare float @llvm.pow.f32(float, float) #3 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" } +attributes #1 = { readnone } +attributes #2 = { readonly } +attributes #3 = { nounwind readonly } diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll new file mode 100644 index 0000000000000..c8040a1b4cd52 --- /dev/null +++ b/test/CodeGen/R600/r600-encoding.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s +; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rs880 | FileCheck --check-prefix=R600-CHECK %s + +; The earliest R600 GPUs have a slightly different encoding than the rest of +; the VLIW4/5 GPUs. 
+ +; EG-CHECK: @test +; EG-CHECK: MUL_IEEE {{[ *TXYZW.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x01,0x[0-9a-f]+,0x[0-9a-f]+}}] + +; R600-CHECK: @test +; R600-CHECK: MUL_IEEE {{[ *TXYZW.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}] + +define void @test() { +entry: + %0 = call float @llvm.R600.load.input(i32 0) + %1 = call float @llvm.R600.load.input(i32 1) + %2 = fmul float %0, %1 + call void @llvm.AMDGPU.store.output(float %2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll index 6838c1ae36628..27839296703f4 100644 --- a/test/CodeGen/R600/reciprocal.ll +++ b/test/CodeGen/R600/reciprocal.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll index 3556facfbab3c..3dd10c8a61c1c 100644 --- a/test/CodeGen/R600/sdiv.ll +++ b/test/CodeGen/R600/sdiv.ll @@ -9,7 +9,7 @@ ; This was fixed by adding an additional pattern in R600Instructions.td to ; match this pattern with a CNDGE_INT. -; CHECK: RETURN +; CHECK: CF_END define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc-cnd.ll index f0a0f512ba158..d7287b4878960 100644 --- a/test/CodeGen/R600/selectcc_cnde.ll +++ b/test/CodeGen/R600/selectcc-cnd.ll @@ -1,7 +1,8 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK-NOT: SETE -;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}} +;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x, +;CHECK-NEXT: {{[-0-9]+\(2.0}} define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { %1 = load float addrspace(1)* %in %2 = fcmp oeq float %1, 0.0 diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc-cnde-int.ll index b38078e26db61..768dc7dbf418d 100644 --- a/test/CodeGen/R600/selectcc_cnde_int.ll +++ b/test/CodeGen/R600/selectcc-cnde-int.ll @@ -1,7 +1,8 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK-NOT: SETE_INT -;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}} +;CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x, +;CHECK-NEXT: 2 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %1 = load i32 addrspace(1)* %in %2 = icmp eq i32 %1, 0 diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll index 359ca1e6f8ce7..6743800490b3f 100644 --- a/test/CodeGen/R600/selectcc-icmp-select-float.ll +++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll @@ -2,7 +2,8 @@ ; Note additional optimizations may cause this SGT to be replaced with a ; CND* instruction. 
-; CHECK: SETGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}} +; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, literal.x, +; CHECK-NEXT: -1 ; Test a selectcc with i32 LHS/RHS and float True/False define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) { diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll index 54febcf0e68e5..eb6e9d2f2ba57 100644 --- a/test/CodeGen/R600/set-dx10.ll +++ b/test/CodeGen/R600/set-dx10.ll @@ -5,7 +5,8 @@ ; SET*DX10 instructions. ; CHECK: @fcmp_une_select_fptosi -; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp une float %in, 5.0 @@ -17,7 +18,8 @@ entry: } ; CHECK: @fcmp_une_select_i32 -; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp une float %in, 5.0 @@ -27,7 +29,8 @@ entry: } ; CHECK: @fcmp_ueq_select_fptosi -; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp ueq float %in, 5.0 @@ -39,7 +42,8 @@ entry: } ; CHECK: @fcmp_ueq_select_i32 -; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp ueq float %in, 5.0 @@ -49,7 +53,8 @@ entry: } ; CHECK: @fcmp_ugt_select_fptosi -; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp ugt float %in, 5.0 @@ -61,7 +66,8 @@ entry: } ; CHECK: @fcmp_ugt_select_i32 -; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp ugt float %in, 5.0 @@ -71,7 +77,8 @@ entry: } ; CHECK: @fcmp_uge_select_fptosi -; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp uge float %in, 5.0 @@ -83,7 +90,8 @@ entry: } ; CHECK: @fcmp_uge_select_i32 -; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp uge float %in, 5.0 @@ -93,7 +101,8 @@ 
entry: } ; CHECK: @fcmp_ule_select_fptosi -; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp ule float %in, 5.0 @@ -105,7 +114,8 @@ entry: } ; CHECK: @fcmp_ule_select_i32 -; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp ule float %in, 5.0 @@ -115,7 +125,8 @@ entry: } ; CHECK: @fcmp_ult_select_fptosi -; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp ult float %in, 5.0 @@ -127,7 +138,8 @@ entry: } ; CHECK: @fcmp_ult_select_i32 -; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp ult float %in, 5.0 diff --git a/test/CodeGen/R600/setcc.v4i32.ll b/test/CodeGen/R600/setcc.ll index 0752f2e63dbf2..0752f2e63dbf2 100644 --- a/test/CodeGen/R600/setcc.v4i32.ll +++ b/test/CodeGen/R600/setcc.ll diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll index 5ab4b87d570cf..4622203ffdbc6 100644 --- a/test/CodeGen/R600/seto.ll +++ b/test/CodeGen/R600/seto.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s ;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0 diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll index 320835576d418..0bf5801b1c33d 100644 --- a/test/CodeGen/R600/setuo.ll +++ b/test/CodeGen/R600/setuo.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s ;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0 diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll new file mode 100644 index 0000000000000..43cc1e26fc016 --- /dev/null +++ b/test/CodeGen/R600/shl.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @shl_v4i32 +; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) { + %result = shl <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll new file mode 100644 index 0000000000000..91a8eb7f57b42 --- /dev/null +++ b/test/CodeGen/R600/sint_to_fp.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @sint_to_fp_v4i32 +; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], 
T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %value = load <4 x i32> addrspace(1) * %in + %result = sitofp <4 x i32> %value to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll new file mode 100644 index 0000000000000..972542d346f49 --- /dev/null +++ b/test/CodeGen/R600/sra.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @ashr_v4i32 +; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) { + %result = ashr <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll new file mode 100644 index 0000000000000..5f63600b75f02 --- /dev/null +++ b/test/CodeGen/R600/srl.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @lshr_v4i32 +; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) { + %result = lshr <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll new file mode 100644 index 0000000000000..4d673f3ea326a --- /dev/null +++ b/test/CodeGen/R600/store.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s + +; floating-point store +; EG-CHECK: @store_f32 +; EG-CHECK: RAT_WRITE_CACHELESS_32_eg T{{[0-9]+\.X, T[0-9]+\.X}}, 1 +; SI-CHECK: @store_f32 +; SI-CHECK: BUFFER_STORE_DWORD + +define void @store_f32(float addrspace(1)* %out, float %in) { + store float %in, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/store.r600.ll b/test/CodeGen/R600/store.r600.ll new file mode 100644 index 0000000000000..5ffb7f1809f8d --- /dev/null +++ b/test/CodeGen/R600/store.r600.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s + +; XXX: Merge this test into store.ll once it is supported on SI + +; v4i32 store +; EG-CHECK: @store_v4i32 +; EG-CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 + +define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %1 = load <4 x i32> addrspace(1) * %in + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + +; v4f32 store +; EG-CHECK: @store_v4f32 +; EG-CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 +define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %1 = load <4 x float> addrspace(1) * %in + store <4 x float> %1, <4 x float> addrspace(1)* %out + ret void +} diff --git 
a/test/CodeGen/R600/store.v4f32.ll b/test/CodeGen/R600/store.v4f32.ll deleted file mode 100644 index 8b0d24445971c..0000000000000 --- a/test/CodeGen/R600/store.v4f32.ll +++ /dev/null @@ -1,9 +0,0 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 - -define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { - %1 = load <4 x float> addrspace(1) * %in - store <4 x float> %1, <4 x float> addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/store.v4i32.ll b/test/CodeGen/R600/store.v4i32.ll deleted file mode 100644 index a659815ddeba8..0000000000000 --- a/test/CodeGen/R600/store.v4i32.ll +++ /dev/null @@ -1,9 +0,0 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 - -define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { - %1 = load <4 x i32> addrspace(1) * %in - store <4 x i32> %1, <4 x i32> addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll new file mode 100644 index 0000000000000..12bfba39753eb --- /dev/null +++ b/test/CodeGen/R600/sub.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: SUB_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = sub <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/udiv.v4i32.ll b/test/CodeGen/R600/udiv.ll index 47657a6be75e7..b81e3667ce897 100644 --- a/test/CodeGen/R600/udiv.v4i32.ll +++ b/test/CodeGen/R600/udiv.ll @@ -3,7 +3,7 @@ ;The code generated by udiv is long and complex and may frequently change. 
;The goal of this test is to make sure the ISel doesn't fail when it gets ;a v4i32 udiv -;CHECK: RETURN +;CHECK: CF_END define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll new file mode 100644 index 0000000000000..9054fc4c2cc87 --- /dev/null +++ b/test/CodeGen/R600/uint_to_fp.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @uint_to_fp_v4i32 +; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %value = load <4 x i32> addrspace(1) * %in + %result = uitofp <4 x i32> %value to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll index b48c59151831a..b311f4cfa7f9b 100644 --- a/test/CodeGen/R600/unsupported-cc.ll +++ b/test/CodeGen/R600/unsupported-cc.ll @@ -3,7 +3,8 @@ ; These tests are for condition codes that are not supported by the hardware ; CHECK: @slt -; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45) +; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK-NEXT: 5(7.006492e-45) define void @slt(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp slt i32 %in, 5 @@ -13,7 +14,8 @@ entry: } ; CHECK: @ult_i32 -; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45) +; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK-NEXT: 5(7.006492e-45) define void @ult_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp ult i32 %in, 5 @@ -23,7 +25,8 @@ entry: } ; CHECK: @ult_float -; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK-NEXT: 1084227584(5.000000e+00) define void @ult_float(float addrspace(1)* %out, float %in) { entry: %0 = fcmp ult float %in, 5.0 @@ -33,7 +36,8 @@ entry: } ; CHECK: @olt -; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +;CHECK-NEXT: 1084227584(5.000000e+00) define void @olt(float addrspace(1)* %out, float %in) { entry: %0 = fcmp olt float %in, 5.0 @@ -43,7 +47,8 @@ entry: } ; CHECK: @sle -; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45) +; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK-NEXT: 6(8.407791e-45) define void @sle(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp sle i32 %in, 5 @@ -53,7 +58,8 @@ entry: } ; CHECK: @ule_i32 -; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45) +; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK-NEXT: 6(8.407791e-45) define void @ule_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp ule i32 %in, 5 @@ -63,7 +69,8 @@ entry: } ; CHECK: @ule_float -; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK-NEXT: 
1084227584(5.000000e+00) define void @ule_float(float addrspace(1)* %out, float %in) { entry: %0 = fcmp ule float %in, 5.0 @@ -73,7 +80,8 @@ entry: } ; CHECK: @ole -; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK-NEXT:1084227584(5.000000e+00) define void @ole(float addrspace(1)* %out, float %in) { entry: %0 = fcmp ole float %in, 5.0 diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll new file mode 100644 index 0000000000000..dad02dd76f0a5 --- /dev/null +++ b/test/CodeGen/R600/urecip.ll @@ -0,0 +1,12 @@ +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s + +;CHECK: V_RCP_IFLAG_F32_e32 + +define void @test(i32 %p, i32 %q) { + %i = udiv i32 %p, %q + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/urem.v4i32.ll b/test/CodeGen/R600/urem.ll index 2e7388caa6ced..a2cc0bd2e84e6 100644 --- a/test/CodeGen/R600/urem.v4i32.ll +++ b/test/CodeGen/R600/urem.ll @@ -3,7 +3,7 @@ ;The code generated by urem is long and complex and may frequently change. ;The goal of this test is to make sure the ISel doesn't fail when it gets ;a v4i32 urem -;CHECK: RETURN +;CHECK: CF_END define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/vec4-expand.ll b/test/CodeGen/R600/vec4-expand.ll deleted file mode 100644 index 8f62bc692908f..0000000000000 --- a/test/CodeGen/R600/vec4-expand.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -; CHECK: @fp_to_sint -; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -define void @fp_to_sint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { - %value = load <4 x float> addrspace(1) * %in - %result = fptosi <4 x float> %value to <4 x i32> - store <4 x i32> %result, <4 x i32> addrspace(1)* %out - ret void -} - -; CHECK: @fp_to_uint -; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -define void @fp_to_uint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { - %value = load <4 x float> addrspace(1) * %in - %result = fptoui <4 x float> %value to <4 x i32> - store <4 x i32> %result, <4 x i32> addrspace(1)* %out - ret void -} - -; CHECK: @sint_to_fp -; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -define void @sint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { - %value = load <4 x i32> addrspace(1) * %in - %result = sitofp <4 x i32> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out - ret void -} - -; CHECK: @uint_to_fp -; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: UINT_TO_FLT 
T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -define void @uint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { - %value = load <4 x i32> addrspace(1) * %in - %result = uitofp <4 x i32> %value to <4 x float> - store <4 x float> %result, <4 x float> addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll new file mode 100644 index 0000000000000..6e459df847e73 --- /dev/null +++ b/test/CodeGen/R600/vselect.ll @@ -0,0 +1,17 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @test_select_v4i32 +; CHECK: CNDE_INT T{{[0-9]+\.[XYZW], PV\.[xyzw], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], PV\.[xyzw], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) { +entry: + %0 = load <4 x i32> addrspace(1)* %in0 + %1 = load <4 x i32> addrspace(1)* %in1 + %cmp = icmp ne <4 x i32> %0, %1 + %result = select <4 x i1> %cmp, <4 x i32> %0, <4 x i32> %1 + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll new file mode 100644 index 0000000000000..cf612e0a1fbe4 --- /dev/null +++ b/test/CodeGen/R600/xor.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @xor_v4i32 +; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) { + %result = xor <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} |