diff options
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/AArch64/arm64-platform-reg.ll | 15 | ||||
-rw-r--r-- | test/CodeGen/AArch64/ghc-cc.ll | 89 | ||||
-rw-r--r-- | test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll | 55 | ||||
-rw-r--r-- | test/CodeGen/Mips/fcmp.ll | 90 | ||||
-rw-r--r-- | test/CodeGen/R600/basic-loop.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/R600/ctpop.ll | 66 | ||||
-rw-r--r-- | test/CodeGen/R600/ctpop64.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/R600/ds_read2st64.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/R600/fp_to_sint.ll | 15 | ||||
-rw-r--r-- | test/CodeGen/R600/hsa.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/misaligned-load.ll | 18 | ||||
-rw-r--r-- | test/CodeGen/R600/scratch-buffer.ll | 86 | ||||
-rw-r--r-- | test/CodeGen/R600/si-triv-disjoint-mem-access.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/SPARC/inlineasm.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/SPARC/mult-alt-generic-sparc.ll | 2 |
16 files changed, 401 insertions, 51 deletions
diff --git a/test/CodeGen/AArch64/arm64-platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll index 651c793f73a4..b0d3ee0ff8a3 100644 --- a/test/CodeGen/AArch64/arm64-platform-reg.ll +++ b/test/CodeGen/AArch64/arm64-platform-reg.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-DARWIN +; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 +; RUN: llc -mtriple=arm64-freebsd-gnu -aarch64-reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s ; x18 is reserved as a platform register on Darwin but not on other @@ -16,11 +17,11 @@ define void @keep_live() { ; CHECK: ldr x18 ; CHECK: str x18 -; CHECK-DARWIN-NOT: ldr fp -; CHECK-DARWIN-NOT: ldr x18 -; CHECK-DARWIN: Spill -; CHECK-DARWIN-NOT: ldr fp -; CHECK-DARWIN-NOT: ldr x18 -; CHECK-DARWIN: ret +; CHECK-RESERVE-X18-NOT: ldr fp +; CHECK-RESERVE-X18-NOT: ldr x18 +; CHECK-RESERVE-X18: Spill +; CHECK-RESERVE-X18-NOT: ldr fp +; CHECK-RESERVE-X18-NOT: ldr x18 +; CHECK-RESERVE-X18: ret ret void } diff --git a/test/CodeGen/AArch64/ghc-cc.ll b/test/CodeGen/AArch64/ghc-cc.ll new file mode 100644 index 000000000000..505bd5fca66d --- /dev/null +++ b/test/CodeGen/AArch64/ghc-cc.ll @@ -0,0 +1,89 @@ +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; Check the GHC call convention works (aarch64) + +@base = external global i64 ; assigned to register: r19 +@sp = external global i64 ; assigned to register: r20 +@hp = external global i64 ; assigned to register: r21 +@r1 = external global i64 ; assigned to register: r22 +@r2 = external global i64 ; assigned to register: r23 +@r3 = external global i64 ; assigned to register: r24 +@r4 = external global i64 ; assigned to register: r25 +@r5 = external global i64 ; assigned to register: r26 +@r6 = external global i64 ; assigned to register: r27 +@splim = external global i64 ; assigned to register: r28 + +@f1 = external global float ; assigned to register: s8 +@f2 = external global float ; assigned to register: s9 +@f3 = external global float ; assigned to register: s10 +@f4 = external global float ; assigned to register: s11 + +@d1 = external global double ; assigned to register: d12 +@d2 = external global double ; assigned to register: d13 +@d3 = external global double ; assigned to register: d14 +@d4 = external global double ; assigned to register: d15 + +define ghccc i64 @addtwo(i64 %x, i64 %y) nounwind { +entry: + ; CHECK-LABEL: addtwo + ; CHECK: add x0, x19, x20 + ; CHECK-NEXT: ret + %0 = add i64 %x, %y + ret i64 %0 +} + +define void @zap(i64 %a, i64 %b) nounwind { +entry: + ; CHECK-LABEL: zap + ; CHECK-NOT: mov {{x[0-9]+}}, sp + ; CHECK: bl addtwo + ; CHECK-NEXT: bl foo + %0 = call ghccc i64 @addtwo(i64 %a, i64 %b) + call void @foo() nounwind + ret void +} + +define ghccc void @foo_i64 () nounwind { +entry: + ; CHECK-LABEL: foo_i64 + ; CHECK: adrp {{x[0-9]+}}, base + ; CHECK-NEXT: ldr x19, [{{x[0-9]+}}, :lo12:base] + ; CHECK-NEXT: bl bar_i64 + ; CHECK-NEXT: ret + + %0 = load i64* @base + tail call ghccc void @bar_i64( i64 %0 ) nounwind + ret void +} + +define ghccc void @foo_float () nounwind { +entry: + ; CHECK-LABEL: foo_float + ; CHECK: adrp {{x[0-9]+}}, f1 + ; CHECK-NEXT: ldr s8, [{{x[0-9]+}}, :lo12:f1] + ; CHECK-NEXT: bl bar_float + ; CHECK-NEXT: ret + + %0 = load float* @f1 + tail call ghccc void @bar_float( float %0 ) nounwind + ret void +} + +define ghccc void @foo_double () nounwind { +entry: + ; CHECK-LABEL: foo_double + ; CHECK: adrp {{x[0-9]+}}, d1 + ; CHECK-NEXT: ldr d12, [{{x[0-9]+}}, :lo12:d1] + ; CHECK-NEXT: bl bar_double + ; CHECK-NEXT: ret + + %0 = load double* @d1 + tail call ghccc void @bar_double( double %0 ) nounwind + ret void +} + +declare ghccc void @foo () + +declare ghccc void @bar_i64 (i64) +declare ghccc void @bar_float (float) +declare ghccc void @bar_double (double) diff --git a/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll b/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll new file mode 100644 index 000000000000..f3cc3d82121f --- /dev/null +++ b/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll @@ -0,0 +1,55 @@ +; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V4T +; RUN: llc -mtriple=thumbv6m-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V6M + +; CHECK-LABEL: foo +define i32 @foo(i32 %z, ...) #0 { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %d = alloca i32, align 4 + %e = alloca i32, align 4 + %f = alloca i32, align 4 + %g = alloca i32, align 4 + %h = alloca i32, align 4 + + store i32 1, i32* %a, align 4 + store i32 2, i32* %b, align 4 + store i32 3, i32* %c, align 4 + store i32 4, i32* %d, align 4 + store i32 5, i32* %e, align 4 + store i32 6, i32* %f, align 4 + store i32 7, i32* %g, align 4 + store i32 8, i32* %h, align 4 + + %0 = load i32* %a, align 4 + %1 = load i32* %b, align 4 + %2 = load i32* %c, align 4 + %3 = load i32* %d, align 4 + %4 = load i32* %e, align 4 + %5 = load i32* %f, align 4 + %6 = load i32* %g, align 4 + %7 = load i32* %h, align 4 + + %add = add nsw i32 %0, %1 + %add4 = add nsw i32 %add, %2 + %add5 = add nsw i32 %add4, %3 + %add6 = add nsw i32 %add5, %4 + %add7 = add nsw i32 %add6, %5 + %add8 = add nsw i32 %add7, %6 + %add9 = add nsw i32 %add8, %7 + + %addz = add nsw i32 %add9, %z + call void @llvm.va_start(i8* null) + ret i32 %addz + +; CHECK: sub sp, #40 +; CHECK-NEXT: add [[BASE:r[0-9]]], sp, #8 + +; CHECK-V4T: movs [[NEWBASE:r[0-9]]], [[BASE]] +; CHECK-V6M: mov [[NEWBASE:r[0-9]]], [[BASE]] +; CHECK-NEXT: adds [[NEWBASE]], #8 +; CHECK-NEXT: ldm [[NEWBASE]], +} + +declare void @llvm.va_start(i8*) nounwind diff --git a/test/CodeGen/Mips/fcmp.ll b/test/CodeGen/Mips/fcmp.ll index b7759831c5a2..8e83b0064ed9 100644 --- a/test/CodeGen/Mips/fcmp.ll +++ b/test/CodeGen/Mips/fcmp.ll @@ -781,3 +781,93 @@ define i32 @true_f64(double %a, double %b) nounwind { %2 = zext i1 %1 to i32 ret i32 %2 } + +; The optimizers sometimes produce setlt instead of setolt/setult. +define float @bug1_f32(float %angle, float %at) #0 { +entry: +; ALL-LABEL: bug1_f32: + +; 32-C-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12 +; 32-C-DAG: lwc1 $[[T1:f[0-9]+]], %lo($CPI32_0)( +; 32-C-DAG: c.ole.s $[[T0]], $[[T1]] +; 32-C-DAG: bc1t + +; 32-CMP-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12 +; 32-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %lo($CPI32_0)( +; 32-CMP-DAG: cmp.le.s $[[T2:f[0-9]+]], $[[T0]], $[[T1]] +; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] +; FIXME: This instruction is redundant. +; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 +; 32-CMP-DAG: bnez $[[T4]], + +; 64-C-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12 +; 64-C-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)( +; 64-C-DAG: c.ole.s $[[T0]], $[[T1]] +; 64-C-DAG: bc1t + +; 64-CMP-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12 +; 64-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)( +; 64-CMP-DAG: cmp.le.s $[[T2:f[0-9]+]], $[[T0]], $[[T1]] +; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] +; FIXME: This instruction is redundant. +; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 +; 64-CMP-DAG: bnez $[[T4]], + + %add = fadd fast float %at, %angle + %cmp = fcmp ogt float %add, 1.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %sub = fadd fast float %add, -1.000000e+00 + br label %if.end + +if.end: + %theta.0 = phi float [ %sub, %if.then ], [ %add, %entry ] + ret float %theta.0 +} + +; The optimizers sometimes produce setlt instead of setolt/setult. +define double @bug1_f64(double %angle, double %at) #0 { +entry: +; ALL-LABEL: bug1_f64: + +; 32-C-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12 +; 32-C-DAG: ldc1 $[[T1:f[0-9]+]], %lo($CPI33_0)( +; 32-C-DAG: c.ole.d $[[T0]], $[[T1]] +; 32-C-DAG: bc1t + +; 32-CMP-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12 +; 32-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %lo($CPI33_0)( +; 32-CMP-DAG: cmp.le.d $[[T2:f[0-9]+]], $[[T0]], $[[T1]] +; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] +; FIXME: This instruction is redundant. +; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 +; 32-CMP-DAG: bnez $[[T4]], + +; 64-C-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12 +; 64-C-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)( +; 64-C-DAG: c.ole.d $[[T0]], $[[T1]] +; 64-C-DAG: bc1t + +; 64-CMP-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12 +; 64-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)( +; 64-CMP-DAG: cmp.le.d $[[T2:f[0-9]+]], $[[T0]], $[[T1]] +; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] +; FIXME: This instruction is redundant. +; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 +; 64-CMP-DAG: bnez $[[T4]], + + %add = fadd fast double %at, %angle + %cmp = fcmp ogt double %add, 1.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %sub = fadd fast double %add, -1.000000e+00 + br label %if.end + +if.end: + %theta.0 = phi double [ %sub, %if.then ], [ %add, %entry ] + ret double %theta.0 +} + +attributes #0 = { nounwind readnone "no-nans-fp-math"="true" } diff --git a/test/CodeGen/R600/basic-loop.ll b/test/CodeGen/R600/basic-loop.ll index 72737ae273e6..9d0509b38d8a 100644 --- a/test/CodeGen/R600/basic-loop.ll +++ b/test/CodeGen/R600/basic-loop.ll @@ -1,4 +1,3 @@ -; XFAIL: * ; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_loop: diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll index a47bc876cb96..c64f443ad697 100644 --- a/test/CodeGen/R600/ctpop.ll +++ b/test/CodeGen/R600/ctpop.ll @@ -24,8 +24,7 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { ; XXX - Why 0 in register? ; FUNC-LABEL: {{^}}v_ctpop_i32: ; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]] +; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0 ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -40,8 +39,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali ; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32: ; SI: buffer_load_dword [[VAL0:v[0-9]+]], ; SI: buffer_load_dword [[VAL1:v[0-9]+]], -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]] +; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -73,8 +71,8 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace( } ; FUNC-LABEL: {{^}}v_ctpop_v2i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT @@ -87,10 +85,10 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v4i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT @@ -105,14 +103,14 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v8i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT @@ -131,22 +129,22 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v16i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll index 8dfe571d3477..9758ac96ea9b 100644 --- a/test/CodeGen/R600/ctpop64.ll +++ b/test/CodeGen/R600/ctpop64.ll @@ -21,8 +21,7 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind { ; FUNC-LABEL: {{^}}v_ctpop_i64: ; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]] +; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll index 24834af20404..efd875e93176 100644 --- a/test/CodeGen/R600/ds_read2st64.ll +++ b/test/CodeGen/R600/ds_read2st64.ll @@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add ; SI-LABEL: @simple_read2st64_f32_over_max_offset ; SI-NOT: ds_read2st64_b32 -; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { @@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a ; SI-LABEL: @simple_read2st64_f64_over_max_offset ; SI-NOT: ds_read2st64_b64 -; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll index 35cfb03d39b4..d76e8a341c6f 100644 --- a/test/CodeGen/R600/fp_to_sint.ll +++ b/test/CodeGen/R600/fp_to_sint.ll @@ -1,16 +1,27 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC +declare float @llvm.fabs.f32(float) #0 + ; FUNC-LABEL: {{^}}fp_to_sint_i32: ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; SI: v_cvt_i32_f32_e32 ; SI: s_endpgm -define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) { +define void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { %conv = fptosi float %in to i32 store i32 %conv, i32 addrspace(1)* %out ret void } +; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs: +; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}} +define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { + %in.fabs = call float @llvm.fabs.f32(float %in) #0 + %conv = fptosi float %in.fabs to i32 + store i32 %conv, i32 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}fp_to_sint_v2i32: ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} @@ -214,3 +225,5 @@ define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { store <4 x i64> %conv, <4 x i64> addrspace(1)* %out ret void } + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/R600/hsa.ll index 2e79866362ac..5ce3beaa16c0 100644 --- a/test/CodeGen/R600/hsa.ll +++ b/test/CodeGen/R600/hsa.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s ; HSA: {{^}}simple: +; HSA: .section .hsa.version +; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0" ; Make sure we are setting the ATC bit: ; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000 ; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0 diff --git a/test/CodeGen/R600/misaligned-load.ll b/test/CodeGen/R600/misaligned-load.ll new file mode 100644 index 000000000000..6290ca09d502 --- /dev/null +++ b/test/CodeGen/R600/misaligned-load.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI: @byte_aligned_load64 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: s_endpgm +define void @byte_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) { +entry: + %0 = load i64 addrspace(3)* %in, align 1 + store i64 %0, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/scratch-buffer.ll b/test/CodeGen/R600/scratch-buffer.ll new file mode 100644 index 000000000000..740328a495da --- /dev/null +++ b/test/CodeGen/R600/scratch-buffer.ll @@ -0,0 +1,86 @@ +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s + +; When a frame index offset is more than 12-bits, make sure we don't store +; it in mubuf's offset field. + +; Also, make sure we use the same register for storing the scratch buffer addresss +; for both stores. This register is allocated by the register scavenger, so we +; should be able to reuse the same regiser for each scratch buffer access. + +; CHECK-LABEL: {{^}}legal_offset_fi: +; CHECK: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}} +; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen +; CHECK: v_mov_b32_e32 [[OFFSET]], 0x8000 +; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}} + +define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) { +entry: + %scratch0 = alloca [8192 x i32] + %scratch1 = alloca [8192 x i32] + + %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 0 + store i32 1, i32* %scratchptr0 + + %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 0 + store i32 2, i32* %scratchptr1 + + %cmp = icmp eq i32 %cond, 0 + br i1 %cmp, label %if, label %else + +if: + %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset + %if_value = load i32* %if_ptr + br label %done + +else: + %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset + %else_value = load i32* %else_ptr + br label %done + +done: + %value = phi i32 [%if_value, %if], [%else_value, %else] + store i32 %value, i32 addrspace(1)* %out + ret void + + ret void + +} + +; CHECK-LABEL: {{^}}legal_offset_fi_offset +; CHECK: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen +; CHECK: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000 +; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}} + +define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) { +entry: + %scratch0 = alloca [8192 x i32] + %scratch1 = alloca [8192 x i32] + + %offset0 = load i32 addrspace(1)* %offsets + %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %offset0 + store i32 %offset0, i32* %scratchptr0 + + %offsetptr1 = getelementptr i32 addrspace(1)* %offsets, i32 1 + %offset1 = load i32 addrspace(1)* %offsetptr1 + %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %offset1 + store i32 %offset1, i32* %scratchptr1 + + %cmp = icmp eq i32 %cond, 0 + br i1 %cmp, label %if, label %else + +if: + %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset + %if_value = load i32* %if_ptr + br label %done + +else: + %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset + %else_value = load i32* %else_ptr + br label %done + +done: + %value = phi i32 [%if_value, %if], [%else_value, %else] + store i32 %value, i32 addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll index b2f4a9ff05e1..f6dcb388248a 100644 --- a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll +++ b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll @@ -51,8 +51,8 @@ define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspac ; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4 -; CI: buffer_store_dword ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8 +; CI: buffer_store_dword define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll index 373a1967307a..e8315f17ebb6 100644 --- a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll +++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=sparc -no-integrated-as +; RUN: llc < %s -march=sparc ; PR 1557 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128" diff --git a/test/CodeGen/SPARC/inlineasm.ll b/test/CodeGen/SPARC/inlineasm.ll index 526cde8de8b4..2650533b7fec 100644 --- a/test/CodeGen/SPARC/inlineasm.ll +++ b/test/CodeGen/SPARC/inlineasm.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s +; RUN: llc -march=sparc <%s | FileCheck %s ; CHECK-LABEL: test_constraint_r ; CHECK: add %o1, %o0, %o0 diff --git a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll index 6a67616d53be..6013b17d9372 100644 --- a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll +++ b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=sparc -no-integrated-as +; RUN: llc < %s -march=sparc ; ModuleID = 'mult-alt-generic.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32" target triple = "sparc" |