Diffstat (limited to 'test/CodeGen')
-rw-r--r--  test/CodeGen/AArch64/arm64-platform-reg.ll              | 15
-rw-r--r--  test/CodeGen/AArch64/ghc-cc.ll                           | 89
-rw-r--r--  test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll        | 55
-rw-r--r--  test/CodeGen/Mips/fcmp.ll                                | 90
-rw-r--r--  test/CodeGen/R600/basic-loop.ll                          |  1
-rw-r--r--  test/CodeGen/R600/ctpop.ll                               | 66
-rw-r--r--  test/CodeGen/R600/ctpop64.ll                             |  3
-rw-r--r--  test/CodeGen/R600/ds_read2st64.ll                        |  4
-rw-r--r--  test/CodeGen/R600/fp_to_sint.ll                          | 15
-rw-r--r--  test/CodeGen/R600/hsa.ll                                 |  2
-rw-r--r--  test/CodeGen/R600/misaligned-load.ll                     | 18
-rw-r--r--  test/CodeGen/R600/scratch-buffer.ll                      | 86
-rw-r--r--  test/CodeGen/R600/si-triv-disjoint-mem-access.ll         |  2
-rw-r--r--  test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll  |  2
-rw-r--r--  test/CodeGen/SPARC/inlineasm.ll                          |  2
-rw-r--r--  test/CodeGen/SPARC/mult-alt-generic-sparc.ll             |  2
16 files changed, 401 insertions(+), 51 deletions(-)
diff --git a/test/CodeGen/AArch64/arm64-platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll
index 651c793f73a4..b0d3ee0ff8a3 100644
--- a/test/CodeGen/AArch64/arm64-platform-reg.ll
+++ b/test/CodeGen/AArch64/arm64-platform-reg.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
+; RUN: llc -mtriple=arm64-freebsd-gnu -aarch64-reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
; x18 is reserved as a platform register on Darwin but not on other
@@ -16,11 +17,11 @@ define void @keep_live() {
; CHECK: ldr x18
; CHECK: str x18
-; CHECK-DARWIN-NOT: ldr fp
-; CHECK-DARWIN-NOT: ldr x18
-; CHECK-DARWIN: Spill
-; CHECK-DARWIN-NOT: ldr fp
-; CHECK-DARWIN-NOT: ldr x18
-; CHECK-DARWIN: ret
+; CHECK-RESERVE-X18-NOT: ldr fp
+; CHECK-RESERVE-X18-NOT: ldr x18
+; CHECK-RESERVE-X18: Spill
+; CHECK-RESERVE-X18-NOT: ldr fp
+; CHECK-RESERVE-X18-NOT: ldr x18
+; CHECK-RESERVE-X18: ret
ret void
}
diff --git a/test/CodeGen/AArch64/ghc-cc.ll b/test/CodeGen/AArch64/ghc-cc.ll
new file mode 100644
index 000000000000..505bd5fca66d
--- /dev/null
+++ b/test/CodeGen/AArch64/ghc-cc.ll
@@ -0,0 +1,89 @@
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+; Check that the GHC calling convention works on AArch64.
+
+@base = external global i64 ; assigned to register: r19
+@sp = external global i64 ; assigned to register: r20
+@hp = external global i64 ; assigned to register: r21
+@r1 = external global i64 ; assigned to register: r22
+@r2 = external global i64 ; assigned to register: r23
+@r3 = external global i64 ; assigned to register: r24
+@r4 = external global i64 ; assigned to register: r25
+@r5 = external global i64 ; assigned to register: r26
+@r6 = external global i64 ; assigned to register: r27
+@splim = external global i64 ; assigned to register: r28
+
+@f1 = external global float ; assigned to register: s8
+@f2 = external global float ; assigned to register: s9
+@f3 = external global float ; assigned to register: s10
+@f4 = external global float ; assigned to register: s11
+
+@d1 = external global double ; assigned to register: d12
+@d2 = external global double ; assigned to register: d13
+@d3 = external global double ; assigned to register: d14
+@d4 = external global double ; assigned to register: d15
+
+define ghccc i64 @addtwo(i64 %x, i64 %y) nounwind {
+entry:
+ ; CHECK-LABEL: addtwo
+ ; CHECK: add x0, x19, x20
+ ; CHECK-NEXT: ret
+ %0 = add i64 %x, %y
+ ret i64 %0
+}
+
+define void @zap(i64 %a, i64 %b) nounwind {
+entry:
+ ; CHECK-LABEL: zap
+ ; CHECK-NOT: mov {{x[0-9]+}}, sp
+ ; CHECK: bl addtwo
+ ; CHECK-NEXT: bl foo
+ %0 = call ghccc i64 @addtwo(i64 %a, i64 %b)
+ call void @foo() nounwind
+ ret void
+}
+
+define ghccc void @foo_i64 () nounwind {
+entry:
+ ; CHECK-LABEL: foo_i64
+ ; CHECK: adrp {{x[0-9]+}}, base
+ ; CHECK-NEXT: ldr x19, [{{x[0-9]+}}, :lo12:base]
+ ; CHECK-NEXT: bl bar_i64
+ ; CHECK-NEXT: ret
+
+ %0 = load i64* @base
+ tail call ghccc void @bar_i64( i64 %0 ) nounwind
+ ret void
+}
+
+define ghccc void @foo_float () nounwind {
+entry:
+ ; CHECK-LABEL: foo_float
+ ; CHECK: adrp {{x[0-9]+}}, f1
+ ; CHECK-NEXT: ldr s8, [{{x[0-9]+}}, :lo12:f1]
+ ; CHECK-NEXT: bl bar_float
+ ; CHECK-NEXT: ret
+
+ %0 = load float* @f1
+ tail call ghccc void @bar_float( float %0 ) nounwind
+ ret void
+}
+
+define ghccc void @foo_double () nounwind {
+entry:
+ ; CHECK-LABEL: foo_double
+ ; CHECK: adrp {{x[0-9]+}}, d1
+ ; CHECK-NEXT: ldr d12, [{{x[0-9]+}}, :lo12:d1]
+ ; CHECK-NEXT: bl bar_double
+ ; CHECK-NEXT: ret
+
+ %0 = load double* @d1
+ tail call ghccc void @bar_double( double %0 ) nounwind
+ ret void
+}
+
+declare ghccc void @foo ()
+
+declare ghccc void @bar_i64 (i64)
+declare ghccc void @bar_float (float)
+declare ghccc void @bar_double (double)
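The register assignments listed in the comment block of ghc-cc.ll are what its CHECK lines rely on: ghccc pins the leading integer arguments to the callee-saved STG registers (x19, x20, ...) rather than the usual x0-x7. A minimal sketch of that behaviour, using hypothetical function names that are not part of this patch, would look like:

; Sketch (hypothetical names): the two i64 parameters of a ghccc function are
; expected in x19 (Base) and x20 (Sp), so the tail call below should lower to
; an add on x20 followed by a branch, with no stack frame.
declare ghccc void @stg_continue(i64, i64)

define ghccc void @stg_step(i64 %base, i64 %sp) nounwind {
entry:
  %sp.next = add i64 %sp, 8
  tail call ghccc void @stg_continue(i64 %base, i64 %sp.next) nounwind
  ret void
}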
diff --git a/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll b/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
new file mode 100644
index 000000000000..f3cc3d82121f
--- /dev/null
+++ b/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V4T
+; RUN: llc -mtriple=thumbv6m-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V6M
+
+; CHECK-LABEL: foo
+define i32 @foo(i32 %z, ...) #0 {
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ %d = alloca i32, align 4
+ %e = alloca i32, align 4
+ %f = alloca i32, align 4
+ %g = alloca i32, align 4
+ %h = alloca i32, align 4
+
+ store i32 1, i32* %a, align 4
+ store i32 2, i32* %b, align 4
+ store i32 3, i32* %c, align 4
+ store i32 4, i32* %d, align 4
+ store i32 5, i32* %e, align 4
+ store i32 6, i32* %f, align 4
+ store i32 7, i32* %g, align 4
+ store i32 8, i32* %h, align 4
+
+ %0 = load i32* %a, align 4
+ %1 = load i32* %b, align 4
+ %2 = load i32* %c, align 4
+ %3 = load i32* %d, align 4
+ %4 = load i32* %e, align 4
+ %5 = load i32* %f, align 4
+ %6 = load i32* %g, align 4
+ %7 = load i32* %h, align 4
+
+ %add = add nsw i32 %0, %1
+ %add4 = add nsw i32 %add, %2
+ %add5 = add nsw i32 %add4, %3
+ %add6 = add nsw i32 %add5, %4
+ %add7 = add nsw i32 %add6, %5
+ %add8 = add nsw i32 %add7, %6
+ %add9 = add nsw i32 %add8, %7
+
+ %addz = add nsw i32 %add9, %z
+ call void @llvm.va_start(i8* null)
+ ret i32 %addz
+
+; CHECK: sub sp, #40
+; CHECK-NEXT: add [[BASE:r[0-9]]], sp, #8
+
+; CHECK-V4T: movs [[NEWBASE:r[0-9]]], [[BASE]]
+; CHECK-V6M: mov [[NEWBASE:r[0-9]]], [[BASE]]
+; CHECK-NEXT: adds [[NEWBASE]], #8
+; CHECK-NEXT: ldm [[NEWBASE]],
+}
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/Mips/fcmp.ll b/test/CodeGen/Mips/fcmp.ll
index b7759831c5a2..8e83b0064ed9 100644
--- a/test/CodeGen/Mips/fcmp.ll
+++ b/test/CodeGen/Mips/fcmp.ll
@@ -781,3 +781,93 @@ define i32 @true_f64(double %a, double %b) nounwind {
%2 = zext i1 %1 to i32
ret i32 %2
}
+
+; The optimizers sometimes produce setlt instead of setolt/setult.
+define float @bug1_f32(float %angle, float %at) #0 {
+entry:
+; ALL-LABEL: bug1_f32:
+
+; 32-C-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12
+; 32-C-DAG: lwc1 $[[T1:f[0-9]+]], %lo($CPI32_0)(
+; 32-C-DAG: c.ole.s $[[T0]], $[[T1]]
+; 32-C-DAG: bc1t
+
+; 32-CMP-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %lo($CPI32_0)(
+; 32-CMP-DAG: cmp.le.s $[[T2:f[0-9]+]], $[[T0]], $[[T1]]
+; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
+; FIXME: This instruction is redundant.
+; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
+; 32-CMP-DAG: bnez $[[T4]],
+
+; 64-C-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12
+; 64-C-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)(
+; 64-C-DAG: c.ole.s $[[T0]], $[[T1]]
+; 64-C-DAG: bc1t
+
+; 64-CMP-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)(
+; 64-CMP-DAG: cmp.le.s $[[T2:f[0-9]+]], $[[T0]], $[[T1]]
+; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
+; FIXME: This instruction is redundant.
+; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
+; 64-CMP-DAG: bnez $[[T4]],
+
+ %add = fadd fast float %at, %angle
+ %cmp = fcmp ogt float %add, 1.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %sub = fadd fast float %add, -1.000000e+00
+ br label %if.end
+
+if.end:
+ %theta.0 = phi float [ %sub, %if.then ], [ %add, %entry ]
+ ret float %theta.0
+}
+
+; The optimizers sometimes produce setlt instead of setolt/setult.
+define double @bug1_f64(double %angle, double %at) #0 {
+entry:
+; ALL-LABEL: bug1_f64:
+
+; 32-C-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12
+; 32-C-DAG: ldc1 $[[T1:f[0-9]+]], %lo($CPI33_0)(
+; 32-C-DAG: c.ole.d $[[T0]], $[[T1]]
+; 32-C-DAG: bc1t
+
+; 32-CMP-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12
+; 32-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %lo($CPI33_0)(
+; 32-CMP-DAG: cmp.le.d $[[T2:f[0-9]+]], $[[T0]], $[[T1]]
+; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
+; FIXME: This instruction is redundant.
+; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
+; 32-CMP-DAG: bnez $[[T4]],
+
+; 64-C-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12
+; 64-C-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)(
+; 64-C-DAG: c.ole.d $[[T0]], $[[T1]]
+; 64-C-DAG: bc1t
+
+; 64-CMP-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12
+; 64-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)(
+; 64-CMP-DAG: cmp.le.d $[[T2:f[0-9]+]], $[[T0]], $[[T1]]
+; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
+; FIXME: This instruction is redundant.
+; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
+; 64-CMP-DAG: bnez $[[T4]],
+
+ %add = fadd fast double %at, %angle
+ %cmp = fcmp ogt double %add, 1.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %sub = fadd fast double %add, -1.000000e+00
+ br label %if.end
+
+if.end:
+ %theta.0 = phi double [ %sub, %if.then ], [ %add, %entry ]
+ ret double %theta.0
+}
+
+attributes #0 = { nounwind readnone "no-nans-fp-math"="true" }
diff --git a/test/CodeGen/R600/basic-loop.ll b/test/CodeGen/R600/basic-loop.ll
index 72737ae273e6..9d0509b38d8a 100644
--- a/test/CodeGen/R600/basic-loop.ll
+++ b/test/CodeGen/R600/basic-loop.ll
@@ -1,4 +1,3 @@
-; XFAIL: *
; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
; CHECK-LABEL: {{^}}test_loop:
diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll
index a47bc876cb96..c64f443ad697 100644
--- a/test/CodeGen/R600/ctpop.ll
+++ b/test/CodeGen/R600/ctpop.ll
@@ -24,8 +24,7 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; XXX - Why 0 in register?
; FUNC-LABEL: {{^}}v_ctpop_i32:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
-; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
@@ -40,8 +39,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
; SI: buffer_load_dword [[VAL0:v[0-9]+]],
; SI: buffer_load_dword [[VAL1:v[0-9]+]],
-; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
-; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
+; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
@@ -73,8 +71,8 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
}
; FUNC-LABEL: {{^}}v_ctpop_v2i32:
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; EG: BCNT_INT
@@ -87,10 +85,10 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v4i32:
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; EG: BCNT_INT
@@ -105,14 +103,14 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v8i32:
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; EG: BCNT_INT
@@ -131,22 +129,22 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v16i32:
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
-; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
+; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; EG: BCNT_INT
diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll
index 8dfe571d3477..9758ac96ea9b 100644
--- a/test/CodeGen/R600/ctpop64.ll
+++ b/test/CodeGen/R600/ctpop64.ll
@@ -21,8 +21,7 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
; FUNC-LABEL: {{^}}v_ctpop_i64:
; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
-; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
-; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
+; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll
index 24834af20404..efd875e93176 100644
--- a/test/CodeGen/R600/ds_read2st64.ll
+++ b/test/CodeGen/R600/ds_read2st64.ll
@@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: ds_read2st64_b32
-; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
@@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: ds_read2st64_b64
-; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll
index 35cfb03d39b4..d76e8a341c6f 100644
--- a/test/CodeGen/R600/fp_to_sint.ll
+++ b/test/CodeGen/R600/fp_to_sint.ll
@@ -1,16 +1,27 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+declare float @llvm.fabs.f32(float) #0
+
; FUNC-LABEL: {{^}}fp_to_sint_i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: v_cvt_i32_f32_e32
; SI: s_endpgm
-define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) {
+define void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) {
%conv = fptosi float %in to i32
store i32 %conv, i32 addrspace(1)* %out
ret void
}
+; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs:
+; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
+define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) {
+ %in.fabs = call float @llvm.fabs.f32(float %in) #0
+ %conv = fptosi float %in.fabs to i32
+ store i32 %conv, i32 addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: {{^}}fp_to_sint_v2i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
@@ -214,3 +225,5 @@ define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
ret void
}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/R600/hsa.ll
index 2e79866362ac..5ce3beaa16c0 100644
--- a/test/CodeGen/R600/hsa.ll
+++ b/test/CodeGen/R600/hsa.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
; HSA: {{^}}simple:
+; HSA: .section .hsa.version
+; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
; Make sure we are setting the ATC bit:
; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
diff --git a/test/CodeGen/R600/misaligned-load.ll b/test/CodeGen/R600/misaligned-load.ll
new file mode 100644
index 000000000000..6290ca09d502
--- /dev/null
+++ b/test/CodeGen/R600/misaligned-load.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI: @byte_aligned_load64
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: s_endpgm
+define void @byte_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) {
+entry:
+ %0 = load i64 addrspace(3)* %in, align 1
+ store i64 %0, i64 addrspace(1)* %out
+ ret void
+}
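For contrast with the align-1 case above, which the CHECK lines expect to expand into eight ds_read_u8 operations, a naturally aligned load of the same i64 should be selectable as a single 64-bit DS read. A sketch of that counterpart (hypothetical function name, not part of this patch) would be:

; Sketch (hypothetical): with the natural 8-byte alignment the same local i64
; load no longer needs the byte-by-byte expansion and can use one ds_read_b64.
define void @dword_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) {
entry:
  %val = load i64 addrspace(3)* %in, align 8
  store i64 %val, i64 addrspace(1)* %out
  ret void
}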
diff --git a/test/CodeGen/R600/scratch-buffer.ll b/test/CodeGen/R600/scratch-buffer.ll
new file mode 100644
index 000000000000..740328a495da
--- /dev/null
+++ b/test/CodeGen/R600/scratch-buffer.ll
@@ -0,0 +1,86 @@
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
+
+; When a frame index offset is more than 12 bits, make sure we don't store
+; it in mubuf's offset field.
+
+; Also, make sure we use the same register for storing the scratch buffer address
+; for both stores. This register is allocated by the register scavenger, so we
+; should be able to reuse the same register for each scratch buffer access.
+
+; CHECK-LABEL: {{^}}legal_offset_fi:
+; CHECK: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
+; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; CHECK: v_mov_b32_e32 [[OFFSET]], 0x8000
+; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+
+define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
+entry:
+ %scratch0 = alloca [8192 x i32]
+ %scratch1 = alloca [8192 x i32]
+
+ %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 0
+ store i32 1, i32* %scratchptr0
+
+ %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 0
+ store i32 2, i32* %scratchptr1
+
+ %cmp = icmp eq i32 %cond, 0
+ br i1 %cmp, label %if, label %else
+
+if:
+ %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32* %if_ptr
+ br label %done
+
+else:
+ %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32* %else_ptr
+ br label %done
+
+done:
+ %value = phi i32 [%if_value, %if], [%else_value, %else]
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+
+}
+
+; CHECK-LABEL: {{^}}legal_offset_fi_offset
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; CHECK: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
+; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+
+define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
+entry:
+ %scratch0 = alloca [8192 x i32]
+ %scratch1 = alloca [8192 x i32]
+
+ %offset0 = load i32 addrspace(1)* %offsets
+ %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %offset0
+ store i32 %offset0, i32* %scratchptr0
+
+ %offsetptr1 = getelementptr i32 addrspace(1)* %offsets, i32 1
+ %offset1 = load i32 addrspace(1)* %offsetptr1
+ %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %offset1
+ store i32 %offset1, i32* %scratchptr1
+
+ %cmp = icmp eq i32 %cond, 0
+ br i1 %cmp, label %if, label %else
+
+if:
+ %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32* %if_ptr
+ br label %done
+
+else:
+ %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32* %else_ptr
+ br label %done
+
+done:
+ %value = phi i32 [%if_value, %if], [%else_value, %else]
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
index b2f4a9ff05e1..f6dcb388248a 100644
--- a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
+++ b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
@@ -51,8 +51,8 @@ define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspac
; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
-; CI: buffer_store_dword
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
+; CI: buffer_store_dword
define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
index 373a1967307a..e8315f17ebb6 100644
--- a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
+++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparc -no-integrated-as
+; RUN: llc < %s -march=sparc
; PR 1557
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
diff --git a/test/CodeGen/SPARC/inlineasm.ll b/test/CodeGen/SPARC/inlineasm.ll
index 526cde8de8b4..2650533b7fec 100644
--- a/test/CodeGen/SPARC/inlineasm.ll
+++ b/test/CodeGen/SPARC/inlineasm.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s
+; RUN: llc -march=sparc <%s | FileCheck %s
; CHECK-LABEL: test_constraint_r
; CHECK: add %o1, %o0, %o0
diff --git a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
index 6a67616d53be..6013b17d9372 100644
--- a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
+++ b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparc -no-integrated-as
+; RUN: llc < %s -march=sparc
; ModuleID = 'mult-alt-generic.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"
target triple = "sparc"