path: root/test/CodeGen/Thumb2
author:    Dimitry Andric <dim@FreeBSD.org>  2013-04-08 18:41:23 +0000
committer: Dimitry Andric <dim@FreeBSD.org>  2013-04-08 18:41:23 +0000
commit:    4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 (patch)
tree:      06099edc18d30894081a822b756f117cbe0b8207 /test/CodeGen/Thumb2
parent:    482e7bddf617ae804dc47133cb07eb4aa81e45de (diff)
Diffstat (limited to 'test/CodeGen/Thumb2')
-rw-r--r--  test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll           | 53
-rw-r--r--  test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll | 14
-rw-r--r--  test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll     | 19
-rw-r--r--  test/CodeGen/Thumb2/aligned-spill.ll                                | 14
-rw-r--r--  test/CodeGen/Thumb2/cortex-fp.ll                                    |  2
-rw-r--r--  test/CodeGen/Thumb2/crash.ll                                        |  9
-rw-r--r--  test/CodeGen/Thumb2/thumb2-ldr_post.ll                              |  4
-rw-r--r--  test/CodeGen/Thumb2/thumb2-mul.ll                                   |  2
-rw-r--r--  test/CodeGen/Thumb2/thumb2-shifter.ll                               | 82
-rw-r--r--  test/CodeGen/Thumb2/thumb2-spill-q.ll                               |  4
10 files changed, 177 insertions, 26 deletions
diff --git a/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
new file mode 100644
index 000000000000..502b138f65c8
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios6.0.0 -verify-machineinstrs
+
+; Check to make sure the tail-call return at the end doesn't use a
+; callee-saved register. Register hinting from t2LDRDri was getting this
+; wrong. The intervening call will force allocation to try a high register
+; first, so the hint will attempt to fire, but must be rejected due to
+; not being in the allocation order for the tcGPR register class.
+; The machine instruction verifier will make sure that all actually worked
+; out the way it's supposed to.
+
+%"myclass" = type { %struct.foo }
+%struct.foo = type { i32, [40 x i8] }
+
+define hidden void @func(i8* %Data) nounwind ssp {
+ %1 = getelementptr inbounds i8* %Data, i32 12
+ %2 = bitcast i8* %1 to %"myclass"*
+ tail call void @abc(%"myclass"* %2) nounwind
+ tail call void @def(%"myclass"* %2) nounwind
+ %3 = getelementptr inbounds i8* %Data, i32 8
+ %4 = bitcast i8* %3 to i8**
+ %5 = load i8** %4, align 4, !tbaa !0
+ tail call void @ghi(i8* %5) nounwind
+ %6 = bitcast i8* %Data to void (i8*)**
+ %7 = load void (i8*)** %6, align 4, !tbaa !0
+ %8 = getelementptr inbounds i8* %Data, i32 4
+ %9 = bitcast i8* %8 to i8**
+ %10 = load i8** %9, align 4, !tbaa !0
+ %11 = icmp eq i8* %Data, null
+ br i1 %11, label %14, label %12
+
+; <label>:12 ; preds = %0
+ %13 = tail call %"myclass"* @jkl(%"myclass"* %2) nounwind
+ tail call void @mno(i8* %Data) nounwind
+ br label %14
+
+; <label>:14 ; preds = %12, %0
+ tail call void %7(i8* %10) nounwind
+ ret void
+}
+
+declare void @mno(i8*)
+
+declare void @def(%"myclass"*)
+
+declare void @abc(%"myclass"*)
+
+declare void @ghi(i8*)
+
+declare %"myclass"* @jkl(%"myclass"*) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
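
Note: this new test carries no CHECK lines, so it passes whenever llc itself exits cleanly with the machine verifier enabled. A sketch of running it by hand, assuming an LLVM source checkout as the working directory:

    # No FileCheck here: success means llc exited 0 under -verify-machineinstrs.
    $ llc < test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll \
          -mtriple=thumbv7s-apple-ios6.0.0 -verify-machineinstrs > /dev/null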
diff --git a/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
new file mode 100644
index 000000000000..937ecc0d6679
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+
+define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind {
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128]
+; CHECK: add.w r[[ADDR:[0-9]+]], r[[SOURCE]], {{r[0-9]+}}, lsl #2
+; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[ADDR]]:32]
+; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0]
+ %val = extractelement <4 x i32> %phitmp, i32 %lane
+ %r1 = insertelement <4 x i32> undef, i32 %val, i32 1
+ %r2 = insertelement <4 x i32> %r1, i32 %val, i32 2
+ %r3 = insertelement <4 x i32> %r2, i32 %val, i32 3
+ store <4 x i32> %r3, <4 x i32>* %p, align 4
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll
new file mode 100644
index 000000000000..203815fadc9c
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+; Testing that these don't crash/assert. The loop vectorizer can end up
+; with odd constructs like this. The code actually generated is incidental.
+define <1 x i64> @test_zext(i32 %a) nounwind {
+; CHECK: test_zext:
+ %Cmp = icmp uge i32 %a, 42
+ %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0
+ %Se = zext <1 x i1> %vec to <1 x i64>
+ ret <1 x i64> %Se
+}
+
+define <1 x i64> @test_sext(i32 %a) nounwind {
+; CHECK: test_sext:
+ %Cmp = icmp uge i32 %a, 42
+ %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0
+ %Se = sext <1 x i1> %vec to <1 x i64>
+ ret <1 x i64> %Se
+}
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
index c98ca8098583..3a2803f91f16 100644
--- a/test/CodeGen/Thumb2/aligned-spill.ll
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -26,8 +26,8 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
-; NEON: vst1.64 {d12, d13, d14, d15}, [r4, :128]
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
+; NEON: vst1.64 {d12, d13, d14, d15}, [r4:128]
; Stack pointer adjustment for the stack frame contents.
; This could legally happen before the spills.
; Since the spill slot is only 8 bytes, technically it would be fine to only
@@ -36,8 +36,8 @@ entry:
; NEON: sub sp, #16
; The epilog is free to use another scratch register than r4.
; NEON: add r[[R4:[0-9]+]], sp, #16
-; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]], :128]!
-; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]], :128]
+; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]]:128]!
+; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]]:128]
; The stack pointer restore must happen after the reloads.
; NEON: mov sp,
; NEON: pop
@@ -57,8 +57,8 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
-; NEON: vst1.64 {d12, d13}, [r4, :128]
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
+; NEON: vst1.64 {d12, d13}, [r4:128]
; NEON: vstr d14, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9, d10, d11},
@@ -84,7 +84,7 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9}, [r4, :128]
+; NEON: vst1.64 {d8, d9}, [r4:128]
; NEON: vstr d10, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9},
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
index b7df2fbf546c..f6cea72caecd 100644
--- a/test/CodeGen/Thumb2/cortex-fp.ll
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -7,7 +7,7 @@ define float @foo(float %a, float %b) {
entry:
; CHECK: foo
; CORTEXM3: blx ___mulsf3
-; CORTEXM4: vmul.f32 s0, s2, s0
+; CORTEXM4: vmul.f32 s
; CORTEXA8: vmul.f32 d
%0 = fmul float %a, %b
ret float %0
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index cb4d08058f41..6ce0b82b94d7 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs -O0
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"
@@ -76,3 +77,11 @@ entry:
store i32 %num, i32* %p2, align 4
ret void
}
+
+; Check RAFast handling of inline assembly with many dense clobbers.
+; The large tuple aliases of the vector registers can cause problems.
+define void @rdar13249625(double* nocapture %p) nounwind {
+ %1 = tail call double asm sideeffect "@ $0", "=w,~{d0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15}"() nounwind
+ store double %1, double* %p, align 4
+ ret void
+}
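
Note: the added -O0 RUN line exists for this test, since at -O0 llc uses the fast register allocator (RAFast) that the inline-asm clobber case exercises. A hand-run sketch, path assumed:

    # -O0 selects the fast register allocator, the target of the rdar13249625 test.
    $ llc < test/CodeGen/Thumb2/crash.ll -mtriple=thumbv7-apple-darwin \
          -mcpu=cortex-a8 -verify-machineinstrs -O0 > /dev/null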
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index 2178eecb43e4..bce847471beb 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep "ldr.*\[.*\]," | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @test(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b ; <i32> [#uses=2]
@@ -9,4 +8,5 @@ define i32 @test(i32 %a, i32 %b, i32 %c) {
%tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1]
ret i32 %tmp5
}
+; CHECK: ldr r{{.*}}, [{{.*}}],
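
Note: in the converted RUN line, lit substitutes %s with the test's own path, so the grep-to-FileCheck change expands to roughly the following (path assumed); FileCheck takes the same file as its argument and matches the output against its CHECK: lines:

    # %s expands to the test file; FileCheck reads the CHECK: patterns from it.
    $ llc < test/CodeGen/Thumb2/thumb2-ldr_post.ll -march=thumb -mattr=+thumb2 | \
        FileCheck test/CodeGen/Thumb2/thumb2-ldr_post.ll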
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index ac059bdaf05d..a8134e630821 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -15,7 +15,7 @@ entry:
; CHECK: t1:
; CHECK: mla r0, r2, r0, r1
; CHECK: add.w r0, r0, r0, lsl #3
-; CHECL: add.w r0, r3, r0, lsl #2
+; CHECK: add.w r0, r3, r0, lsl #2
%mul = mul i32 %n, %i
%add = add i32 %mul, %j
%0 = ptrtoint %struct.CMPoint* %thePoints to i32
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index 98854a1205f8..05dd90cfbfed 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,24 +1,27 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s --check-prefix=A8
+; RUN: llc < %s -march=thumb -mcpu=swift | FileCheck %s --check-prefix=SWIFT
+
+; rdar://12892707
define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsl
-; CHECK: add.w r0, r0, r1, lsl #16
+; A8: t2ADDrs_lsl
+; A8: add.w r0, r0, r1, lsl #16
%A = shl i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
}
define i32 @t2ADDrs_lsr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsr
-; CHECK: add.w r0, r0, r1, lsr #16
+; A8: t2ADDrs_lsr
+; A8: add.w r0, r0, r1, lsr #16
%A = lshr i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
}
define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_asr
-; CHECK: add.w r0, r0, r1, asr #16
+; A8: t2ADDrs_asr
+; A8: add.w r0, r0, r1, asr #16
%A = ashr i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
@@ -26,8 +29,8 @@ define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
; i32 ror(n) = (x >> n) | (x << (32 - n))
define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_ror
-; CHECK: add.w r0, r0, r1, ror #16
+; A8: t2ADDrs_ror
+; A8: add.w r0, r0, r1, ror #16
%A = lshr i32 %Y, 16
%B = shl i32 %Y, 16
%C = or i32 %B, %A
@@ -36,13 +39,66 @@ define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
}
define i32 @t2ADDrs_noRegShift(i32 %X, i32 %Y, i8 %sh) {
-; CHECK: t2ADDrs_noRegShift
-; CHECK: uxtb r2, r2
-; CHECK: lsls r1, r2
-; CHECK: add r0, r1
+; A8: t2ADDrs_noRegShift
+; A8: uxtb r2, r2
+; A8: lsls r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift
+; SWIFT-NOT: lsls
+; SWIFT: lsl.w
+ %shift.upgrd.1 = zext i8 %sh to i32
+ %A = shl i32 %Y, %shift.upgrd.1
+ %B = add i32 %X, %A
+ ret i32 %B
+}
+
+define i32 @t2ADDrs_noRegShift2(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift2
+; A8: uxtb r2, r2
+; A8: lsrs r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift2
+; SWIFT-NOT: lsrs
+; SWIFT: lsr.w
+ %shift.upgrd.1 = zext i8 %sh to i32
+ %A = lshr i32 %Y, %shift.upgrd.1
+ %B = add i32 %X, %A
+ ret i32 %B
+}
+
+define i32 @t2ADDrs_noRegShift3(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift3
+; A8: uxtb r2, r2
+; A8: asrs r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift3
+; SWIFT-NOT: asrs
+; SWIFT: asr.w
+ %shift.upgrd.1 = zext i8 %sh to i32
+ %A = ashr i32 %Y, %shift.upgrd.1
+ %B = add i32 %X, %A
+ ret i32 %B
+}
+
+define i32 @t2ADDrs_optsize(i32 %X, i32 %Y, i8 %sh) optsize {
+; SWIFT: t2ADDrs_optsize
+; SWIFT-NOT: lsl.w
+; SWIFT: lsls
%shift.upgrd.1 = zext i8 %sh to i32
%A = shl i32 %Y, %shift.upgrd.1
%B = add i32 %X, %A
ret i32 %B
}
+define i32 @t2ADDrs_minsize(i32 %X, i32 %Y, i8 %sh) minsize {
+; SWIFT: t2ADDrs_minsize
+; SWIFT-NOT: lsr.w
+; SWIFT: lsrs
+ %shift.upgrd.1 = zext i8 %sh to i32
+ %A = lshr i32 %Y, %shift.upgrd.1
+ %B = add i32 %X, %A
+ ret i32 %B
+}
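
Note: the two RUN lines above drive FileCheck with different prefixes, so the Cortex-A8 and Swift expectations are enforced independently against the same IR. Roughly, with paths assumed:

    # --check-prefix=A8 matches only the A8: lines; SWIFT: lines are ignored, and vice versa.
    $ llc < test/CodeGen/Thumb2/thumb2-shifter.ll -march=thumb -mcpu=cortex-a8 | \
        FileCheck test/CodeGen/Thumb2/thumb2-shifter.ll --check-prefix=A8
    $ llc < test/CodeGen/Thumb2/thumb2-shifter.ll -march=thumb -mcpu=swift | \
        FileCheck test/CodeGen/Thumb2/thumb2-shifter.ll --check-prefix=SWIFT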
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index d9a0617f5a46..5bff268e2c3e 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
; CHECK: bic r4, r4, #15
-; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
-; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
+; CHECK: vst1.64 {{.*}}[{{.*}}:128]
+; CHECK: vld1.64 {{.*}}[{{.*}}:128]
entry:
%aligned_vec = alloca <4 x float>, align 16
%"alloca point" = bitcast i32 0 to i32