author     Dimitry Andric <dim@FreeBSD.org>    2013-04-08 18:41:23 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2013-04-08 18:41:23 +0000
commit     4a16efa3e43e35f0cc9efe3a67f620f0017c3d36
tree       06099edc18d30894081a822b756f117cbe0b8207  /test/CodeGen/Thumb2
parent     482e7bddf617ae804dc47133cb07eb4aa81e45de
Diffstat (limited to 'test/CodeGen/Thumb2')
-rw-r--r--  test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll           | 53
-rw-r--r--  test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll | 14
-rw-r--r--  test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll     | 19
-rw-r--r--  test/CodeGen/Thumb2/aligned-spill.ll                                | 14
-rw-r--r--  test/CodeGen/Thumb2/cortex-fp.ll                                    |  2
-rw-r--r--  test/CodeGen/Thumb2/crash.ll                                        |  9
-rw-r--r--  test/CodeGen/Thumb2/thumb2-ldr_post.ll                              |  4
-rw-r--r--  test/CodeGen/Thumb2/thumb2-mul.ll                                   |  2
-rw-r--r--  test/CodeGen/Thumb2/thumb2-shifter.ll                               | 82
-rw-r--r--  test/CodeGen/Thumb2/thumb2-spill-q.ll                               |  4
10 files changed, 177 insertions, 26 deletions
diff --git a/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
new file mode 100644
index 000000000000..502b138f65c8
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios6.0.0 -verify-machineinstrs
+
+; Check to make sure the tail-call return at the end doesn't use a
+; callee-saved register. Register hinting from t2LDRDri was getting this
+; wrong. The intervening call will force allocation to try a high register
+; first, so the hint will attempt to fire, but must be rejected due to
+; not being in the allocation order for the tcGPR register class.
+; The machine instruction verifier will make sure that all actually worked
+; out the way it's supposed to.
+
+%"myclass" = type { %struct.foo }
+%struct.foo = type { i32, [40 x i8] }
+
+define hidden void @func(i8* %Data) nounwind ssp {
+  %1 = getelementptr inbounds i8* %Data, i32 12
+  %2 = bitcast i8* %1 to %"myclass"*
+  tail call void @abc(%"myclass"* %2) nounwind
+  tail call void @def(%"myclass"* %2) nounwind
+  %3 = getelementptr inbounds i8* %Data, i32 8
+  %4 = bitcast i8* %3 to i8**
+  %5 = load i8** %4, align 4, !tbaa !0
+  tail call void @ghi(i8* %5) nounwind
+  %6 = bitcast i8* %Data to void (i8*)**
+  %7 = load void (i8*)** %6, align 4, !tbaa !0
+  %8 = getelementptr inbounds i8* %Data, i32 4
+  %9 = bitcast i8* %8 to i8**
+  %10 = load i8** %9, align 4, !tbaa !0
+  %11 = icmp eq i8* %Data, null
+  br i1 %11, label %14, label %12
+
+; <label>:12                                      ; preds = %0
+  %13 = tail call %"myclass"* @jkl(%"myclass"* %2) nounwind
+  tail call void @mno(i8* %Data) nounwind
+  br label %14
+
+; <label>:14                                      ; preds = %12, %0
+  tail call void %7(i8* %10) nounwind
+  ret void
+}
+
+declare void @mno(i8*)
+
+declare void @def(%"myclass"*)
+
+declare void @abc(%"myclass"*)
+
+declare void @ghi(i8*)
+
+declare %"myclass"* @jkl(%"myclass"*) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
new file mode 100644
index 000000000000..937ecc0d6679
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+
+define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind {
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128]
+; CHECK: add.w r[[ADDR:[0-9]+]], r[[SOURCE]], {{r[0-9]+}}, lsl #2
+; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[ADDR]]:32]
+; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0]
+  %val = extractelement <4 x i32> %phitmp, i32 %lane
+  %r1 = insertelement <4 x i32> undef, i32 %val, i32 1
+  %r2 = insertelement <4 x i32> %r1, i32 %val, i32 2
+  %r3 = insertelement <4 x i32> %r2, i32 %val, i32 3
+  store <4 x i32> %r3, <4 x i32>* %p, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll
new file mode 100644
index 000000000000..203815fadc9c
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+; Testing that these don't crash/assert. The loop vectorizer can end up
+; with odd constructs like this. The code actually generated is incidental.
+define <1 x i64> @test_zext(i32 %a) nounwind {
+; CHECK: test_zext:
+  %Cmp = icmp uge i32 %a, 42
+  %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0
+  %Se = zext <1 x i1> %vec to <1 x i64>
+  ret <1 x i64> %Se
+}
+
+define <1 x i64> @test_sext(i32 %a) nounwind {
+; CHECK: test_sext:
+  %Cmp = icmp uge i32 %a, 42
+  %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0
+  %Se = sext <1 x i1> %vec to <1 x i64>
+  ret <1 x i64> %Se
+}
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
index c98ca8098583..3a2803f91f16 100644
--- a/test/CodeGen/Thumb2/aligned-spill.ll
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -26,8 +26,8 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
-; NEON: vst1.64 {d12, d13, d14, d15}, [r4, :128]
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
+; NEON: vst1.64 {d12, d13, d14, d15}, [r4:128]
; Stack pointer adjustment for the stack frame contents.
; This could legally happen before the spills.
; Since the spill slot is only 8 bytes, technically it would be fine to only
@@ -36,8 +36,8 @@ entry:
; NEON: sub sp, #16
; The epilog is free to use another scratch register than r4.
; NEON: add r[[R4:[0-9]+]], sp, #16
-; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]], :128]!
-; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]], :128]
+; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]]:128]!
+; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]]:128]
; The stack pointer restore must happen after the reloads.
; NEON: mov sp,
; NEON: pop
@@ -57,8 +57,8 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
-; NEON: vst1.64 {d12, d13}, [r4, :128]
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
+; NEON: vst1.64 {d12, d13}, [r4:128]
; NEON: vstr d14, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9, d10, d11},
@@ -84,7 +84,7 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9}, [r4, :128]
+; NEON: vst1.64 {d8, d9}, [r4:128]
; NEON: vstr d10, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9},
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
index b7df2fbf546c..f6cea72caecd 100644
--- a/test/CodeGen/Thumb2/cortex-fp.ll
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -7,7 +7,7 @@ define float @foo(float %a, float %b) {
entry:
; CHECK: foo
; CORTEXM3: blx ___mulsf3
-; CORTEXM4: vmul.f32 s0, s2, s0
+; CORTEXM4: vmul.f32 s
; CORTEXA8: vmul.f32 d
%0 = fmul float %a, %b
ret float %0
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index cb4d08058f41..6ce0b82b94d7 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs -O0
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"

@@ -76,3 +77,11 @@ entry:
store i32 %num, i32* %p2, align 4
ret void
}
+
+; Check RAFast handling of inline assembly with many dense clobbers.
+; The large tuple aliases of the vector registers can cause problems.
+define void @rdar13249625(double* nocapture %p) nounwind {
+  %1 = tail call double asm sideeffect "@ $0", "=w,~{d0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15}"() nounwind
+  store double %1, double* %p, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index 2178eecb43e4..bce847471beb 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep "ldr.*\[.*\]," | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s

define i32 @test(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b ; <i32> [#uses=2]
@@ -9,4 +8,5 @@ define i32 @test(i32 %a, i32 %b, i32 %c) {
%tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1]
ret i32 %tmp5
}
+; CHECK: ldr r{{.*}}, [{{.*}}],
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index ac059bdaf05d..a8134e630821 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -15,7 +15,7 @@ entry:
; CHECK: t1:
; CHECK: mla r0, r2, r0, r1
; CHECK: add.w r0, r0, r0, lsl #3
-; CHECL: add.w r0, r3, r0, lsl #2
+; CHECK: add.w r0, r3, r0, lsl #2
%mul = mul i32 %n, %i
%add = add i32 %mul, %j
%0 = ptrtoint %struct.CMPoint* %thePoints to i32
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index 98854a1205f8..05dd90cfbfed 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,24 +1,27 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s --check-prefix=A8
+; RUN: llc < %s -march=thumb -mcpu=swift | FileCheck %s --check-prefix=SWIFT
+
+; rdar://12892707

define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsl
-; CHECK: add.w r0, r0, r1, lsl #16
+; A8: t2ADDrs_lsl
+; A8: add.w r0, r0, r1, lsl #16
%A = shl i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
}

define i32 @t2ADDrs_lsr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsr
-; CHECK: add.w r0, r0, r1, lsr #16
+; A8: t2ADDrs_lsr
+; A8: add.w r0, r0, r1, lsr #16
%A = lshr i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
}

define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_asr
-; CHECK: add.w r0, r0, r1, asr #16
+; A8: t2ADDrs_asr
+; A8: add.w r0, r0, r1, asr #16
%A = ashr i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
@@ -26,8 +29,8 @@ define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {

; i32 ror(n) = (x >> n) | (x << (32 - n))
define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_ror
-; CHECK: add.w r0, r0, r1, ror #16
+; A8: t2ADDrs_ror
+; A8: add.w r0, r0, r1, ror #16
%A = lshr i32 %Y, 16
%B = shl i32 %Y, 16
%C = or i32 %B, %A
@@ -36,13 +39,66 @@ define i32 @t2ADDrs_noRegShift(i32 %X, i32 %Y, i8 %sh) {
-; CHECK: t2ADDrs_noRegShift
-; CHECK: uxtb r2, r2
-; CHECK: lsls r1, r2
-; CHECK: add r0, r1
+; A8: t2ADDrs_noRegShift
+; A8: uxtb r2, r2
+; A8: lsls r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift
+; SWIFT-NOT: lsls
+; SWIFT: lsl.w
%shift.upgrd.1 = zext i8 %sh to i32
%A = shl i32 %Y, %shift.upgrd.1
%B = add i32 %X, %A
ret i32 %B
}
+
+define i32 @t2ADDrs_noRegShift2(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift2
+; A8: uxtb r2, r2
+; A8: lsrs r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift2
+; SWIFT-NOT: lsrs
+; SWIFT: lsr.w
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = lshr i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
+  ret i32 %B
+}
+
+define i32 @t2ADDrs_noRegShift3(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift3
+; A8: uxtb r2, r2
+; A8: asrs r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift3
+; SWIFT-NOT: asrs
+; SWIFT: asr.w
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = ashr i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
+  ret i32 %B
+}
+
+define i32 @t2ADDrs_optsize(i32 %X, i32 %Y, i8 %sh) optsize {
+; SWIFT: t2ADDrs_optsize
+; SWIFT-NOT: lsl.w
+; SWIFT: lsls
%shift.upgrd.1 = zext i8 %sh to i32
%A = shl i32 %Y, %shift.upgrd.1
%B = add i32 %X, %A
ret i32 %B
}
+
+define i32 @t2ADDrs_minsize(i32 %X, i32 %Y, i8 %sh) minsize {
+; SWIFT: t2ADDrs_minsize
+; SWIFT-NOT: lsr.w
+; SWIFT: lsrs
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = lshr i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
+  ret i32 %B
+}
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index d9a0617f5a46..5bff268e2c3e 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
; CHECK: bic r4, r4, #15
-; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
-; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
+; CHECK: vst1.64 {{.*}}[{{.*}}:128]
+; CHECK: vld1.64 {{.*}}[{{.*}}:128]
entry:
%aligned_vec = alloca <4 x float>, align 16
%"alloca point" = bitcast i32 0 to i32
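
Note: the recurring mechanical hunks above (aligned-spill.ll, thumb2-spill-q.ll, and the CHECK lines in the new vduplane test) track a change in how LLVM prints the alignment hint on NEON address operands: the old asm printer emitted it as if it were a separate operand, "[r4, :128]", while the updated output attaches it to the base register, "[r4:128]". A minimal before/after sketch, reusing the registers from aligned-spill.ll (the instruction and its encoding are unchanged; only the textual spelling differs):

    vst1.64 {d8, d9, d10, d11}, [r4, :128]!   @ old asm-printer spelling
    vst1.64 {d8, d9, d10, d11}, [r4:128]!     @ spelling the updated tests expect

Each test's RUN line doubles as the recipe for reproducing it by hand, with %s replaced by the test file, e.g. "llc < thumb2-shifter.ll -march=thumb -mcpu=swift | FileCheck thumb2-shifter.ll --check-prefix=SWIFT", assuming llc and FileCheck from a matching LLVM build are on PATH.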