author     Dimitry Andric <dim@FreeBSD.org>    2013-04-08 18:41:23 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2013-04-08 18:41:23 +0000
commit     4a16efa3e43e35f0cc9efe3a67f620f0017c3d36
tree       06099edc18d30894081a822b756f117cbe0b8207  /test/CodeGen/Thumb2
parent     482e7bddf617ae804dc47133cb07eb4aa81e45de
Diffstat (limited to 'test/CodeGen/Thumb2')
-rw-r--r--  test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll           | 53
-rw-r--r--  test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll | 14
-rw-r--r--  test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll     | 19
-rw-r--r--  test/CodeGen/Thumb2/aligned-spill.ll                                | 14
-rw-r--r--  test/CodeGen/Thumb2/cortex-fp.ll                                    |  2
-rw-r--r--  test/CodeGen/Thumb2/crash.ll                                        |  9
-rw-r--r--  test/CodeGen/Thumb2/thumb2-ldr_post.ll                              |  4
-rw-r--r--  test/CodeGen/Thumb2/thumb2-mul.ll                                   |  2
-rw-r--r--  test/CodeGen/Thumb2/thumb2-shifter.ll                               | 82
-rw-r--r--  test/CodeGen/Thumb2/thumb2-spill-q.ll                               |  4
10 files changed, 177 insertions, 26 deletions
diff --git a/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
new file mode 100644
index 000000000000..502b138f65c8
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios6.0.0 -verify-machineinstrs
+
+; Check to make sure the tail-call return at the end doesn't use a
+; callee-saved register. Register hinting from t2LDRDri was getting this
+; wrong. The intervening call will force allocation to try a high register
+; first, so the hint will attempt to fire, but must be rejected due to
+; not being in the allocation order for the tcGPR register class.
+; The machine instruction verifier will make sure that all actually worked
+; out the way it's supposed to.
+
+%"myclass" = type { %struct.foo }
+%struct.foo = type { i32, [40 x i8] }
+
+define hidden void @func(i8* %Data) nounwind ssp {
+  %1 = getelementptr inbounds i8* %Data, i32 12
+  %2 = bitcast i8* %1 to %"myclass"*
+  tail call void @abc(%"myclass"* %2) nounwind
+  tail call void @def(%"myclass"* %2) nounwind
+  %3 = getelementptr inbounds i8* %Data, i32 8
+  %4 = bitcast i8* %3 to i8**
+  %5 = load i8** %4, align 4, !tbaa !0
+  tail call void @ghi(i8* %5) nounwind
+  %6 = bitcast i8* %Data to void (i8*)**
+  %7 = load void (i8*)** %6, align 4, !tbaa !0
+  %8 = getelementptr inbounds i8* %Data, i32 4
+  %9 = bitcast i8* %8 to i8**
+  %10 = load i8** %9, align 4, !tbaa !0
+  %11 = icmp eq i8* %Data, null
+  br i1 %11, label %14, label %12
+
+; <label>:12                                      ; preds = %0
+  %13 = tail call %"myclass"* @jkl(%"myclass"* %2) nounwind
+  tail call void @mno(i8* %Data) nounwind
+  br label %14
+
+; <label>:14                                      ; preds = %12, %0
+  tail call void %7(i8* %10) nounwind
+  ret void
+}
+
+declare void @mno(i8*)
+
+declare void @def(%"myclass"*)
+
+declare void @abc(%"myclass"*)
+
+declare void @ghi(i8*)
+
+declare %"myclass"* @jkl(%"myclass"*) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
new file mode 100644
index 000000000000..937ecc0d6679
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+
+define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind {
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128]
+; CHECK: add.w r[[ADDR:[0-9]+]], r[[SOURCE]], {{r[0-9]+}}, lsl #2
+; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[ADDR]]:32]
+; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0]
+  %val = extractelement <4 x i32> %phitmp, i32 %lane
+  %r1 = insertelement <4 x i32> undef, i32 %val, i32 1
+  %r2 = insertelement <4 x i32> %r1, i32 %val, i32 2
+  %r3 = insertelement <4 x i32> %r2, i32 %val, i32 3
+  store <4 x i32> %r3, <4 x i32>* %p, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll
new file mode 100644
index 000000000000..203815fadc9c
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+; Testing that these don't crash/assert. The loop vectorizer can end up
+; with odd constructs like this. The code actually generated is incidental.
+define <1 x i64> @test_zext(i32 %a) nounwind {
+; CHECK: test_zext:
+  %Cmp = icmp uge i32 %a, 42
+  %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0
+  %Se = zext <1 x i1> %vec to <1 x i64>
+  ret <1 x i64> %Se
+}
+
+define <1 x i64> @test_sext(i32 %a) nounwind {
+; CHECK: test_sext:
+  %Cmp = icmp uge i32 %a, 42
+  %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0
+  %Se = sext <1 x i1> %vec to <1 x i64>
+  ret <1 x i64> %Se
+}
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
index c98ca8098583..3a2803f91f16 100644
--- a/test/CodeGen/Thumb2/aligned-spill.ll
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -26,8 +26,8 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
-; NEON: vst1.64 {d12, d13, d14, d15}, [r4, :128]
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
+; NEON: vst1.64 {d12, d13, d14, d15}, [r4:128]
; Stack pointer adjustment for the stack frame contents.
; This could legally happen before the spills.
; Since the spill slot is only 8 bytes, technically it would be fine to only
@@ -36,8 +36,8 @@ entry:
; NEON: sub sp, #16
; The epilog is free to use another scratch register than r4.
; NEON: add r[[R4:[0-9]+]], sp, #16
-; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]], :128]!
-; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]], :128]
+; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]]:128]!
+; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]]:128]
; The stack pointer restore must happen after the reloads.
; NEON: mov sp,
; NEON: pop
@@ -57,8 +57,8 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
-; NEON: vst1.64 {d12, d13}, [r4, :128]
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
+; NEON: vst1.64 {d12, d13}, [r4:128]
; NEON: vstr d14, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9, d10, d11},
@@ -84,7 +84,7 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9}, [r4, :128]
+; NEON: vst1.64 {d8, d9}, [r4:128]
; NEON: vstr d10, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9},
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
index b7df2fbf546c..f6cea72caecd 100644
--- a/test/CodeGen/Thumb2/cortex-fp.ll
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -7,7 +7,7 @@ define float @foo(float %a, float %b) {
entry:
; CHECK: foo
; CORTEXM3: blx ___mulsf3
-; CORTEXM4: vmul.f32 s0, s2, s0
+; CORTEXM4: vmul.f32 s
; CORTEXA8: vmul.f32 d
%0 = fmul float %a, %b
ret float %0
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index cb4d08058f41..6ce0b82b94d7 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs -O0
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"

@@ -76,3 +77,11 @@ entry:
store i32 %num, i32* %p2, align 4
ret void
}
+
+; Check RAFast handling of inline assembly with many dense clobbers.
+; The large tuple aliases of the vector registers can cause problems.
+define void @rdar13249625(double* nocapture %p) nounwind {
+  %1 = tail call double asm sideeffect "@ $0", "=w,~{d0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15}"() nounwind
+  store double %1, double* %p, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index 2178eecb43e4..bce847471beb 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep "ldr.*\[.*\]," | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s

define i32 @test(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b ; <i32> [#uses=2]
@@ -9,4 +8,5 @@ define i32 @test(i32 %a, i32 %b, i32 %c) {
%tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1]
ret i32 %tmp5
}
+; CHECK: ldr r{{.*}}, [{{.*}}],
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index ac059bdaf05d..a8134e630821 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -15,7 +15,7 @@ entry:
; CHECK: t1:
; CHECK: mla r0, r2, r0, r1
; CHECK: add.w r0, r0, r0, lsl #3
-; CHECL: add.w r0, r3, r0, lsl #2
+; CHECK: add.w r0, r3, r0, lsl #2
%mul = mul i32 %n, %i
%add = add i32 %mul, %j
%0 = ptrtoint %struct.CMPoint* %thePoints to i32
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index 98854a1205f8..05dd90cfbfed 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,24 +1,27 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s --check-prefix=A8
+; RUN: llc < %s -march=thumb -mcpu=swift | FileCheck %s --check-prefix=SWIFT
+
+; rdar://12892707

define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsl
-; CHECK: add.w r0, r0, r1, lsl #16
+; A8: t2ADDrs_lsl
+; A8: add.w r0, r0, r1, lsl #16
%A = shl i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
}

define i32 @t2ADDrs_lsr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsr
-; CHECK: add.w r0, r0, r1, lsr #16
+; A8: t2ADDrs_lsr
+; A8: add.w r0, r0, r1, lsr #16
%A = lshr i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
}

define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_asr
-; CHECK: add.w r0, r0, r1, asr #16
+; A8: t2ADDrs_asr
+; A8: add.w r0, r0, r1, asr #16
%A = ashr i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
@@ -26,8 +29,8 @@ define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {

; i32 ror(n) = (x >> n) | (x << (32 - n))
define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_ror
-; CHECK: add.w r0, r0, r1, ror #16
+; A8: t2ADDrs_ror
+; A8: add.w r0, r0, r1, ror #16
%A = lshr i32 %Y, 16
%B = shl i32 %Y, 16
%C = or i32 %B, %A
@@ -36,13 +39,66 @@ define i32 @t2ADDrs_noRegShift(i32 %X, i32 %Y, i8 %sh) {
-; CHECK: t2ADDrs_noRegShift
-; CHECK: uxtb r2, r2
-; CHECK: lsls r1, r2
-; CHECK: add r0, r1
+; A8: t2ADDrs_noRegShift
+; A8: uxtb r2, r2
+; A8: lsls r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift
+; SWIFT-NOT: lsls
+; SWIFT: lsl.w
%shift.upgrd.1 = zext i8 %sh to i32
%A = shl i32 %Y, %shift.upgrd.1
%B = add i32 %X, %A
ret i32 %B
}
+
+define i32 @t2ADDrs_noRegShift2(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift2
+; A8: uxtb r2, r2
+; A8: lsrs r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift2
+; SWIFT-NOT: lsrs
+; SWIFT: lsr.w
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = lshr i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
+  ret i32 %B
+}
+
+define i32 @t2ADDrs_noRegShift3(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift3
+; A8: uxtb r2, r2
+; A8: asrs r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift3
+; SWIFT-NOT: asrs
+; SWIFT: asr.w
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = ashr i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
+  ret i32 %B
+}
+
+define i32 @t2ADDrs_optsize(i32 %X, i32 %Y, i8 %sh) optsize {
+; SWIFT: t2ADDrs_optsize
+; SWIFT-NOT: lsl.w
+; SWIFT: lsls
%shift.upgrd.1 = zext i8 %sh to i32
%A = shl i32 %Y, %shift.upgrd.1
%B = add i32 %X, %A
ret i32 %B
}
+
+define i32 @t2ADDrs_minsize(i32 %X, i32 %Y, i8 %sh) minsize {
+; SWIFT: t2ADDrs_minsize
+; SWIFT-NOT: lsr.w
+; SWIFT: lsrs
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = lshr i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
+  ret i32 %B
+}
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index d9a0617f5a46..5bff268e2c3e 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
; CHECK: bic r4, r4, #15
-; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
-; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
+; CHECK: vst1.64 {{.*}}[{{.*}}:128]
+; CHECK: vld1.64 {{.*}}[{{.*}}:128]
entry:
%aligned_vec = alloca <4 x float>, align 16
%"alloca point" = bitcast i32 0 to i32
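
Note: the recurring mechanical hunks above (aligned-spill.ll, thumb2-spill-q.ll, and the CHECK lines in the new vduplane test) track a change in how LLVM prints the alignment hint on NEON address operands: the old asm printer emitted it as if it were a separate operand, "[r4, :128]", while the updated output attaches it to the base register, "[r4:128]". A minimal before/after sketch, reusing the registers from aligned-spill.ll (the instruction and its encoding are unchanged; only the textual spelling differs):

    vst1.64 {d8, d9, d10, d11}, [r4, :128]!   @ old asm-printer spelling
    vst1.64 {d8, d9, d10, d11}, [r4:128]!     @ spelling the updated tests expect

Each test's RUN line doubles as the recipe for reproducing it by hand, with %s replaced by the test file, e.g. "llc < thumb2-shifter.ll -march=thumb -mcpu=swift | FileCheck thumb2-shifter.ll --check-prefix=SWIFT", assuming llc and FileCheck from a matching LLVM build are on PATH.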