20 files changed, 257 insertions, 35 deletions
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index 98a5263c2f99b..45d356c3dc676 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -11,8 +11,8 @@
 define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
 ; CHECK: _ZNKSs7compareERKSs:
 ; CHECK:      it  eq
-; CHECK-NEXT: subeq.w r0, r6, r8
-; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r8, r9, pc}
+; CHECK-NEXT: subeq r0, r6, r7
+; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc}
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
   %1 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
index 3f1b9eb8d9d09..2246de35e03cd 100644
--- a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
+++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
@@ -7,17 +7,12 @@
 define void @t() nounwind ssp {
 entry:
 ; CHECK: t:
-; CHECK: mov r0, sp
-; CHECK: bfc r0, #0, #3
-; CHECK: subs r0, #16
-; CHECK: mov sp, r0
-; Yes, this is stupid codegen, but it's correct.
-; CHECK: mov r0, sp
-; CHECK: bfc r0, #0, #3
-; CHECK: subs r0, #16
-; CHECK: mov sp, r0
   %size = mul i32 8, 2
+; CHECK:  subs  r0, #16
+; CHECK:  mov sp, r0
   %vla_a = alloca i8, i32 %size, align 8
+; CHECK:  subs  r0, #16
+; CHECK:  mov sp, r0
   %vla_b = alloca i8, i32 %size, align 8
   unreachable
 }
diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
new file mode 100644
index 0000000000000..abcf13a3e38f9
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s
+
+@.str = private constant [4 x i8] c"%d\0A\00", align 4 ; <[4 x i8]*> [#uses=1]
+
+define internal fastcc i32 @Callee(i32 %i) nounwind {
+entry:
+; CHECK: Callee:
+  %0 = icmp eq i32 %i, 0                          ; <i1> [#uses=1]
+  br i1 %0, label %bb2, label %bb
+
+bb:                                               ; preds = %entry
+  %1 = alloca [1000 x i8], align 4                ; <[1000 x i8]*> [#uses=1]
+  %.sub = getelementptr inbounds [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2]
+  %2 = call i32 (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0]
+  %3 = load i8* %.sub, align 4                    ; <i8> [#uses=1]
+  %4 = sext i8 %3 to i32                          ; <i32> [#uses=1]
+  ret i32 %4
+
+bb2:                                              ; preds = %entry
+; Must restore sp from fp here
+; CHECK: mov sp, r7
+; CHECK: sub sp, #8
+; CHECK: pop
+  ret i32 0
+}
+
+declare i32 @__sprintf_chk(i8*, i32, i32, i8*, ...) nounwind
+
+define i32 @main() nounwind {
+; CHECK: main:
+bb.nph:
+  br label %bb
+
+bb:                                               ; preds = %bb, %bb.nph
+  %0 = phi i32 [ 0, %bb.nph ], [ %3, %bb ]        ; <i32> [#uses=2]
+  %j.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb ]     ; <i32> [#uses=1]
+  %1 = tail call fastcc i32 @Callee(i32 %0) nounwind ; <i32> [#uses=1]
+  %2 = add nsw i32 %1, %j.01                      ; <i32> [#uses=2]
+  %3 = add nsw i32 %0, 1                          ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %3, 10000               ; <i1> [#uses=1]
+  br i1 %exitcond, label %bb2, label %bb
+
+bb2:                                              ; preds = %bb
+; No need to restore sp from fp here.
+; CHECK: printf
+; CHECK-NOT: mov sp, r7
+; CHECK-NOT: sub sp, #12
+; CHECK: pop
+  %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll
new file mode 100644
index 0000000000000..22473bb35a0ae
--- /dev/null
+++ b/test/CodeGen/Thumb2/bfi.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=thumb -mattr=+v6t2 < %s | FileCheck %s
+
+%struct.F = type { [3 x i8], i8 }
+
+@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1]
+
+define void @f1([1 x i32] %f.coerce0) nounwind {
+entry:
+; CHECK: f1
+; CHECK: movs r2, #10
+; CHECK: bfi r1, r2, #22, #4
+  %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
+  %1 = and i32 %0, -62914561                      ; <i32> [#uses=1]
+  %2 = or i32 %1, 41943040                        ; <i32> [#uses=1]
+  store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
+  ret void
+}
+
+define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize {
+entry:
+; CHECK: f2
+; CHECK: lsrs  r1, r1, #7
+; CHECK: bfi r0, r1, #7, #16
+  %and = and i32 %A, -8388481                     ; <i32> [#uses=1]
+  %and2 = and i32 %B, 8388480                     ; <i32> [#uses=1]
+  %or = or i32 %and2, %and                        ; <i32> [#uses=1]
+  ret i32 %or
+}
+
+define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize {
+entry:
+; CHECK: f3
+; CHECK: lsrs  r2, r0, #7
+; CHECK: mov r0, r1
+; CHECK: bfi r0, r2, #7, #16
+  %and = and i32 %A, 8388480                      ; <i32> [#uses=1]
+  %and2 = and i32 %B, -8388481                    ; <i32> [#uses=1]
+  %or = or i32 %and2, %and                        ; <i32> [#uses=1]
+  ret i32 %or
+}
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
new file mode 100644
index 0000000000000..f7ec5a3b577c3
--- /dev/null
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=CORTEXM3
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m4 | FileCheck %s -check-prefix=CORTEXM4
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+
+
+define float @foo(float %a, float %b) {
+entry:
+; CHECK: foo
+; CORTEXM3: blx ___mulsf3
+; CORTEXM4: vmul.f32  s0, s1, s0
+; CORTEXA8: vmul.f32  d0, d1, d0
+  %0 = fmul float %a, %b
+  ret float %0
+}
+
+define double @bar(double %a, double %b) {
+entry:
+; CHECK: bar
+  %0 = fmul double %a, %b
+; CORTEXM3: blx ___muldf3
+; CORTEXM4: blx ___muldf3
+; CORTEXA8: vmul.f64  d0, d1, d0
+  ret double %0
+}
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index 87af9d10572b6..d8b51ec82dedd 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -14,11 +14,11 @@ entry:
   %6 = bitcast i32* %sp3 to <4 x i32>*            ; <<4 x i32>*> [#uses=1]
   %7 = load <4 x i32>* %6, align 16               ; <<4 x i32>> [#uses=1]
   %8 = bitcast i32* %dp to i8*                    ; <i8*> [#uses=1]
-  tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7)
+  tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7, i32 1)
   ret void
 }
 
-declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
 
 @sbuf = common global [16 x i32] zeroinitializer, align 16 ; <[16 x i32]*> [#uses=5]
 @dbuf = common global [16 x i32] zeroinitializer  ; <[16 x i32]*> [#uses=2]
@@ -44,6 +44,6 @@ bb2:                                              ; preds = %bb
   %3 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
   %4 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
   %5 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
-  tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5) nounwind
+  tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind
   ret i32 0
 }
diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll
index 0cddd489fb469..e63a115273ffe 100644
--- a/test/CodeGen/Thumb2/div.ll
+++ b/test/CodeGen/Thumb2/div.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 \
 ; RUN:    | FileCheck %s -check-prefix=CHECK-THUMB
-; RUN: llc < %s -march=arm -mcpu=cortex-m3 -mattr=+thumb2 \
+; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \
 ; RUN:    | FileCheck %s -check-prefix=CHECK-THUMBV7M
 
 define i32 @f1(i32 %a, i32 %b) {
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index 29b8e75cb8b3b..650d788cb4d20 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -22,7 +22,7 @@
 
 define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind {
 entry:
-; CHECK:       ldr.w	r9, [r7, #28]
+; CHECK:       ldr.w	{{(r[0-9])|(lr)}}, [r7, #28]
   %xgaps.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
   %ycomp.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
   br label %bb20
@@ -46,9 +46,9 @@ bb119:                                            ; preds = %bb20, %bb20
 
 bb420:                                            ; preds = %bb20, %bb20
 ; CHECK: bb420
-; CHECK: str r{{[0-7]}}, [sp]
-; CHECK: str r{{[0-7]}}, [sp, #4]
-; CHECK: str r{{[0-7]}}, [sp, #8]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #4]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #8]
 ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24]
   store %union.rec* null, %union.rec** @zz_hold, align 4
   store %union.rec* null, %union.rec** @zz_res, align 4
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 7fa782f91de9a..ad957a1fcb45f 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -21,8 +21,8 @@ entry:
 bb:                                               ; preds = %bb, %entry
 ; CHECK: LBB0_1:
 ; CHECK: cmp r2, #0
-; CHECK: sub.w r9, r2, #1
-; CHECK: mov r2, r9
+; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], r2, #1
+; CHECK: mov r2, [[REGISTER]]
 
   %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ]     ; <i32> [#uses=1]
   %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
diff --git a/test/CodeGen/Thumb2/machine-licm-vdup.ll b/test/CodeGen/Thumb2/machine-licm-vdup.ll
new file mode 100644
index 0000000000000..fde2ee0ab0c9a
--- /dev/null
+++ b/test/CodeGen/Thumb2/machine-licm-vdup.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim                -arm-vdup-splat | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s 
+; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375.
+; Eventually this should become the default and be moved into machine-licm.ll.
+; FIXME: the vdup should be hoisted out of the loop, 8248029.
+
+define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
+entry:
+; CHECK: t2:
+; CHECK: mov.w r3, #1065353216
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+; CHECK-NEXT: %bb1
+; CHECK: vdup.32 q1, r3
+  %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
+  %tmp1 = shl i32 %indvar, 2
+  %gep1 = getelementptr i8* %ptr1, i32 %tmp1
+  %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1)
+  %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
+  %gep2 = getelementptr i8* %ptr2, i32 %tmp1
+  call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
+  %indvar.next = add i32 %indvar, 1
+  %cond = icmp eq i32 %indvar.next, 10
+  br i1 %cond, label %bb2, label %bb1
+
+bb2:
+  ret void
+}
+
+; CHECK-NOT: LCPI1_0:
+; CHECK: .subsections_via_symbols
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index cdb3041b3beab..b949b2f30506d 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -64,10 +64,10 @@ bb1:
   %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
   %tmp1 = shl i32 %indvar, 2
   %gep1 = getelementptr i8* %ptr1, i32 %tmp1
-  %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1)
+  %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1)
   %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
   %gep2 = getelementptr i8* %ptr2, i32 %tmp1
-  call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3)
+  call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
   %indvar.next = add i32 %indvar, 1
   %cond = icmp eq i32 %indvar.next, 10
   br i1 %cond, label %bb2, label %bb1
@@ -79,8 +79,8 @@ bb2:
 ; CHECK: LCPI1_0:
 ; CHECK: .section
 
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
 
 declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll
index 76c56d00473d3..7b0432de9bb5e 100644
--- a/test/CodeGen/Thumb2/thumb2-and2.ll
+++ b/test/CodeGen/Thumb2/thumb2-and2.ll
@@ -30,7 +30,7 @@ define i32 @f4(i32 %a) {
     ret i32 %tmp
 }
 ; CHECK: f4:
-; CHECK: 	and	r0, r0, #1448498774
+; CHECK: bic r0, r0, #-1448498775
 
 ; 66846720 = 0x03fc0000
 define i32 @f5(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll
new file mode 100644
index 0000000000000..4df06b836fc57
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
+
+define void @b(i32 %x) nounwind optsize {
+entry:
+; CHECK: b
+; CHECK: mov r2, sp
+; CHECK: mls r0, r0, r1, r2
+; CHECK: mov sp, r0
+  %0 = mul i32 %x, 24                             ; <i32> [#uses=1]
+  %vla = alloca i8, i32 %0, align 1               ; <i8*> [#uses=1]
+  call arm_aapcscc  void @a(i8* %vla) nounwind optsize
+  ret void
+}
+
+declare void @a(i8*) optsize
diff --git a/test/CodeGen/Thumb2/thumb2-barrier.ll b/test/CodeGen/Thumb2/thumb2-barrier.ll
new file mode 100644
index 0000000000000..a54d09e629199
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-barrier.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 )
+
+define void @t1() {
+; CHECK: t1:
+; CHECK: dsb
+  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true )
+  ret void
+}
+
+define void @t2() {
+; CHECK: t2:
+; CHECK: dmb
+  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false )
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-call-tc.ll b/test/CodeGen/Thumb2/thumb2-call-tc.ll
index 24502b0338c25..2e4da1b289b55 100644
--- a/test/CodeGen/Thumb2/thumb2-call-tc.ll
+++ b/test/CodeGen/Thumb2/thumb2-call-tc.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN
 ; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX
+; XFAIL: *
 
 @t = weak global i32 ()* null           ; <i32 ()**> [#uses=1]
 
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
index d4773bb5809b0..63249f4cf1452 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -39,3 +39,17 @@ define i1 @f5(i32 %a) {
     %tmp = icmp eq i32 %a, 1114112
     ret i1 %tmp
 }
+
+; Check that we don't do an invalid (a > b) --> !(a < b + 1) transform.
+;
+; CHECK: f6:
+; CHECK-NOT: cmp.w r0, #-2147483648
+; CHECK: bx lr
+define i32 @f6(i32 %a) {
+    %tmp = icmp sgt i32 %a, 2147483647
+    br i1 %tmp, label %true, label %false
+true:
+    ret i32 2
+false:
+    ret i32 0
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
index c024415477189..5315535db0456 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; XFAIL: *
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 ; CHECK: t1:
diff --git a/test/CodeGen/Thumb2/thumb2-pack.ll b/test/CodeGen/Thumb2/thumb2-pack.ll
index c8302df78f680..2e8bb1d609342 100644
--- a/test/CodeGen/Thumb2/thumb2-pack.ll
+++ b/test/CodeGen/Thumb2/thumb2-pack.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \
-; RUN:   grep pkhbt | count 5
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \
-; RUN:   grep pkhtb | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
 
+; CHECK: test1
+; CHECK: pkhbt   r0, r0, r1, lsl #16
 define i32 @test1(i32 %X, i32 %Y) {
 	%tmp1 = and i32 %X, 65535		; <i32> [#uses=1]
 	%tmp4 = shl i32 %Y, 16		; <i32> [#uses=1]
@@ -10,6 +9,8 @@ define i32 @test1(i32 %X, i32 %Y) {
 	ret i32 %tmp5
 }
 
+; CHECK: test1a
+; CHECK: pkhbt   r0, r0, r1, lsl #16
 define i32 @test1a(i32 %X, i32 %Y) {
 	%tmp19 = and i32 %X, 65535		; <i32> [#uses=1]
 	%tmp37 = shl i32 %Y, 16		; <i32> [#uses=1]
@@ -17,6 +18,8 @@ define i32 @test1a(i32 %X, i32 %Y) {
 	ret i32 %tmp5
 }
 
+; CHECK: test2
+; CHECK: pkhbt   r0, r0, r1, lsl #12
 define i32 @test2(i32 %X, i32 %Y) {
 	%tmp1 = and i32 %X, 65535		; <i32> [#uses=1]
 	%tmp3 = shl i32 %Y, 12		; <i32> [#uses=1]
@@ -25,6 +28,8 @@ define i32 @test2(i32 %X, i32 %Y) {
 	ret i32 %tmp57
 }
 
+; CHECK: test3
+; CHECK: pkhbt   r0, r0, r1, lsl #18
 define i32 @test3(i32 %X, i32 %Y) {
 	%tmp19 = and i32 %X, 65535		; <i32> [#uses=1]
 	%tmp37 = shl i32 %Y, 18		; <i32> [#uses=1]
@@ -32,6 +37,8 @@ define i32 @test3(i32 %X, i32 %Y) {
 	ret i32 %tmp5
 }
 
+; CHECK: test4
+; CHECK: pkhbt   r0, r0, r1
 define i32 @test4(i32 %X, i32 %Y) {
 	%tmp1 = and i32 %X, 65535		; <i32> [#uses=1]
 	%tmp3 = and i32 %Y, -65536		; <i32> [#uses=1]
@@ -39,6 +46,8 @@ define i32 @test4(i32 %X, i32 %Y) {
 	ret i32 %tmp46
 }
 
+; CHECK: test5
+; CHECK: pkhtb   r0, r0, r1, asr #16
 define i32 @test5(i32 %X, i32 %Y) {
 	%tmp17 = and i32 %X, -65536		; <i32> [#uses=1]
 	%tmp2 = bitcast i32 %Y to i32		; <i32> [#uses=1]
@@ -47,6 +56,8 @@ define i32 @test5(i32 %X, i32 %Y) {
 	ret i32 %tmp5
 }
 
+; CHECK: test5a
+; CHECK: pkhtb   r0, r0, r1, asr #16
 define i32 @test5a(i32 %X, i32 %Y) {
 	%tmp110 = and i32 %X, -65536		; <i32> [#uses=1]
 	%tmp37 = lshr i32 %Y, 16		; <i32> [#uses=1]
@@ -55,6 +66,8 @@ define i32 @test5a(i32 %X, i32 %Y) {
 	ret i32 %tmp5
 }
 
+; CHECK: test6
+; CHECK: pkhtb   r0, r0, r1, asr #12
 define i32 @test6(i32 %X, i32 %Y) {
 	%tmp1 = and i32 %X, -65536		; <i32> [#uses=1]
 	%tmp37 = lshr i32 %Y, 12		; <i32> [#uses=1]
@@ -64,6 +77,8 @@ define i32 @test6(i32 %X, i32 %Y) {
 	ret i32 %tmp59
 }
 
+; CHECK: test7
+; CHECK: pkhtb   r0, r0, r1, asr #18
 define i32 @test7(i32 %X, i32 %Y) {
 	%tmp1 = and i32 %X, -65536		; <i32> [#uses=1]
 	%tmp3 = ashr i32 %Y, 18		; <i32> [#uses=1]
@@ -71,3 +86,12 @@ define i32 @test7(i32 %X, i32 %Y) {
 	%tmp57 = or i32 %tmp4, %tmp1		; <i32> [#uses=1]
 	ret i32 %tmp57
 }
+
+; CHECK: test8
+; CHECK: pkhtb   r0, r0, r1, asr #22
+define i32 @test8(i32 %X, i32 %Y) {
+	%tmp1 = and i32 %X, -65536
+	%tmp3 = lshr i32 %Y, 22
+	%tmp57 = or i32 %tmp3, %tmp1
+	ret i32 %tmp57
+}
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index 3946371709d58..4f92c93338066 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -7,7 +7,7 @@
 %quux = type { i32 (...)**, %baz*, i32 }
 %quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
 
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
 define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
@@ -15,11 +15,11 @@ define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
 ; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
 entry:
-  %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+  %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
   store float 6.300000e+01, float* undef, align 4
-  %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+  %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
   store float 0.000000e+00, float* undef, align 4
-  %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+  %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
   %val173 = load <4 x float>* undef               ; <<4 x float>> [#uses=1]
   br label %bb4
 
diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 1fa4e5c21dabf..2074f98cb608c 100644
--- a/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -6,7 +6,7 @@ define i32 @test1(i32 %x) {
 ; ARMv7A: uxtb16 r0, r0
 
 ; ARMv7M: test1
-; ARMv7M: and r0, r0, #16711935
+; ARMv7M: bic r0, r0, #-16711936
 	%tmp1 = and i32 %x, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp1
 }