2221 files changed, 37626 insertions, 3663 deletions
diff --git a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
index caa9a981fc6a2..a0235f787061d 100644
--- a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
+++ b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6
 
 %struct.layer_data = type { i32, [2048 x i8], i8*, [16 x i8], i32, i8*, i32, i32, [64 x i32], [64 x i32], [64 x i32], [64 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x [64 x i16]] }
 @ld = external global %struct.layer_data*               ; <%struct.layer_data**> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 6e11b16910184..81483cb4e7c55 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
 
 @quant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
 @dequant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
index 7317e62e31824..83b26d340062f 100644
--- a/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
+++ b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
 
 define fastcc i8* @read_sleb128(i8* %p, i32* %val) {
 	br label %bb
diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
index 07390add55381..33f935e960b1a 100644
--- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll
+++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
 ; RUN:   -mattr=+v6 | grep r9
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
 ; RUN:   -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats |& grep asm-printer
 ; | grep 35
 
diff --git a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
index 32daf839f0fc9..b0953dc8b61f8 100644
--- a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
+++ b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
 ; PR1257
 
 	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
index 6d3f6404af840..d741112e2886e 100644
--- a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 ; PR1266
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
index f927ef43ca194..e4635f50279d2 100644
--- a/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
 ; PR1279
 
 	%struct.rtx_def = type { i16, i8, i8, %struct.u }
diff --git a/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
index 55d29933a55c9..ea27676a9f0f5 100644
--- a/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
 ; PR1279
 
 	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
index ef5a1ae404598..f24def31f97aa 100644
--- a/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-apple-darwin
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin
 
 	%struct.H_TBL = type { [17 x i8], [256 x i8], i32 }
 	%struct.Q_TBL = type { [64 x i16], i32 }
diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
index e412127eae7b5..b543c57e1a85e 100644
--- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll
+++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep {add.*#0}
+; RUN: llc < %s -march=arm | not grep {add.*#0}
 
 define i32 @foo() {
 entry:
diff --git a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
index 42f5034c70a76..e001cde8351bf 100644
--- a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
+++ b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | \
 ; RUN:   not grep LPC9
 
 	%struct.B = type { i32 }
diff --git a/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
index ec70a596bc3ac..a89e937d3e106 100644
--- a/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
+++ b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
 target triple = "arm-apple-darwin8"
diff --git a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
index f3f82bc4846f0..c73b6793da0ff 100644
--- a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
+++ b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 	%struct.Connection = type { i32, [10 x i8], i32 }
 	%struct.IntChunk = type { %struct.cppobjtype, i32, i32*, i32 }
diff --git a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
index 11431be9c28c8..26864f18a69c0 100644
--- a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
+++ b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep 1_0
+; RUN: llc < %s | not grep 1_0
 ; This used to create an extra branch to 'entry', LBB1_0.
 
 ; ModuleID = 'bug.bc'
diff --git a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
index c3596e7c7b4f3..f2a8ee1a14240 100644
--- a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
+++ b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
 ; Check that calls to baz and quux are tail-merged.
 ; PR1628
 
diff --git a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
index 41ab1e52f674b..2758505811549 100644
--- a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
+++ b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1
 ; Check that calls to baz and quux are tail-merged.
 ; PR1628
 
diff --git a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
index 58c5f89c619d5..b3b0769347f16 100644
--- a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
+++ b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6
 
 define i32 @test3() {
 	tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 )
diff --git a/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
index 430b3689c0b41..7b15ded44799c 100644
--- a/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi
 ; PR1406
 
 	%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index 4c4a9336fd91b..061bf5e851b02 100644
--- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=arm | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2
-; RUN: llvm-as < %s | llc -march=arm -enable-eh | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-eh | grep bl.*quux | count 1
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*baz | count 2
-; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*quux | count 2
+; RUN: llc < %s -march=arm | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2
+; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2
+; RUN: llc < %s -march=arm -enable-eh | grep bl.*baz | count 1
+; RUN: llc < %s -march=arm -enable-eh | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*baz | count 2
+; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*quux | count 2
 ; Check that tail merging is the default on ARM, and that -enable-tail-merge=0 works.
 ; PR1628
 
diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
index de32a26ae9cfb..d2eb85d356c53 100644
--- a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
+++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep {str.*\\!}
+; RUN: llc < %s -march=arm | not grep {str.*\\!}
 
 	%struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 }
 	%struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
index d21a8f209e960..030486a7c9832 100644
--- a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s 
 ; PR1424
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2007-08-15-ReuseBug.ll b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
index 3cfcdef48f4b4..30b72e09a1145 100644
--- a/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
+++ b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6
 ; PR1609
 
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
index ec170f8eac5be..ff015065ef01b 100644
--- a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
+++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -regalloc=local
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=local
 ; PR1925
 
 	%struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 }
diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
index b81d5759b6cf6..06bc98746076f 100644
--- a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=local
 ; PR1925
 
 	%"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" }
diff --git a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
index ca34275f79f49..a604c5cd574e2 100644
--- a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
+++ b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | not grep 255
+; RUN: llc < %s -march=arm -mattr=+v6 | not grep 255
 
 define i32 @main(i32 %argc, i8** %argv) {
 entry:
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 70f1774b4c521..78c6222375632 100644
--- a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
 
 @accum = external global { double, double }		; <{ double, double }*> [#uses=1]
 @.str = external constant [4 x i8]		; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
index 610f5ea7cd05d..234c7b69e3e7b 100644
--- a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 @numBinsY = external global i32		; <i32*> [#uses=1]
 
diff --git a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
index 80ccddfcd735d..77418be380843 100644
--- a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 	%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll b/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
index 3cd757fa62ad2..33bd4def5b495 100644
--- a/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
+++ b/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 declare void @foo(i8*, i8*, i32, i32, i32, i32, i32, i32, i32)
 
diff --git a/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
index 035af08cd40ac..71aa6037a1374 100644
--- a/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
+++ b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 	%struct.BiContextType = type { i16, i8, i32 }
 	%struct.Bitstream = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
diff --git a/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
index e98126bf87aa9..aa61d86e1389e 100644
--- a/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 	%struct.Decoders = type { i32**, i16***, i16****, i16***, i16**, i8**, i8** }
 @decoders = external global %struct.Decoders		; <%struct.Decoders*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-07-17-Fdiv.ll b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
index aa75970418a60..4cb768ef5b6d3 100644
--- a/test/CodeGen/ARM/2008-07-17-Fdiv.ll
+++ b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define float @f(float %a, float %b) nounwind  {
 	%tmp = fdiv float %a, %b
diff --git a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
index 6ea75eb5c79c5..83fde07779bc0 100644
--- a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
+++ b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 ; PR2589
 
 define void @main({ i32 }*) {
diff --git a/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
index 0a79e8665a75e..adb011277604d 100644
--- a/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
+++ b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6 -relocation-model=pic | grep comm
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 -relocation-model=pic | grep comm
 
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 	%struct.__gcov_var = type { %struct.FILE*, i32, i32, i32, i32, i32, i32, [1025 x i32] }
diff --git a/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll b/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
index c601b90e0710c..5f9d9aea58dd1 100644
--- a/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 @"\01LC1" = external constant [288 x i8]		; <[288 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll b/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
index b3ea6fc5945fc..d3bc3e1663bcf 100644
--- a/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 define void @gcov_exit() nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
index 164e9643f170a..601a516eb09a9 100644
--- a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
 
 define hidden i64 @__muldi3(i64 %u, i64 %v) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
index 3f17a5150fbee..35ca7b4c9af26 100644
--- a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 164
+; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 154
 
 	%"struct.Adv5::Ekin<3>" = type <{ i8 }>
 	%"struct.Adv5::X::Energyflux<3>" = type { double }
diff --git a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
index 48e663dd80675..4c0c59ccfbc6c 100644
--- a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
 
 target triple = "arm-apple-darwin9"
 	%struct.FILE_POS = type { i8, i8, i16, i32 }
diff --git a/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
index d7befa0987488..a48f0033acc8c 100644
--- a/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
+++ b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR3610
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32"
 target triple = "arm-elf"
diff --git a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
index bd5b719594428..bc5e6023409f3 100644
--- a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
 
 target triple = "arm-apple-darwin9"
 @a = external global double		; <double*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
index 399ed3081f20c..0ec17ae23d694 100644
--- a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin9 -mattr=+vfp2
+; RUN: llc < %s -mtriple=armv6-apple-darwin9 -mattr=+vfp2
 ; rdar://6653182
 
 	%struct.ggBRDF = type { i32 (...)** }
diff --git a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
index 0ec6d7d4ff735..a1ce384b53452 100644
--- a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
+++ b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 	%struct.hit_t = type { %struct.v_t, double }
 	%struct.node_t = type { %struct.hit_t, %struct.hit_t, i32 }
diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
index 11c05c6ea7b3d..352672274d20a 100644
--- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
+++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {swi 107}
+; RUN: llc < %s -march=arm | grep {swi 107}
 
 define i32 @_swilseek(i32) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
index c00b1fb986069..f6b3d2c0147b6 100644
--- a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
+++ b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 ; PR3795
 
 define fastcc void @_D3foo3fooFAriZv({ i32, { double, double }* } %d_arg, i32 %x_arg) {
diff --git a/test/CodeGen/ARM/2009-04-08-FREM.ll b/test/CodeGen/ARM/2009-04-08-FREM.ll
index c7e343c89203c..99907fc697bd0 100644
--- a/test/CodeGen/ARM/2009-04-08-FREM.ll
+++ b/test/CodeGen/ARM/2009-04-08-FREM.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 declare i32 @printf(i8*, ...)
 
diff --git a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
index f394847362f9d..05d2f26be0b7a 100644
--- a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
+++ b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %CONST) {
 entry:
diff --git a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
index 223fa0f435c9e..deb092bbf86e2 100644
--- a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
+++ b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 ; PR3954
 
 define void @foo(...) nounwind {
diff --git a/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
index 2bca6e62fc301..670d2045f8ecf 100644
--- a/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
+++ b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linuxeabi-unknown-gnu -mattr=+v6
+; RUN: llc < %s -mtriple=arm-linuxeabi-unknown-gnu -mattr=+v6
 ; PR4166
 
 	%"byte[]" = type { i32, i8* }
diff --git a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
index d03b7ce87539c..75610ffecec27 100644
--- a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local
+; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local
 ; PR4100
 @.str = external constant [30 x i8]		; <[30 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
index 35d4306e9d14e..7046fccb5ee9b 100644
--- a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
+++ b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 	%struct.List = type { %struct.List*, i32 }
 @Node5 = external constant %struct.List		; <%struct.List*> [#uses=1]
 @"\01LC" = external constant [7 x i8]		; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
index f942c9fc22168..1e2707f7b5bba 100644
--- a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
+++ b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
@@ -1,7 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm | grep swp
+; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=thumb | FileCheck %s
 ; PR4091
 
 define void @foo(i32 %i, i32* %p) nounwind {
+;CHECK: swp r2, r0, [r1]
 	%asmtmp = call i32 asm sideeffect "swp $0, $2, $3", "=&r,=*m,r,*m,~{memory}"(i32* %p, i32 %i, i32* %p) nounwind
 	ret void
 }
diff --git a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
index 7cd35b9557d01..403e3f6509f3c 100644
--- a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
+++ b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6,+vfp2
 
 @"\01LC" = external constant [15 x i8]		; <[15 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
index 5eaae7aa9b46d..98e002302558b 100644
--- a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
+++ b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6
 
 	%struct.anon = type { i16, i16 }
 	%struct.cab_archive = type { i32, i16, i16, i16, i16, i8, %struct.cab_folder*, %struct.cab_file* }
diff --git a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
index 45b4bd48f5161..27888d75f67a5 100644
--- a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin
 
 	type { i32, i32, %struct.D_Sym**, [3 x %struct.D_Sym*] }		; type %0
 	type { i32, %struct.D_Reduction** }		; type %1
diff --git a/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
index c715a189287a3..a0f903b0bdf56 100644
--- a/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin
 
   %struct.term = type { i32, i32, i32 }
 
diff --git a/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
index cbe2385ab27af..b56b684473609 100644
--- a/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-eabi -mattr=+vfp2 -float-abi=hard
+; RUN: llc < %s -mtriple=armv6-eabi -mattr=+vfp2 -float-abi=hard
 ; PR4419
 
 define float @__ieee754_acosf(float %x) nounwind {
diff --git a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
index 5c8d7b0f62208..e068be74bae46 100644
--- a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin
 
 	%struct.rtunion = type { i64 }
 	%struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
index 27cad7ccf6b7c..17efe0035419a 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
 
 @nn = external global i32		; <i32*> [#uses=1]
 @al_len = external global i32		; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
index 3a14d67247b9d..f520be3946ae7 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
 
 @no_mat = external global i32		; <i32*> [#uses=1]
 @no_mis = external global i32		; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
index f94b59dc91bdd..eee6ff98c6109 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
 
 @JJ = external global i32*		; <i32**> [#uses=1]
 
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
index bca7f793eef4a..93c92b1c93f41 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
 
 @r = external global i32		; <i32*> [#uses=1]
 @qr = external global i32		; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
index 0c90592f1d211..277283dc08890 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
 
 @XX = external global i32*		; <i32**> [#uses=1]
 
diff --git a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
index dfccefcac7a0d..5c0e5fa57b9fd 100644
--- a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
+++ b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9
 
 @qr = external global i32		; <i32*> [#uses=1]
 @II = external global i32*		; <i32**> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
new file mode 100644
index 0000000000000..e1e94b6412149
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+
+define void @test(i8* %x) nounwind {
+entry:
+	call void asm sideeffect "pld\09${0:a}", "r,~{cc}"(i8* %x) nounwind
+	ret void
+}
diff --git a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
new file mode 100644
index 0000000000000..ee93fde998c10
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -0,0 +1,1323 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep fcmpezd | count 13
+
+	%struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* }
+	%struct.VEC2 = type { double, double, double }
+	%struct.VERTEX = type { %struct.VEC2, %struct.VERTEX*, %struct.VERTEX* }
+	%struct.edge_rec = type { %struct.VERTEX*, %struct.edge_rec*, i32, i8* }
+@avail_edge = internal global %struct.edge_rec* null		; <%struct.edge_rec**> [#uses=6]
+@_2E_str7 = internal constant [21 x i8] c"ERROR: Only 1 point!\00", section "__TEXT,__cstring,cstring_literals", align 1		; <[21 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.EDGE_PAIR*, %struct.VERTEX*, %struct.VERTEX*)* @build_delaunay to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias nocapture sret %agg.result, %struct.VERTEX* %tree, %struct.VERTEX* %extra) nounwind {
+entry:
+	%delright = alloca %struct.EDGE_PAIR, align 8		; <%struct.EDGE_PAIR*> [#uses=3]
+	%delleft = alloca %struct.EDGE_PAIR, align 8		; <%struct.EDGE_PAIR*> [#uses=3]
+	%0 = icmp eq %struct.VERTEX* %tree, null		; <i1> [#uses=1]
+	br i1 %0, label %bb8, label %bb
+
+bb:		; preds = %entry
+	%1 = getelementptr %struct.VERTEX* %tree, i32 0, i32 2		; <%struct.VERTEX**> [#uses=1]
+	%2 = load %struct.VERTEX** %1, align 4		; <%struct.VERTEX*> [#uses=2]
+	%3 = icmp eq %struct.VERTEX* %2, null		; <i1> [#uses=1]
+	br i1 %3, label %bb7, label %bb1.i
+
+bb1.i:		; preds = %bb1.i, %bb
+	%tree_addr.0.i = phi %struct.VERTEX* [ %5, %bb1.i ], [ %tree, %bb ]		; <%struct.VERTEX*> [#uses=3]
+	%4 = getelementptr %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1		; <%struct.VERTEX**> [#uses=1]
+	%5 = load %struct.VERTEX** %4, align 4		; <%struct.VERTEX*> [#uses=2]
+	%6 = icmp eq %struct.VERTEX* %5, null		; <i1> [#uses=1]
+	br i1 %6, label %get_low.exit, label %bb1.i
+
+get_low.exit:		; preds = %bb1.i
+	call arm_apcscc  void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind
+	%7 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1		; <%struct.VERTEX**> [#uses=1]
+	%8 = load %struct.VERTEX** %7, align 4		; <%struct.VERTEX*> [#uses=1]
+	call arm_apcscc  void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind
+	%9 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 0		; <%struct.edge_rec**> [#uses=1]
+	%10 = load %struct.edge_rec** %9, align 8		; <%struct.edge_rec*> [#uses=2]
+	%11 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%12 = load %struct.edge_rec** %11, align 4		; <%struct.edge_rec*> [#uses=1]
+	%13 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 0		; <%struct.edge_rec**> [#uses=1]
+	%14 = load %struct.edge_rec** %13, align 8		; <%struct.edge_rec*> [#uses=1]
+	%15 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%16 = load %struct.edge_rec** %15, align 4		; <%struct.edge_rec*> [#uses=2]
+	br label %bb.i
+
+bb.i:		; preds = %bb4.i, %get_low.exit
+	%rdi_addr.0.i = phi %struct.edge_rec* [ %14, %get_low.exit ], [ %72, %bb4.i ]		; <%struct.edge_rec*> [#uses=2]
+	%ldi_addr.1.i = phi %struct.edge_rec* [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ]		; <%struct.edge_rec*> [#uses=3]
+	%17 = getelementptr %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%18 = load %struct.VERTEX** %17, align 4		; <%struct.VERTEX*> [#uses=3]
+	%19 = ptrtoint %struct.edge_rec* %ldi_addr.1.i to i32		; <i32> [#uses=1]
+	%20 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%21 = load double* %20, align 4		; <double> [#uses=3]
+	%22 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%23 = load double* %22, align 4		; <double> [#uses=3]
+	br label %bb2.i
+
+bb1.i1:		; preds = %bb2.i
+	%24 = ptrtoint %struct.edge_rec* %ldi_addr.0.i to i32		; <i32> [#uses=2]
+	%25 = add i32 %24, 48		; <i32> [#uses=1]
+	%26 = and i32 %25, 63		; <i32> [#uses=1]
+	%27 = and i32 %24, -64		; <i32> [#uses=1]
+	%28 = or i32 %26, %27		; <i32> [#uses=1]
+	%29 = inttoptr i32 %28 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%30 = getelementptr %struct.edge_rec* %29, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%31 = load %struct.edge_rec** %30, align 4		; <%struct.edge_rec*> [#uses=1]
+	%32 = ptrtoint %struct.edge_rec* %31 to i32		; <i32> [#uses=2]
+	%33 = add i32 %32, 16		; <i32> [#uses=1]
+	%34 = and i32 %33, 63		; <i32> [#uses=1]
+	%35 = and i32 %32, -64		; <i32> [#uses=1]
+	%36 = or i32 %34, %35		; <i32> [#uses=2]
+	%37 = inttoptr i32 %36 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	br label %bb2.i
+
+bb2.i:		; preds = %bb1.i1, %bb.i
+	%ldi_addr.1.pn.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ]		; <%struct.edge_rec*> [#uses=1]
+	%.pn6.in.in.i = phi i32 [ %19, %bb.i ], [ %36, %bb1.i1 ]		; <i32> [#uses=1]
+	%ldi_addr.0.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ]		; <%struct.edge_rec*> [#uses=4]
+	%.pn6.in.i = xor i32 %.pn6.in.in.i, 32		; <i32> [#uses=1]
+	%.pn6.i = inttoptr i32 %.pn6.in.i to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%t1.0.in.i = getelementptr %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%t2.0.in.i = getelementptr %struct.edge_rec* %.pn6.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%t1.0.i = load %struct.VERTEX** %t1.0.in.i		; <%struct.VERTEX*> [#uses=2]
+	%t2.0.i = load %struct.VERTEX** %t2.0.in.i		; <%struct.VERTEX*> [#uses=2]
+	%38 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%39 = load double* %38, align 4		; <double> [#uses=3]
+	%40 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%41 = load double* %40, align 4		; <double> [#uses=3]
+	%42 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%43 = load double* %42, align 4		; <double> [#uses=1]
+	%44 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%45 = load double* %44, align 4		; <double> [#uses=1]
+	%46 = fsub double %39, %21		; <double> [#uses=1]
+	%47 = fsub double %45, %23		; <double> [#uses=1]
+	%48 = fmul double %46, %47		; <double> [#uses=1]
+	%49 = fsub double %43, %21		; <double> [#uses=1]
+	%50 = fsub double %41, %23		; <double> [#uses=1]
+	%51 = fmul double %49, %50		; <double> [#uses=1]
+	%52 = fsub double %48, %51		; <double> [#uses=1]
+	%53 = fcmp ogt double %52, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %53, label %bb1.i1, label %bb3.i
+
+bb3.i:		; preds = %bb2.i
+	%54 = ptrtoint %struct.edge_rec* %rdi_addr.0.i to i32		; <i32> [#uses=1]
+	%55 = xor i32 %54, 32		; <i32> [#uses=3]
+	%56 = inttoptr i32 %55 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%57 = getelementptr %struct.edge_rec* %56, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%58 = load %struct.VERTEX** %57, align 4		; <%struct.VERTEX*> [#uses=2]
+	%59 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%60 = load double* %59, align 4		; <double> [#uses=1]
+	%61 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%62 = load double* %61, align 4		; <double> [#uses=1]
+	%63 = fsub double %60, %39		; <double> [#uses=1]
+	%64 = fsub double %23, %41		; <double> [#uses=1]
+	%65 = fmul double %63, %64		; <double> [#uses=1]
+	%66 = fsub double %21, %39		; <double> [#uses=1]
+	%67 = fsub double %62, %41		; <double> [#uses=1]
+	%68 = fmul double %66, %67		; <double> [#uses=1]
+	%69 = fsub double %65, %68		; <double> [#uses=1]
+	%70 = fcmp ogt double %69, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %70, label %bb4.i, label %bb5.i
+
+bb4.i:		; preds = %bb3.i
+	%71 = getelementptr %struct.edge_rec* %56, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%72 = load %struct.edge_rec** %71, align 4		; <%struct.edge_rec*> [#uses=1]
+	br label %bb.i
+
+bb5.i:		; preds = %bb3.i
+	%73 = add i32 %55, 48		; <i32> [#uses=1]
+	%74 = and i32 %73, 63		; <i32> [#uses=1]
+	%75 = and i32 %55, -64		; <i32> [#uses=1]
+	%76 = or i32 %74, %75		; <i32> [#uses=1]
+	%77 = inttoptr i32 %76 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%78 = getelementptr %struct.edge_rec* %77, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%79 = load %struct.edge_rec** %78, align 4		; <%struct.edge_rec*> [#uses=1]
+	%80 = ptrtoint %struct.edge_rec* %79 to i32		; <i32> [#uses=2]
+	%81 = add i32 %80, 16		; <i32> [#uses=1]
+	%82 = and i32 %81, 63		; <i32> [#uses=1]
+	%83 = and i32 %80, -64		; <i32> [#uses=1]
+	%84 = or i32 %82, %83		; <i32> [#uses=1]
+	%85 = inttoptr i32 %84 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%86 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%87 = load %struct.VERTEX** %86, align 4		; <%struct.VERTEX*> [#uses=1]
+	%88 = call arm_apcscc  %struct.edge_rec* @alloc_edge() nounwind		; <%struct.edge_rec*> [#uses=6]
+	%89 = getelementptr %struct.edge_rec* %88, i32 0, i32 1		; <%struct.edge_rec**> [#uses=4]
+	store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4
+	%90 = getelementptr %struct.edge_rec* %88, i32 0, i32 0		; <%struct.VERTEX**> [#uses=2]
+	store %struct.VERTEX* %18, %struct.VERTEX** %90, align 4
+	%91 = ptrtoint %struct.edge_rec* %88 to i32		; <i32> [#uses=5]
+	%92 = add i32 %91, 16		; <i32> [#uses=2]
+	%93 = inttoptr i32 %92 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%94 = add i32 %91, 48		; <i32> [#uses=1]
+	%95 = inttoptr i32 %94 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%96 = getelementptr %struct.edge_rec* %93, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %95, %struct.edge_rec** %96, align 4
+	%97 = add i32 %91, 32		; <i32> [#uses=1]
+	%98 = inttoptr i32 %97 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
+	%99 = getelementptr %struct.edge_rec* %98, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %98, %struct.edge_rec** %99, align 4
+	%100 = getelementptr %struct.edge_rec* %98, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	store %struct.VERTEX* %87, %struct.VERTEX** %100, align 4
+	%101 = getelementptr %struct.edge_rec* %95, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %93, %struct.edge_rec** %101, align 4
+	%102 = load %struct.edge_rec** %89, align 4		; <%struct.edge_rec*> [#uses=1]
+	%103 = ptrtoint %struct.edge_rec* %102 to i32		; <i32> [#uses=2]
+	%104 = add i32 %103, 16		; <i32> [#uses=1]
+	%105 = and i32 %104, 63		; <i32> [#uses=1]
+	%106 = and i32 %103, -64		; <i32> [#uses=1]
+	%107 = or i32 %105, %106		; <i32> [#uses=1]
+	%108 = inttoptr i32 %107 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%109 = getelementptr %struct.edge_rec* %85, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%110 = load %struct.edge_rec** %109, align 4		; <%struct.edge_rec*> [#uses=1]
+	%111 = ptrtoint %struct.edge_rec* %110 to i32		; <i32> [#uses=2]
+	%112 = add i32 %111, 16		; <i32> [#uses=1]
+	%113 = and i32 %112, 63		; <i32> [#uses=1]
+	%114 = and i32 %111, -64		; <i32> [#uses=1]
+	%115 = or i32 %113, %114		; <i32> [#uses=1]
+	%116 = inttoptr i32 %115 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%117 = getelementptr %struct.edge_rec* %116, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%118 = load %struct.edge_rec** %117, align 4		; <%struct.edge_rec*> [#uses=1]
+	%119 = getelementptr %struct.edge_rec* %108, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%120 = load %struct.edge_rec** %119, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %118, %struct.edge_rec** %119, align 4
+	store %struct.edge_rec* %120, %struct.edge_rec** %117, align 4
+	%121 = load %struct.edge_rec** %89, align 4		; <%struct.edge_rec*> [#uses=1]
+	%122 = load %struct.edge_rec** %109, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %121, %struct.edge_rec** %109, align 4
+	store %struct.edge_rec* %122, %struct.edge_rec** %89, align 4
+	%123 = xor i32 %91, 32		; <i32> [#uses=1]
+	%124 = inttoptr i32 %123 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
+	%125 = getelementptr %struct.edge_rec* %124, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%126 = load %struct.edge_rec** %125, align 4		; <%struct.edge_rec*> [#uses=1]
+	%127 = ptrtoint %struct.edge_rec* %126 to i32		; <i32> [#uses=2]
+	%128 = add i32 %127, 16		; <i32> [#uses=1]
+	%129 = and i32 %128, 63		; <i32> [#uses=1]
+	%130 = and i32 %127, -64		; <i32> [#uses=1]
+	%131 = or i32 %129, %130		; <i32> [#uses=1]
+	%132 = inttoptr i32 %131 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%133 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%134 = load %struct.edge_rec** %133, align 4		; <%struct.edge_rec*> [#uses=1]
+	%135 = ptrtoint %struct.edge_rec* %134 to i32		; <i32> [#uses=2]
+	%136 = add i32 %135, 16		; <i32> [#uses=1]
+	%137 = and i32 %136, 63		; <i32> [#uses=1]
+	%138 = and i32 %135, -64		; <i32> [#uses=1]
+	%139 = or i32 %137, %138		; <i32> [#uses=1]
+	%140 = inttoptr i32 %139 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%141 = getelementptr %struct.edge_rec* %140, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%142 = load %struct.edge_rec** %141, align 4		; <%struct.edge_rec*> [#uses=1]
+	%143 = getelementptr %struct.edge_rec* %132, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%144 = load %struct.edge_rec** %143, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %142, %struct.edge_rec** %143, align 4
+	store %struct.edge_rec* %144, %struct.edge_rec** %141, align 4
+	%145 = load %struct.edge_rec** %125, align 4		; <%struct.edge_rec*> [#uses=1]
+	%146 = load %struct.edge_rec** %133, align 4		; <%struct.edge_rec*> [#uses=2]
+	store %struct.edge_rec* %145, %struct.edge_rec** %133, align 4
+	store %struct.edge_rec* %146, %struct.edge_rec** %125, align 4
+	%147 = and i32 %92, 63		; <i32> [#uses=1]
+	%148 = and i32 %91, -64		; <i32> [#uses=1]
+	%149 = or i32 %147, %148		; <i32> [#uses=1]
+	%150 = inttoptr i32 %149 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%151 = getelementptr %struct.edge_rec* %150, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%152 = load %struct.edge_rec** %151, align 4		; <%struct.edge_rec*> [#uses=1]
+	%153 = ptrtoint %struct.edge_rec* %152 to i32		; <i32> [#uses=2]
+	%154 = add i32 %153, 16		; <i32> [#uses=1]
+	%155 = and i32 %154, 63		; <i32> [#uses=1]
+	%156 = and i32 %153, -64		; <i32> [#uses=1]
+	%157 = or i32 %155, %156		; <i32> [#uses=1]
+	%158 = inttoptr i32 %157 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%159 = load %struct.VERTEX** %90, align 4		; <%struct.VERTEX*> [#uses=1]
+	%160 = getelementptr %struct.edge_rec* %124, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%161 = load %struct.VERTEX** %160, align 4		; <%struct.VERTEX*> [#uses=1]
+	%162 = getelementptr %struct.edge_rec* %16, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%163 = load %struct.VERTEX** %162, align 4		; <%struct.VERTEX*> [#uses=1]
+	%164 = icmp eq %struct.VERTEX* %163, %159		; <i1> [#uses=1]
+	%rdo_addr.0.i = select i1 %164, %struct.edge_rec* %88, %struct.edge_rec* %16		; <%struct.edge_rec*> [#uses=3]
+	%165 = getelementptr %struct.edge_rec* %10, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%166 = load %struct.VERTEX** %165, align 4		; <%struct.VERTEX*> [#uses=1]
+	%167 = icmp eq %struct.VERTEX* %166, %161		; <i1> [#uses=1]
+	%ldo_addr.0.ph.i = select i1 %167, %struct.edge_rec* %124, %struct.edge_rec* %10		; <%struct.edge_rec*> [#uses=3]
+	br label %bb9.i
+
+bb9.i:		; preds = %bb25.i, %bb24.i, %bb5.i
+	%lcand.2.i = phi %struct.edge_rec* [ %146, %bb5.i ], [ %lcand.1.i, %bb24.i ], [ %739, %bb25.i ]		; <%struct.edge_rec*> [#uses=5]
+	%rcand.2.i = phi %struct.edge_rec* [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ]		; <%struct.edge_rec*> [#uses=5]
+	%basel.0.i = phi %struct.edge_rec* [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ]		; <%struct.edge_rec*> [#uses=2]
+	%168 = getelementptr %struct.edge_rec* %lcand.2.i, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%169 = load %struct.edge_rec** %168, align 4		; <%struct.edge_rec*> [#uses=3]
+	%170 = getelementptr %struct.edge_rec* %basel.0.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=3]
+	%171 = load %struct.VERTEX** %170, align 4		; <%struct.VERTEX*> [#uses=4]
+	%172 = ptrtoint %struct.edge_rec* %basel.0.i to i32		; <i32> [#uses=3]
+	%173 = xor i32 %172, 32		; <i32> [#uses=1]
+	%174 = inttoptr i32 %173 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%175 = getelementptr %struct.edge_rec* %174, i32 0, i32 0		; <%struct.VERTEX**> [#uses=3]
+	%176 = load %struct.VERTEX** %175, align 4		; <%struct.VERTEX*> [#uses=3]
+	%177 = ptrtoint %struct.edge_rec* %169 to i32		; <i32> [#uses=1]
+	%178 = xor i32 %177, 32		; <i32> [#uses=1]
+	%179 = inttoptr i32 %178 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%180 = getelementptr %struct.edge_rec* %179, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%181 = load %struct.VERTEX** %180, align 4		; <%struct.VERTEX*> [#uses=2]
+	%182 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 0		; <double*> [#uses=2]
+	%183 = load double* %182, align 4		; <double> [#uses=2]
+	%184 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 1		; <double*> [#uses=2]
+	%185 = load double* %184, align 4		; <double> [#uses=2]
+	%186 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%187 = load double* %186, align 4		; <double> [#uses=1]
+	%188 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%189 = load double* %188, align 4		; <double> [#uses=1]
+	%190 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%191 = load double* %190, align 4		; <double> [#uses=2]
+	%192 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%193 = load double* %192, align 4		; <double> [#uses=2]
+	%194 = fsub double %183, %191		; <double> [#uses=1]
+	%195 = fsub double %189, %193		; <double> [#uses=1]
+	%196 = fmul double %194, %195		; <double> [#uses=1]
+	%197 = fsub double %187, %191		; <double> [#uses=1]
+	%198 = fsub double %185, %193		; <double> [#uses=1]
+	%199 = fmul double %197, %198		; <double> [#uses=1]
+	%200 = fsub double %196, %199		; <double> [#uses=1]
+	%201 = fcmp ogt double %200, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %201, label %bb10.i, label %bb13.i
+
+bb10.i:		; preds = %bb9.i
+	%202 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%avail_edge.promoted25 = load %struct.edge_rec** @avail_edge		; <%struct.edge_rec*> [#uses=1]
+	br label %bb12.i
+
+bb11.i:		; preds = %bb12.i
+	%203 = ptrtoint %struct.edge_rec* %lcand.0.i to i32		; <i32> [#uses=3]
+	%204 = add i32 %203, 16		; <i32> [#uses=1]
+	%205 = and i32 %204, 63		; <i32> [#uses=1]
+	%206 = and i32 %203, -64		; <i32> [#uses=3]
+	%207 = or i32 %205, %206		; <i32> [#uses=1]
+	%208 = inttoptr i32 %207 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%209 = getelementptr %struct.edge_rec* %208, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%210 = load %struct.edge_rec** %209, align 4		; <%struct.edge_rec*> [#uses=1]
+	%211 = ptrtoint %struct.edge_rec* %210 to i32		; <i32> [#uses=2]
+	%212 = add i32 %211, 16		; <i32> [#uses=1]
+	%213 = and i32 %212, 63		; <i32> [#uses=1]
+	%214 = and i32 %211, -64		; <i32> [#uses=1]
+	%215 = or i32 %213, %214		; <i32> [#uses=1]
+	%216 = inttoptr i32 %215 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%217 = getelementptr %struct.edge_rec* %lcand.0.i, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%218 = load %struct.edge_rec** %217, align 4		; <%struct.edge_rec*> [#uses=1]
+	%219 = ptrtoint %struct.edge_rec* %218 to i32		; <i32> [#uses=2]
+	%220 = add i32 %219, 16		; <i32> [#uses=1]
+	%221 = and i32 %220, 63		; <i32> [#uses=1]
+	%222 = and i32 %219, -64		; <i32> [#uses=1]
+	%223 = or i32 %221, %222		; <i32> [#uses=1]
+	%224 = inttoptr i32 %223 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%225 = getelementptr %struct.edge_rec* %216, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%226 = load %struct.edge_rec** %225, align 4		; <%struct.edge_rec*> [#uses=1]
+	%227 = ptrtoint %struct.edge_rec* %226 to i32		; <i32> [#uses=2]
+	%228 = add i32 %227, 16		; <i32> [#uses=1]
+	%229 = and i32 %228, 63		; <i32> [#uses=1]
+	%230 = and i32 %227, -64		; <i32> [#uses=1]
+	%231 = or i32 %229, %230		; <i32> [#uses=1]
+	%232 = inttoptr i32 %231 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%233 = getelementptr %struct.edge_rec* %232, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%234 = load %struct.edge_rec** %233, align 4		; <%struct.edge_rec*> [#uses=1]
+	%235 = getelementptr %struct.edge_rec* %224, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%236 = load %struct.edge_rec** %235, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %234, %struct.edge_rec** %235, align 4
+	store %struct.edge_rec* %236, %struct.edge_rec** %233, align 4
+	%237 = load %struct.edge_rec** %217, align 4		; <%struct.edge_rec*> [#uses=1]
+	%238 = load %struct.edge_rec** %225, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %237, %struct.edge_rec** %225, align 4
+	store %struct.edge_rec* %238, %struct.edge_rec** %217, align 4
+	%239 = xor i32 %203, 32		; <i32> [#uses=2]
+	%240 = add i32 %239, 16		; <i32> [#uses=1]
+	%241 = and i32 %240, 63		; <i32> [#uses=1]
+	%242 = or i32 %241, %206		; <i32> [#uses=1]
+	%243 = inttoptr i32 %242 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%244 = getelementptr %struct.edge_rec* %243, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%245 = load %struct.edge_rec** %244, align 4		; <%struct.edge_rec*> [#uses=1]
+	%246 = ptrtoint %struct.edge_rec* %245 to i32		; <i32> [#uses=2]
+	%247 = add i32 %246, 16		; <i32> [#uses=1]
+	%248 = and i32 %247, 63		; <i32> [#uses=1]
+	%249 = and i32 %246, -64		; <i32> [#uses=1]
+	%250 = or i32 %248, %249		; <i32> [#uses=1]
+	%251 = inttoptr i32 %250 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%252 = inttoptr i32 %239 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%253 = getelementptr %struct.edge_rec* %252, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%254 = load %struct.edge_rec** %253, align 4		; <%struct.edge_rec*> [#uses=1]
+	%255 = ptrtoint %struct.edge_rec* %254 to i32		; <i32> [#uses=2]
+	%256 = add i32 %255, 16		; <i32> [#uses=1]
+	%257 = and i32 %256, 63		; <i32> [#uses=1]
+	%258 = and i32 %255, -64		; <i32> [#uses=1]
+	%259 = or i32 %257, %258		; <i32> [#uses=1]
+	%260 = inttoptr i32 %259 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%261 = getelementptr %struct.edge_rec* %251, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%262 = load %struct.edge_rec** %261, align 4		; <%struct.edge_rec*> [#uses=1]
+	%263 = ptrtoint %struct.edge_rec* %262 to i32		; <i32> [#uses=2]
+	%264 = add i32 %263, 16		; <i32> [#uses=1]
+	%265 = and i32 %264, 63		; <i32> [#uses=1]
+	%266 = and i32 %263, -64		; <i32> [#uses=1]
+	%267 = or i32 %265, %266		; <i32> [#uses=1]
+	%268 = inttoptr i32 %267 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%269 = getelementptr %struct.edge_rec* %268, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%270 = load %struct.edge_rec** %269, align 4		; <%struct.edge_rec*> [#uses=1]
+	%271 = getelementptr %struct.edge_rec* %260, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%272 = load %struct.edge_rec** %271, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %270, %struct.edge_rec** %271, align 4
+	store %struct.edge_rec* %272, %struct.edge_rec** %269, align 4
+	%273 = load %struct.edge_rec** %253, align 4		; <%struct.edge_rec*> [#uses=1]
+	%274 = load %struct.edge_rec** %261, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %273, %struct.edge_rec** %261, align 4
+	store %struct.edge_rec* %274, %struct.edge_rec** %253, align 4
+	%275 = inttoptr i32 %206 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%276 = getelementptr %struct.edge_rec* %275, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** %276, align 4
+	%277 = getelementptr %struct.edge_rec* %t.0.i, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%278 = load %struct.edge_rec** %277, align 4		; <%struct.edge_rec*> [#uses=2]
+	%.pre.i = load double* %182, align 4		; <double> [#uses=1]
+	%.pre22.i = load double* %184, align 4		; <double> [#uses=1]
+	br label %bb12.i
+
+bb12.i:		; preds = %bb11.i, %bb10.i
+	%avail_edge.tmp.026 = phi %struct.edge_rec* [ %avail_edge.promoted25, %bb10.i ], [ %275, %bb11.i ]		; <%struct.edge_rec*> [#uses=2]
+	%279 = phi double [ %.pre22.i, %bb11.i ], [ %185, %bb10.i ]		; <double> [#uses=3]
+	%280 = phi double [ %.pre.i, %bb11.i ], [ %183, %bb10.i ]		; <double> [#uses=3]
+	%lcand.0.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ]		; <%struct.edge_rec*> [#uses=3]
+	%t.0.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ]		; <%struct.edge_rec*> [#uses=4]
+	%.pn5.in.in.in.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ]		; <%struct.edge_rec*> [#uses=1]
+	%.pn4.in.in.in.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ]		; <%struct.edge_rec*> [#uses=1]
+	%lcand.2.pn.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ]		; <%struct.edge_rec*> [#uses=1]
+	%.pn5.in.in.i = ptrtoint %struct.edge_rec* %.pn5.in.in.in.i to i32		; <i32> [#uses=1]
+	%.pn4.in.in.i = ptrtoint %struct.edge_rec* %.pn4.in.in.in.i to i32		; <i32> [#uses=1]
+	%.pn5.in.i = xor i32 %.pn5.in.in.i, 32		; <i32> [#uses=1]
+	%.pn4.in.i = xor i32 %.pn4.in.in.i, 32		; <i32> [#uses=1]
+	%.pn5.i = inttoptr i32 %.pn5.in.i to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%.pn4.i = inttoptr i32 %.pn4.in.i to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%v1.0.in.i = getelementptr %struct.edge_rec* %.pn5.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%v2.0.in.i = getelementptr %struct.edge_rec* %.pn4.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%v3.0.in.i = getelementptr %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%v1.0.i = load %struct.VERTEX** %v1.0.in.i		; <%struct.VERTEX*> [#uses=3]
+	%v2.0.i = load %struct.VERTEX** %v2.0.in.i		; <%struct.VERTEX*> [#uses=3]
+	%v3.0.i = load %struct.VERTEX** %v3.0.in.i		; <%struct.VERTEX*> [#uses=3]
+	%281 = load double* %202, align 4		; <double> [#uses=3]
+	%282 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%283 = load double* %282, align 4		; <double> [#uses=1]
+	%284 = fsub double %283, %280		; <double> [#uses=2]
+	%285 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%286 = load double* %285, align 4		; <double> [#uses=1]
+	%287 = fsub double %286, %279		; <double> [#uses=2]
+	%288 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%289 = load double* %288, align 4		; <double> [#uses=1]
+	%290 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%291 = load double* %290, align 4		; <double> [#uses=1]
+	%292 = fsub double %291, %280		; <double> [#uses=2]
+	%293 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%294 = load double* %293, align 4		; <double> [#uses=1]
+	%295 = fsub double %294, %279		; <double> [#uses=2]
+	%296 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%297 = load double* %296, align 4		; <double> [#uses=1]
+	%298 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%299 = load double* %298, align 4		; <double> [#uses=1]
+	%300 = fsub double %299, %280		; <double> [#uses=2]
+	%301 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%302 = load double* %301, align 4		; <double> [#uses=1]
+	%303 = fsub double %302, %279		; <double> [#uses=2]
+	%304 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%305 = load double* %304, align 4		; <double> [#uses=1]
+	%306 = fsub double %289, %281		; <double> [#uses=1]
+	%307 = fmul double %292, %303		; <double> [#uses=1]
+	%308 = fmul double %295, %300		; <double> [#uses=1]
+	%309 = fsub double %307, %308		; <double> [#uses=1]
+	%310 = fmul double %306, %309		; <double> [#uses=1]
+	%311 = fsub double %297, %281		; <double> [#uses=1]
+	%312 = fmul double %300, %287		; <double> [#uses=1]
+	%313 = fmul double %303, %284		; <double> [#uses=1]
+	%314 = fsub double %312, %313		; <double> [#uses=1]
+	%315 = fmul double %311, %314		; <double> [#uses=1]
+	%316 = fadd double %315, %310		; <double> [#uses=1]
+	%317 = fsub double %305, %281		; <double> [#uses=1]
+	%318 = fmul double %284, %295		; <double> [#uses=1]
+	%319 = fmul double %287, %292		; <double> [#uses=1]
+	%320 = fsub double %318, %319		; <double> [#uses=1]
+	%321 = fmul double %317, %320		; <double> [#uses=1]
+	%322 = fadd double %321, %316		; <double> [#uses=1]
+	%323 = fcmp ogt double %322, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %323, label %bb11.i, label %bb13.loopexit.i
+
+bb13.loopexit.i:		; preds = %bb12.i
+	store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** @avail_edge
+	%.pre23.i = load %struct.VERTEX** %170, align 4		; <%struct.VERTEX*> [#uses=1]
+	%.pre24.i = load %struct.VERTEX** %175, align 4		; <%struct.VERTEX*> [#uses=1]
+	br label %bb13.i
+
+bb13.i:		; preds = %bb13.loopexit.i, %bb9.i
+	%324 = phi %struct.VERTEX* [ %.pre24.i, %bb13.loopexit.i ], [ %176, %bb9.i ]		; <%struct.VERTEX*> [#uses=4]
+	%325 = phi %struct.VERTEX* [ %.pre23.i, %bb13.loopexit.i ], [ %171, %bb9.i ]		; <%struct.VERTEX*> [#uses=3]
+	%lcand.1.i = phi %struct.edge_rec* [ %lcand.0.i, %bb13.loopexit.i ], [ %lcand.2.i, %bb9.i ]		; <%struct.edge_rec*> [#uses=3]
+	%326 = ptrtoint %struct.edge_rec* %rcand.2.i to i32		; <i32> [#uses=2]
+	%327 = add i32 %326, 16		; <i32> [#uses=1]
+	%328 = and i32 %327, 63		; <i32> [#uses=1]
+	%329 = and i32 %326, -64		; <i32> [#uses=1]
+	%330 = or i32 %328, %329		; <i32> [#uses=1]
+	%331 = inttoptr i32 %330 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%332 = getelementptr %struct.edge_rec* %331, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%333 = load %struct.edge_rec** %332, align 4		; <%struct.edge_rec*> [#uses=1]
+	%334 = ptrtoint %struct.edge_rec* %333 to i32		; <i32> [#uses=2]
+	%335 = add i32 %334, 16		; <i32> [#uses=1]
+	%336 = and i32 %335, 63		; <i32> [#uses=1]
+	%337 = and i32 %334, -64		; <i32> [#uses=1]
+	%338 = or i32 %336, %337		; <i32> [#uses=3]
+	%339 = xor i32 %338, 32		; <i32> [#uses=1]
+	%340 = inttoptr i32 %339 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%341 = getelementptr %struct.edge_rec* %340, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%342 = load %struct.VERTEX** %341, align 4		; <%struct.VERTEX*> [#uses=2]
+	%343 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%344 = load double* %343, align 4		; <double> [#uses=1]
+	%345 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%346 = load double* %345, align 4		; <double> [#uses=1]
+	%347 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%348 = load double* %347, align 4		; <double> [#uses=1]
+	%349 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%350 = load double* %349, align 4		; <double> [#uses=1]
+	%351 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 0		; <double*> [#uses=2]
+	%352 = load double* %351, align 4		; <double> [#uses=3]
+	%353 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 1		; <double*> [#uses=2]
+	%354 = load double* %353, align 4		; <double> [#uses=3]
+	%355 = fsub double %344, %352		; <double> [#uses=1]
+	%356 = fsub double %350, %354		; <double> [#uses=1]
+	%357 = fmul double %355, %356		; <double> [#uses=1]
+	%358 = fsub double %348, %352		; <double> [#uses=1]
+	%359 = fsub double %346, %354		; <double> [#uses=1]
+	%360 = fmul double %358, %359		; <double> [#uses=1]
+	%361 = fsub double %357, %360		; <double> [#uses=1]
+	%362 = fcmp ogt double %361, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %362, label %bb14.i, label %bb17.i
+
+bb14.i:		; preds = %bb13.i
+	%363 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%avail_edge.promoted = load %struct.edge_rec** @avail_edge		; <%struct.edge_rec*> [#uses=1]
+	br label %bb16.i
+
+bb15.i:		; preds = %bb16.i
+	%364 = ptrtoint %struct.edge_rec* %rcand.0.i to i32		; <i32> [#uses=3]
+	%365 = add i32 %364, 16		; <i32> [#uses=1]
+	%366 = and i32 %365, 63		; <i32> [#uses=1]
+	%367 = and i32 %364, -64		; <i32> [#uses=3]
+	%368 = or i32 %366, %367		; <i32> [#uses=1]
+	%369 = inttoptr i32 %368 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%370 = getelementptr %struct.edge_rec* %369, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%371 = load %struct.edge_rec** %370, align 4		; <%struct.edge_rec*> [#uses=1]
+	%372 = ptrtoint %struct.edge_rec* %371 to i32		; <i32> [#uses=2]
+	%373 = add i32 %372, 16		; <i32> [#uses=1]
+	%374 = and i32 %373, 63		; <i32> [#uses=1]
+	%375 = and i32 %372, -64		; <i32> [#uses=1]
+	%376 = or i32 %374, %375		; <i32> [#uses=1]
+	%377 = inttoptr i32 %376 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%378 = getelementptr %struct.edge_rec* %rcand.0.i, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%379 = load %struct.edge_rec** %378, align 4		; <%struct.edge_rec*> [#uses=1]
+	%380 = ptrtoint %struct.edge_rec* %379 to i32		; <i32> [#uses=2]
+	%381 = add i32 %380, 16		; <i32> [#uses=1]
+	%382 = and i32 %381, 63		; <i32> [#uses=1]
+	%383 = and i32 %380, -64		; <i32> [#uses=1]
+	%384 = or i32 %382, %383		; <i32> [#uses=1]
+	%385 = inttoptr i32 %384 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%386 = getelementptr %struct.edge_rec* %377, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%387 = load %struct.edge_rec** %386, align 4		; <%struct.edge_rec*> [#uses=1]
+	%388 = ptrtoint %struct.edge_rec* %387 to i32		; <i32> [#uses=2]
+	%389 = add i32 %388, 16		; <i32> [#uses=1]
+	%390 = and i32 %389, 63		; <i32> [#uses=1]
+	%391 = and i32 %388, -64		; <i32> [#uses=1]
+	%392 = or i32 %390, %391		; <i32> [#uses=1]
+	%393 = inttoptr i32 %392 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%394 = getelementptr %struct.edge_rec* %393, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%395 = load %struct.edge_rec** %394, align 4		; <%struct.edge_rec*> [#uses=1]
+	%396 = getelementptr %struct.edge_rec* %385, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%397 = load %struct.edge_rec** %396, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %395, %struct.edge_rec** %396, align 4
+	store %struct.edge_rec* %397, %struct.edge_rec** %394, align 4
+	%398 = load %struct.edge_rec** %378, align 4		; <%struct.edge_rec*> [#uses=1]
+	%399 = load %struct.edge_rec** %386, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %398, %struct.edge_rec** %386, align 4
+	store %struct.edge_rec* %399, %struct.edge_rec** %378, align 4
+	%400 = xor i32 %364, 32		; <i32> [#uses=2]
+	%401 = add i32 %400, 16		; <i32> [#uses=1]
+	%402 = and i32 %401, 63		; <i32> [#uses=1]
+	%403 = or i32 %402, %367		; <i32> [#uses=1]
+	%404 = inttoptr i32 %403 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%405 = getelementptr %struct.edge_rec* %404, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%406 = load %struct.edge_rec** %405, align 4		; <%struct.edge_rec*> [#uses=1]
+	%407 = ptrtoint %struct.edge_rec* %406 to i32		; <i32> [#uses=2]
+	%408 = add i32 %407, 16		; <i32> [#uses=1]
+	%409 = and i32 %408, 63		; <i32> [#uses=1]
+	%410 = and i32 %407, -64		; <i32> [#uses=1]
+	%411 = or i32 %409, %410		; <i32> [#uses=1]
+	%412 = inttoptr i32 %411 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%413 = inttoptr i32 %400 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%414 = getelementptr %struct.edge_rec* %413, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%415 = load %struct.edge_rec** %414, align 4		; <%struct.edge_rec*> [#uses=1]
+	%416 = ptrtoint %struct.edge_rec* %415 to i32		; <i32> [#uses=2]
+	%417 = add i32 %416, 16		; <i32> [#uses=1]
+	%418 = and i32 %417, 63		; <i32> [#uses=1]
+	%419 = and i32 %416, -64		; <i32> [#uses=1]
+	%420 = or i32 %418, %419		; <i32> [#uses=1]
+	%421 = inttoptr i32 %420 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%422 = getelementptr %struct.edge_rec* %412, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%423 = load %struct.edge_rec** %422, align 4		; <%struct.edge_rec*> [#uses=1]
+	%424 = ptrtoint %struct.edge_rec* %423 to i32		; <i32> [#uses=2]
+	%425 = add i32 %424, 16		; <i32> [#uses=1]
+	%426 = and i32 %425, 63		; <i32> [#uses=1]
+	%427 = and i32 %424, -64		; <i32> [#uses=1]
+	%428 = or i32 %426, %427		; <i32> [#uses=1]
+	%429 = inttoptr i32 %428 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%430 = getelementptr %struct.edge_rec* %429, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%431 = load %struct.edge_rec** %430, align 4		; <%struct.edge_rec*> [#uses=1]
+	%432 = getelementptr %struct.edge_rec* %421, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%433 = load %struct.edge_rec** %432, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %431, %struct.edge_rec** %432, align 4
+	store %struct.edge_rec* %433, %struct.edge_rec** %430, align 4
+	%434 = load %struct.edge_rec** %414, align 4		; <%struct.edge_rec*> [#uses=1]
+	%435 = load %struct.edge_rec** %422, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %434, %struct.edge_rec** %422, align 4
+	store %struct.edge_rec* %435, %struct.edge_rec** %414, align 4
+	%436 = inttoptr i32 %367 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%437 = getelementptr %struct.edge_rec* %436, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** %437, align 4
+	%438 = add i32 %t.1.in.i, 16		; <i32> [#uses=1]
+	%439 = and i32 %438, 63		; <i32> [#uses=1]
+	%440 = and i32 %t.1.in.i, -64		; <i32> [#uses=1]
+	%441 = or i32 %439, %440		; <i32> [#uses=1]
+	%442 = inttoptr i32 %441 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%443 = getelementptr %struct.edge_rec* %442, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%444 = load %struct.edge_rec** %443, align 4		; <%struct.edge_rec*> [#uses=1]
+	%445 = ptrtoint %struct.edge_rec* %444 to i32		; <i32> [#uses=2]
+	%446 = add i32 %445, 16		; <i32> [#uses=1]
+	%447 = and i32 %446, 63		; <i32> [#uses=1]
+	%448 = and i32 %445, -64		; <i32> [#uses=1]
+	%449 = or i32 %447, %448		; <i32> [#uses=2]
+	%.pre25.i = load double* %351, align 4		; <double> [#uses=1]
+	%.pre26.i = load double* %353, align 4		; <double> [#uses=1]
+	br label %bb16.i
+
+bb16.i:		; preds = %bb15.i, %bb14.i
+	%avail_edge.tmp.0 = phi %struct.edge_rec* [ %avail_edge.promoted, %bb14.i ], [ %436, %bb15.i ]		; <%struct.edge_rec*> [#uses=2]
+	%450 = phi double [ %.pre26.i, %bb15.i ], [ %354, %bb14.i ]		; <double> [#uses=3]
+	%451 = phi double [ %.pre25.i, %bb15.i ], [ %352, %bb14.i ]		; <double> [#uses=3]
+	%rcand.0.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ]		; <%struct.edge_rec*> [#uses=3]
+	%t.1.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ]		; <i32> [#uses=3]
+	%.pn3.in.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ]		; <i32> [#uses=1]
+	%.pn.in.in.in.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ]		; <%struct.edge_rec*> [#uses=1]
+	%rcand.2.pn.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ]		; <%struct.edge_rec*> [#uses=1]
+	%t.1.i = inttoptr i32 %t.1.in.i to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
+	%.pn.in.in.i = ptrtoint %struct.edge_rec* %.pn.in.in.in.i to i32		; <i32> [#uses=1]
+	%.pn3.in.i = xor i32 %.pn3.in.in.i, 32		; <i32> [#uses=1]
+	%.pn.in.i = xor i32 %.pn.in.in.i, 32		; <i32> [#uses=1]
+	%.pn3.i = inttoptr i32 %.pn3.in.i to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%.pn.i = inttoptr i32 %.pn.in.i to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%v1.1.in.i = getelementptr %struct.edge_rec* %.pn3.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%v2.1.in.i = getelementptr %struct.edge_rec* %.pn.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%v3.1.in.i = getelementptr %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%v1.1.i = load %struct.VERTEX** %v1.1.in.i		; <%struct.VERTEX*> [#uses=3]
+	%v2.1.i = load %struct.VERTEX** %v2.1.in.i		; <%struct.VERTEX*> [#uses=3]
+	%v3.1.i = load %struct.VERTEX** %v3.1.in.i		; <%struct.VERTEX*> [#uses=3]
+	%452 = load double* %363, align 4		; <double> [#uses=3]
+	%453 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%454 = load double* %453, align 4		; <double> [#uses=1]
+	%455 = fsub double %454, %451		; <double> [#uses=2]
+	%456 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%457 = load double* %456, align 4		; <double> [#uses=1]
+	%458 = fsub double %457, %450		; <double> [#uses=2]
+	%459 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%460 = load double* %459, align 4		; <double> [#uses=1]
+	%461 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%462 = load double* %461, align 4		; <double> [#uses=1]
+	%463 = fsub double %462, %451		; <double> [#uses=2]
+	%464 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%465 = load double* %464, align 4		; <double> [#uses=1]
+	%466 = fsub double %465, %450		; <double> [#uses=2]
+	%467 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%468 = load double* %467, align 4		; <double> [#uses=1]
+	%469 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%470 = load double* %469, align 4		; <double> [#uses=1]
+	%471 = fsub double %470, %451		; <double> [#uses=2]
+	%472 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%473 = load double* %472, align 4		; <double> [#uses=1]
+	%474 = fsub double %473, %450		; <double> [#uses=2]
+	%475 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%476 = load double* %475, align 4		; <double> [#uses=1]
+	%477 = fsub double %460, %452		; <double> [#uses=1]
+	%478 = fmul double %463, %474		; <double> [#uses=1]
+	%479 = fmul double %466, %471		; <double> [#uses=1]
+	%480 = fsub double %478, %479		; <double> [#uses=1]
+	%481 = fmul double %477, %480		; <double> [#uses=1]
+	%482 = fsub double %468, %452		; <double> [#uses=1]
+	%483 = fmul double %471, %458		; <double> [#uses=1]
+	%484 = fmul double %474, %455		; <double> [#uses=1]
+	%485 = fsub double %483, %484		; <double> [#uses=1]
+	%486 = fmul double %482, %485		; <double> [#uses=1]
+	%487 = fadd double %486, %481		; <double> [#uses=1]
+	%488 = fsub double %476, %452		; <double> [#uses=1]
+	%489 = fmul double %455, %466		; <double> [#uses=1]
+	%490 = fmul double %458, %463		; <double> [#uses=1]
+	%491 = fsub double %489, %490		; <double> [#uses=1]
+	%492 = fmul double %488, %491		; <double> [#uses=1]
+	%493 = fadd double %492, %487		; <double> [#uses=1]
+	%494 = fcmp ogt double %493, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %494, label %bb15.i, label %bb17.loopexit.i
+
+bb17.loopexit.i:		; preds = %bb16.i
+	store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** @avail_edge
+	%.pre27.i = load %struct.VERTEX** %170, align 4		; <%struct.VERTEX*> [#uses=1]
+	%.pre28.i = load %struct.VERTEX** %175, align 4		; <%struct.VERTEX*> [#uses=1]
+	br label %bb17.i
+
+bb17.i:		; preds = %bb17.loopexit.i, %bb13.i
+	%495 = phi %struct.VERTEX* [ %.pre28.i, %bb17.loopexit.i ], [ %324, %bb13.i ]		; <%struct.VERTEX*> [#uses=3]
+	%496 = phi %struct.VERTEX* [ %.pre27.i, %bb17.loopexit.i ], [ %325, %bb13.i ]		; <%struct.VERTEX*> [#uses=3]
+	%rcand.1.i = phi %struct.edge_rec* [ %rcand.0.i, %bb17.loopexit.i ], [ %rcand.2.i, %bb13.i ]		; <%struct.edge_rec*> [#uses=3]
+	%497 = ptrtoint %struct.edge_rec* %lcand.1.i to i32		; <i32> [#uses=1]
+	%498 = xor i32 %497, 32		; <i32> [#uses=1]
+	%499 = inttoptr i32 %498 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%500 = getelementptr %struct.edge_rec* %499, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%501 = load %struct.VERTEX** %500, align 4		; <%struct.VERTEX*> [#uses=4]
+	%502 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%503 = load double* %502, align 4		; <double> [#uses=1]
+	%504 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%505 = load double* %504, align 4		; <double> [#uses=1]
+	%506 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%507 = load double* %506, align 4		; <double> [#uses=2]
+	%508 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%509 = load double* %508, align 4		; <double> [#uses=2]
+	%510 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%511 = load double* %510, align 4		; <double> [#uses=3]
+	%512 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%513 = load double* %512, align 4		; <double> [#uses=3]
+	%514 = fsub double %503, %511		; <double> [#uses=2]
+	%515 = fsub double %509, %513		; <double> [#uses=1]
+	%516 = fmul double %514, %515		; <double> [#uses=1]
+	%517 = fsub double %507, %511		; <double> [#uses=1]
+	%518 = fsub double %505, %513		; <double> [#uses=2]
+	%519 = fmul double %517, %518		; <double> [#uses=1]
+	%520 = fsub double %516, %519		; <double> [#uses=1]
+	%521 = fcmp ogt double %520, 0.000000e+00		; <i1> [#uses=2]
+	%522 = ptrtoint %struct.edge_rec* %rcand.1.i to i32		; <i32> [#uses=3]
+	%523 = xor i32 %522, 32		; <i32> [#uses=1]
+	%524 = inttoptr i32 %523 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%525 = getelementptr %struct.edge_rec* %524, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%526 = load %struct.VERTEX** %525, align 4		; <%struct.VERTEX*> [#uses=4]
+	%527 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%528 = load double* %527, align 4		; <double> [#uses=4]
+	%529 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%530 = load double* %529, align 4		; <double> [#uses=4]
+	%531 = fsub double %530, %513		; <double> [#uses=1]
+	%532 = fmul double %514, %531		; <double> [#uses=1]
+	%533 = fsub double %528, %511		; <double> [#uses=1]
+	%534 = fmul double %533, %518		; <double> [#uses=1]
+	%535 = fsub double %532, %534		; <double> [#uses=1]
+	%536 = fcmp ogt double %535, 0.000000e+00		; <i1> [#uses=2]
+	%537 = or i1 %536, %521		; <i1> [#uses=1]
+	br i1 %537, label %bb21.i, label %do_merge.exit
+
+bb21.i:		; preds = %bb17.i
+	%538 = getelementptr %struct.edge_rec* %lcand.1.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%539 = load %struct.VERTEX** %538, align 4		; <%struct.VERTEX*> [#uses=3]
+	%540 = getelementptr %struct.edge_rec* %rcand.1.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%541 = load %struct.VERTEX** %540, align 4		; <%struct.VERTEX*> [#uses=3]
+	br i1 %521, label %bb22.i, label %bb24.i
+
+bb22.i:		; preds = %bb21.i
+	br i1 %536, label %bb23.i, label %bb25.i
+
+bb23.i:		; preds = %bb22.i
+	%542 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%543 = load double* %542, align 4		; <double> [#uses=3]
+	%544 = fsub double %507, %528		; <double> [#uses=2]
+	%545 = fsub double %509, %530		; <double> [#uses=2]
+	%546 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%547 = load double* %546, align 4		; <double> [#uses=1]
+	%548 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%549 = load double* %548, align 4		; <double> [#uses=1]
+	%550 = fsub double %549, %528		; <double> [#uses=2]
+	%551 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%552 = load double* %551, align 4		; <double> [#uses=1]
+	%553 = fsub double %552, %530		; <double> [#uses=2]
+	%554 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%555 = load double* %554, align 4		; <double> [#uses=1]
+	%556 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%557 = load double* %556, align 4		; <double> [#uses=1]
+	%558 = fsub double %557, %528		; <double> [#uses=2]
+	%559 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%560 = load double* %559, align 4		; <double> [#uses=1]
+	%561 = fsub double %560, %530		; <double> [#uses=2]
+	%562 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%563 = load double* %562, align 4		; <double> [#uses=1]
+	%564 = fsub double %547, %543		; <double> [#uses=1]
+	%565 = fmul double %550, %561		; <double> [#uses=1]
+	%566 = fmul double %553, %558		; <double> [#uses=1]
+	%567 = fsub double %565, %566		; <double> [#uses=1]
+	%568 = fmul double %564, %567		; <double> [#uses=1]
+	%569 = fsub double %555, %543		; <double> [#uses=1]
+	%570 = fmul double %558, %545		; <double> [#uses=1]
+	%571 = fmul double %561, %544		; <double> [#uses=1]
+	%572 = fsub double %570, %571		; <double> [#uses=1]
+	%573 = fmul double %569, %572		; <double> [#uses=1]
+	%574 = fadd double %573, %568		; <double> [#uses=1]
+	%575 = fsub double %563, %543		; <double> [#uses=1]
+	%576 = fmul double %544, %553		; <double> [#uses=1]
+	%577 = fmul double %545, %550		; <double> [#uses=1]
+	%578 = fsub double %576, %577		; <double> [#uses=1]
+	%579 = fmul double %575, %578		; <double> [#uses=1]
+	%580 = fadd double %579, %574		; <double> [#uses=1]
+	%581 = fcmp ogt double %580, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %581, label %bb24.i, label %bb25.i
+
+bb24.i:		; preds = %bb23.i, %bb21.i
+	%582 = add i32 %522, 48		; <i32> [#uses=1]
+	%583 = and i32 %582, 63		; <i32> [#uses=1]
+	%584 = and i32 %522, -64		; <i32> [#uses=1]
+	%585 = or i32 %583, %584		; <i32> [#uses=1]
+	%586 = inttoptr i32 %585 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%587 = getelementptr %struct.edge_rec* %586, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%588 = load %struct.edge_rec** %587, align 4		; <%struct.edge_rec*> [#uses=1]
+	%589 = ptrtoint %struct.edge_rec* %588 to i32		; <i32> [#uses=2]
+	%590 = add i32 %589, 16		; <i32> [#uses=1]
+	%591 = and i32 %590, 63		; <i32> [#uses=1]
+	%592 = and i32 %589, -64		; <i32> [#uses=1]
+	%593 = or i32 %591, %592		; <i32> [#uses=1]
+	%594 = inttoptr i32 %593 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%595 = call arm_apcscc  %struct.edge_rec* @alloc_edge() nounwind		; <%struct.edge_rec*> [#uses=5]
+	%596 = getelementptr %struct.edge_rec* %595, i32 0, i32 1		; <%struct.edge_rec**> [#uses=4]
+	store %struct.edge_rec* %595, %struct.edge_rec** %596, align 4
+	%597 = getelementptr %struct.edge_rec* %595, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	store %struct.VERTEX* %526, %struct.VERTEX** %597, align 4
+	%598 = ptrtoint %struct.edge_rec* %595 to i32		; <i32> [#uses=5]
+	%599 = add i32 %598, 16		; <i32> [#uses=1]
+	%600 = inttoptr i32 %599 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%601 = add i32 %598, 48		; <i32> [#uses=1]
+	%602 = inttoptr i32 %601 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%603 = getelementptr %struct.edge_rec* %600, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %602, %struct.edge_rec** %603, align 4
+	%604 = add i32 %598, 32		; <i32> [#uses=1]
+	%605 = inttoptr i32 %604 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
+	%606 = getelementptr %struct.edge_rec* %605, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %605, %struct.edge_rec** %606, align 4
+	%607 = getelementptr %struct.edge_rec* %605, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	store %struct.VERTEX* %495, %struct.VERTEX** %607, align 4
+	%608 = getelementptr %struct.edge_rec* %602, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %600, %struct.edge_rec** %608, align 4
+	%609 = load %struct.edge_rec** %596, align 4		; <%struct.edge_rec*> [#uses=1]
+	%610 = ptrtoint %struct.edge_rec* %609 to i32		; <i32> [#uses=2]
+	%611 = add i32 %610, 16		; <i32> [#uses=1]
+	%612 = and i32 %611, 63		; <i32> [#uses=1]
+	%613 = and i32 %610, -64		; <i32> [#uses=1]
+	%614 = or i32 %612, %613		; <i32> [#uses=1]
+	%615 = inttoptr i32 %614 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%616 = getelementptr %struct.edge_rec* %594, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%617 = load %struct.edge_rec** %616, align 4		; <%struct.edge_rec*> [#uses=1]
+	%618 = ptrtoint %struct.edge_rec* %617 to i32		; <i32> [#uses=2]
+	%619 = add i32 %618, 16		; <i32> [#uses=1]
+	%620 = and i32 %619, 63		; <i32> [#uses=1]
+	%621 = and i32 %618, -64		; <i32> [#uses=1]
+	%622 = or i32 %620, %621		; <i32> [#uses=1]
+	%623 = inttoptr i32 %622 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%624 = getelementptr %struct.edge_rec* %623, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%625 = load %struct.edge_rec** %624, align 4		; <%struct.edge_rec*> [#uses=1]
+	%626 = getelementptr %struct.edge_rec* %615, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%627 = load %struct.edge_rec** %626, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %625, %struct.edge_rec** %626, align 4
+	store %struct.edge_rec* %627, %struct.edge_rec** %624, align 4
+	%628 = load %struct.edge_rec** %596, align 4		; <%struct.edge_rec*> [#uses=1]
+	%629 = load %struct.edge_rec** %616, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %628, %struct.edge_rec** %616, align 4
+	store %struct.edge_rec* %629, %struct.edge_rec** %596, align 4
+	%630 = xor i32 %598, 32		; <i32> [#uses=2]
+	%631 = inttoptr i32 %630 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%632 = getelementptr %struct.edge_rec* %631, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%633 = load %struct.edge_rec** %632, align 4		; <%struct.edge_rec*> [#uses=1]
+	%634 = ptrtoint %struct.edge_rec* %633 to i32		; <i32> [#uses=2]
+	%635 = add i32 %634, 16		; <i32> [#uses=1]
+	%636 = and i32 %635, 63		; <i32> [#uses=1]
+	%637 = and i32 %634, -64		; <i32> [#uses=1]
+	%638 = or i32 %636, %637		; <i32> [#uses=1]
+	%639 = inttoptr i32 %638 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%640 = getelementptr %struct.edge_rec* %174, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%641 = load %struct.edge_rec** %640, align 4		; <%struct.edge_rec*> [#uses=1]
+	%642 = ptrtoint %struct.edge_rec* %641 to i32		; <i32> [#uses=2]
+	%643 = add i32 %642, 16		; <i32> [#uses=1]
+	%644 = and i32 %643, 63		; <i32> [#uses=1]
+	%645 = and i32 %642, -64		; <i32> [#uses=1]
+	%646 = or i32 %644, %645		; <i32> [#uses=1]
+	%647 = inttoptr i32 %646 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%648 = getelementptr %struct.edge_rec* %647, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%649 = load %struct.edge_rec** %648, align 4		; <%struct.edge_rec*> [#uses=1]
+	%650 = getelementptr %struct.edge_rec* %639, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%651 = load %struct.edge_rec** %650, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %649, %struct.edge_rec** %650, align 4
+	store %struct.edge_rec* %651, %struct.edge_rec** %648, align 4
+	%652 = load %struct.edge_rec** %632, align 4		; <%struct.edge_rec*> [#uses=1]
+	%653 = load %struct.edge_rec** %640, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %652, %struct.edge_rec** %640, align 4
+	store %struct.edge_rec* %653, %struct.edge_rec** %632, align 4
+	%654 = add i32 %630, 48		; <i32> [#uses=1]
+	%655 = and i32 %654, 63		; <i32> [#uses=1]
+	%656 = and i32 %598, -64		; <i32> [#uses=1]
+	%657 = or i32 %655, %656		; <i32> [#uses=1]
+	%658 = inttoptr i32 %657 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%659 = getelementptr %struct.edge_rec* %658, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%660 = load %struct.edge_rec** %659, align 4		; <%struct.edge_rec*> [#uses=1]
+	%661 = ptrtoint %struct.edge_rec* %660 to i32		; <i32> [#uses=2]
+	%662 = add i32 %661, 16		; <i32> [#uses=1]
+	%663 = and i32 %662, 63		; <i32> [#uses=1]
+	%664 = and i32 %661, -64		; <i32> [#uses=1]
+	%665 = or i32 %663, %664		; <i32> [#uses=1]
+	%666 = inttoptr i32 %665 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	br label %bb9.i
+
+bb25.i:		; preds = %bb23.i, %bb22.i
+	%667 = add i32 %172, 16		; <i32> [#uses=1]
+	%668 = and i32 %667, 63		; <i32> [#uses=1]
+	%669 = and i32 %172, -64		; <i32> [#uses=1]
+	%670 = or i32 %668, %669		; <i32> [#uses=1]
+	%671 = inttoptr i32 %670 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%672 = getelementptr %struct.edge_rec* %671, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%673 = load %struct.edge_rec** %672, align 4		; <%struct.edge_rec*> [#uses=1]
+	%674 = ptrtoint %struct.edge_rec* %673 to i32		; <i32> [#uses=2]
+	%675 = add i32 %674, 16		; <i32> [#uses=1]
+	%676 = and i32 %675, 63		; <i32> [#uses=1]
+	%677 = and i32 %674, -64		; <i32> [#uses=1]
+	%678 = or i32 %676, %677		; <i32> [#uses=1]
+	%679 = inttoptr i32 %678 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%680 = call arm_apcscc  %struct.edge_rec* @alloc_edge() nounwind		; <%struct.edge_rec*> [#uses=4]
+	%681 = getelementptr %struct.edge_rec* %680, i32 0, i32 1		; <%struct.edge_rec**> [#uses=5]
+	store %struct.edge_rec* %680, %struct.edge_rec** %681, align 4
+	%682 = getelementptr %struct.edge_rec* %680, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	store %struct.VERTEX* %501, %struct.VERTEX** %682, align 4
+	%683 = ptrtoint %struct.edge_rec* %680 to i32		; <i32> [#uses=4]
+	%684 = add i32 %683, 16		; <i32> [#uses=1]
+	%685 = inttoptr i32 %684 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%686 = add i32 %683, 48		; <i32> [#uses=1]
+	%687 = inttoptr i32 %686 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%688 = getelementptr %struct.edge_rec* %685, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %687, %struct.edge_rec** %688, align 4
+	%689 = add i32 %683, 32		; <i32> [#uses=1]
+	%690 = inttoptr i32 %689 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
+	%691 = getelementptr %struct.edge_rec* %690, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %690, %struct.edge_rec** %691, align 4
+	%692 = getelementptr %struct.edge_rec* %690, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	store %struct.VERTEX* %496, %struct.VERTEX** %692, align 4
+	%693 = getelementptr %struct.edge_rec* %687, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %685, %struct.edge_rec** %693, align 4
+	%694 = load %struct.edge_rec** %681, align 4		; <%struct.edge_rec*> [#uses=1]
+	%695 = ptrtoint %struct.edge_rec* %694 to i32		; <i32> [#uses=2]
+	%696 = add i32 %695, 16		; <i32> [#uses=1]
+	%697 = and i32 %696, 63		; <i32> [#uses=1]
+	%698 = and i32 %695, -64		; <i32> [#uses=1]
+	%699 = or i32 %697, %698		; <i32> [#uses=1]
+	%700 = inttoptr i32 %699 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%701 = getelementptr %struct.edge_rec* %499, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%702 = load %struct.edge_rec** %701, align 4		; <%struct.edge_rec*> [#uses=1]
+	%703 = ptrtoint %struct.edge_rec* %702 to i32		; <i32> [#uses=2]
+	%704 = add i32 %703, 16		; <i32> [#uses=1]
+	%705 = and i32 %704, 63		; <i32> [#uses=1]
+	%706 = and i32 %703, -64		; <i32> [#uses=1]
+	%707 = or i32 %705, %706		; <i32> [#uses=1]
+	%708 = inttoptr i32 %707 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%709 = getelementptr %struct.edge_rec* %708, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%710 = load %struct.edge_rec** %709, align 4		; <%struct.edge_rec*> [#uses=1]
+	%711 = getelementptr %struct.edge_rec* %700, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%712 = load %struct.edge_rec** %711, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %710, %struct.edge_rec** %711, align 4
+	store %struct.edge_rec* %712, %struct.edge_rec** %709, align 4
+	%713 = load %struct.edge_rec** %681, align 4		; <%struct.edge_rec*> [#uses=1]
+	%714 = load %struct.edge_rec** %701, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %713, %struct.edge_rec** %701, align 4
+	store %struct.edge_rec* %714, %struct.edge_rec** %681, align 4
+	%715 = xor i32 %683, 32		; <i32> [#uses=1]
+	%716 = inttoptr i32 %715 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%717 = getelementptr %struct.edge_rec* %716, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%718 = load %struct.edge_rec** %717, align 4		; <%struct.edge_rec*> [#uses=1]
+	%719 = ptrtoint %struct.edge_rec* %718 to i32		; <i32> [#uses=2]
+	%720 = add i32 %719, 16		; <i32> [#uses=1]
+	%721 = and i32 %720, 63		; <i32> [#uses=1]
+	%722 = and i32 %719, -64		; <i32> [#uses=1]
+	%723 = or i32 %721, %722		; <i32> [#uses=1]
+	%724 = inttoptr i32 %723 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%725 = getelementptr %struct.edge_rec* %679, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%726 = load %struct.edge_rec** %725, align 4		; <%struct.edge_rec*> [#uses=1]
+	%727 = ptrtoint %struct.edge_rec* %726 to i32		; <i32> [#uses=2]
+	%728 = add i32 %727, 16		; <i32> [#uses=1]
+	%729 = and i32 %728, 63		; <i32> [#uses=1]
+	%730 = and i32 %727, -64		; <i32> [#uses=1]
+	%731 = or i32 %729, %730		; <i32> [#uses=1]
+	%732 = inttoptr i32 %731 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%733 = getelementptr %struct.edge_rec* %732, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%734 = load %struct.edge_rec** %733, align 4		; <%struct.edge_rec*> [#uses=1]
+	%735 = getelementptr %struct.edge_rec* %724, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%736 = load %struct.edge_rec** %735, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %734, %struct.edge_rec** %735, align 4
+	store %struct.edge_rec* %736, %struct.edge_rec** %733, align 4
+	%737 = load %struct.edge_rec** %717, align 4		; <%struct.edge_rec*> [#uses=1]
+	%738 = load %struct.edge_rec** %725, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %737, %struct.edge_rec** %725, align 4
+	store %struct.edge_rec* %738, %struct.edge_rec** %717, align 4
+	%739 = load %struct.edge_rec** %681, align 4		; <%struct.edge_rec*> [#uses=1]
+	br label %bb9.i
+
+do_merge.exit:		; preds = %bb17.i
+	%740 = getelementptr %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%741 = load %struct.VERTEX** %740, align 4		; <%struct.VERTEX*> [#uses=1]
+	%742 = icmp eq %struct.VERTEX* %741, %tree_addr.0.i		; <i1> [#uses=1]
+	br i1 %742, label %bb5.loopexit, label %bb2
+
+bb2:		; preds = %bb2, %do_merge.exit
+	%ldo.07 = phi %struct.edge_rec* [ %747, %bb2 ], [ %ldo_addr.0.ph.i, %do_merge.exit ]		; <%struct.edge_rec*> [#uses=1]
+	%743 = ptrtoint %struct.edge_rec* %ldo.07 to i32		; <i32> [#uses=1]
+	%744 = xor i32 %743, 32		; <i32> [#uses=1]
+	%745 = inttoptr i32 %744 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%746 = getelementptr %struct.edge_rec* %745, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%747 = load %struct.edge_rec** %746, align 4		; <%struct.edge_rec*> [#uses=3]
+	%748 = getelementptr %struct.edge_rec* %747, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%749 = load %struct.VERTEX** %748, align 4		; <%struct.VERTEX*> [#uses=1]
+	%750 = icmp eq %struct.VERTEX* %749, %tree_addr.0.i		; <i1> [#uses=1]
+	br i1 %750, label %bb5.loopexit, label %bb2
+
+bb4:		; preds = %bb5.loopexit, %bb4
+	%rdo.05 = phi %struct.edge_rec* [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ]		; <%struct.edge_rec*> [#uses=1]
+	%751 = getelementptr %struct.edge_rec* %rdo.05, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%752 = load %struct.edge_rec** %751, align 4		; <%struct.edge_rec*> [#uses=1]
+	%753 = ptrtoint %struct.edge_rec* %752 to i32		; <i32> [#uses=1]
+	%754 = xor i32 %753, 32		; <i32> [#uses=1]
+	%755 = inttoptr i32 %754 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
+	%756 = getelementptr %struct.edge_rec* %755, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%757 = load %struct.VERTEX** %756, align 4		; <%struct.VERTEX*> [#uses=1]
+	%758 = icmp eq %struct.VERTEX* %757, %extra		; <i1> [#uses=1]
+	br i1 %758, label %bb6, label %bb4
+
+bb5.loopexit:		; preds = %bb2, %do_merge.exit
+	%ldo.0.lcssa = phi %struct.edge_rec* [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ]		; <%struct.edge_rec*> [#uses=1]
+	%759 = getelementptr %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%760 = load %struct.VERTEX** %759, align 4		; <%struct.VERTEX*> [#uses=1]
+	%761 = icmp eq %struct.VERTEX* %760, %extra		; <i1> [#uses=1]
+	br i1 %761, label %bb6, label %bb4
+
+bb6:		; preds = %bb5.loopexit, %bb4
+	%rdo.0.lcssa = phi %struct.edge_rec* [ %rdo_addr.0.i, %bb5.loopexit ], [ %755, %bb4 ]		; <%struct.edge_rec*> [#uses=1]
+	%tmp16 = ptrtoint %struct.edge_rec* %ldo.0.lcssa to i32		; <i32> [#uses=1]
+	%tmp4 = ptrtoint %struct.edge_rec* %rdo.0.lcssa to i32		; <i32> [#uses=1]
+	br label %bb15
+
+bb7:		; preds = %bb
+	%762 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1		; <%struct.VERTEX**> [#uses=1]
+	%763 = load %struct.VERTEX** %762, align 4		; <%struct.VERTEX*> [#uses=4]
+	%764 = icmp eq %struct.VERTEX* %763, null		; <i1> [#uses=1]
+	%765 = call arm_apcscc  %struct.edge_rec* @alloc_edge() nounwind		; <%struct.edge_rec*> [#uses=5]
+	%766 = getelementptr %struct.edge_rec* %765, i32 0, i32 1		; <%struct.edge_rec**> [#uses=4]
+	store %struct.edge_rec* %765, %struct.edge_rec** %766, align 4
+	%767 = getelementptr %struct.edge_rec* %765, i32 0, i32 0		; <%struct.VERTEX**> [#uses=3]
+	br i1 %764, label %bb10, label %bb11
+
+bb8:		; preds = %entry
+	%768 = call arm_apcscc  i32 @puts(i8* getelementptr ([21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind		; <i32> [#uses=0]
+	call arm_apcscc  void @exit(i32 -1) noreturn nounwind
+	unreachable
+
+bb10:		; preds = %bb7
+	store %struct.VERTEX* %tree, %struct.VERTEX** %767, align 4
+	%769 = ptrtoint %struct.edge_rec* %765 to i32		; <i32> [#uses=5]
+	%770 = add i32 %769, 16		; <i32> [#uses=1]
+	%771 = inttoptr i32 %770 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%772 = add i32 %769, 48		; <i32> [#uses=1]
+	%773 = inttoptr i32 %772 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%774 = getelementptr %struct.edge_rec* %771, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %773, %struct.edge_rec** %774, align 4
+	%775 = add i32 %769, 32		; <i32> [#uses=1]
+	%776 = inttoptr i32 %775 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
+	%777 = getelementptr %struct.edge_rec* %776, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %776, %struct.edge_rec** %777, align 4
+	%778 = getelementptr %struct.edge_rec* %776, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	store %struct.VERTEX* %extra, %struct.VERTEX** %778, align 4
+	%779 = getelementptr %struct.edge_rec* %773, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %771, %struct.edge_rec** %779, align 4
+	%780 = xor i32 %769, 32		; <i32> [#uses=1]
+	br label %bb15
+
+bb11:		; preds = %bb7
+	store %struct.VERTEX* %763, %struct.VERTEX** %767, align 4
+	%781 = ptrtoint %struct.edge_rec* %765 to i32		; <i32> [#uses=6]
+	%782 = add i32 %781, 16		; <i32> [#uses=1]
+	%783 = inttoptr i32 %782 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%784 = add i32 %781, 48		; <i32> [#uses=1]
+	%785 = inttoptr i32 %784 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%786 = getelementptr %struct.edge_rec* %783, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %785, %struct.edge_rec** %786, align 4
+	%787 = add i32 %781, 32		; <i32> [#uses=1]
+	%788 = inttoptr i32 %787 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
+	%789 = getelementptr %struct.edge_rec* %788, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %788, %struct.edge_rec** %789, align 4
+	%790 = getelementptr %struct.edge_rec* %788, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	store %struct.VERTEX* %tree, %struct.VERTEX** %790, align 4
+	%791 = getelementptr %struct.edge_rec* %785, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %783, %struct.edge_rec** %791, align 4
+	%792 = call arm_apcscc  %struct.edge_rec* @alloc_edge() nounwind		; <%struct.edge_rec*> [#uses=4]
+	%793 = getelementptr %struct.edge_rec* %792, i32 0, i32 1		; <%struct.edge_rec**> [#uses=4]
+	store %struct.edge_rec* %792, %struct.edge_rec** %793, align 4
+	%794 = getelementptr %struct.edge_rec* %792, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	store %struct.VERTEX* %tree, %struct.VERTEX** %794, align 4
+	%795 = ptrtoint %struct.edge_rec* %792 to i32		; <i32> [#uses=5]
+	%796 = add i32 %795, 16		; <i32> [#uses=1]
+	%797 = inttoptr i32 %796 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%798 = add i32 %795, 48		; <i32> [#uses=2]
+	%799 = inttoptr i32 %798 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%800 = getelementptr %struct.edge_rec* %797, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %799, %struct.edge_rec** %800, align 4
+	%801 = add i32 %795, 32		; <i32> [#uses=1]
+	%802 = inttoptr i32 %801 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
+	%803 = getelementptr %struct.edge_rec* %802, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %802, %struct.edge_rec** %803, align 4
+	%804 = getelementptr %struct.edge_rec* %802, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	store %struct.VERTEX* %extra, %struct.VERTEX** %804, align 4
+	%805 = getelementptr %struct.edge_rec* %799, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %797, %struct.edge_rec** %805, align 4
+	%806 = xor i32 %781, 32		; <i32> [#uses=1]
+	%807 = inttoptr i32 %806 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%808 = getelementptr %struct.edge_rec* %807, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%809 = load %struct.edge_rec** %808, align 4		; <%struct.edge_rec*> [#uses=1]
+	%810 = ptrtoint %struct.edge_rec* %809 to i32		; <i32> [#uses=2]
+	%811 = add i32 %810, 16		; <i32> [#uses=1]
+	%812 = and i32 %811, 63		; <i32> [#uses=1]
+	%813 = and i32 %810, -64		; <i32> [#uses=1]
+	%814 = or i32 %812, %813		; <i32> [#uses=1]
+	%815 = inttoptr i32 %814 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%816 = load %struct.edge_rec** %793, align 4		; <%struct.edge_rec*> [#uses=1]
+	%817 = ptrtoint %struct.edge_rec* %816 to i32		; <i32> [#uses=2]
+	%818 = add i32 %817, 16		; <i32> [#uses=1]
+	%819 = and i32 %818, 63		; <i32> [#uses=1]
+	%820 = and i32 %817, -64		; <i32> [#uses=1]
+	%821 = or i32 %819, %820		; <i32> [#uses=1]
+	%822 = inttoptr i32 %821 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%823 = getelementptr %struct.edge_rec* %822, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%824 = load %struct.edge_rec** %823, align 4		; <%struct.edge_rec*> [#uses=1]
+	%825 = getelementptr %struct.edge_rec* %815, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%826 = load %struct.edge_rec** %825, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %824, %struct.edge_rec** %825, align 4
+	store %struct.edge_rec* %826, %struct.edge_rec** %823, align 4
+	%827 = load %struct.edge_rec** %808, align 4		; <%struct.edge_rec*> [#uses=1]
+	%828 = load %struct.edge_rec** %793, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %827, %struct.edge_rec** %793, align 4
+	store %struct.edge_rec* %828, %struct.edge_rec** %808, align 4
+	%829 = xor i32 %795, 32		; <i32> [#uses=3]
+	%830 = inttoptr i32 %829 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%831 = getelementptr %struct.edge_rec* %830, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	%832 = load %struct.VERTEX** %831, align 4		; <%struct.VERTEX*> [#uses=1]
+	%833 = and i32 %798, 63		; <i32> [#uses=1]
+	%834 = and i32 %795, -64		; <i32> [#uses=1]
+	%835 = or i32 %833, %834		; <i32> [#uses=1]
+	%836 = inttoptr i32 %835 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%837 = getelementptr %struct.edge_rec* %836, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%838 = load %struct.edge_rec** %837, align 4		; <%struct.edge_rec*> [#uses=1]
+	%839 = ptrtoint %struct.edge_rec* %838 to i32		; <i32> [#uses=2]
+	%840 = add i32 %839, 16		; <i32> [#uses=1]
+	%841 = and i32 %840, 63		; <i32> [#uses=1]
+	%842 = and i32 %839, -64		; <i32> [#uses=1]
+	%843 = or i32 %841, %842		; <i32> [#uses=1]
+	%844 = inttoptr i32 %843 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%845 = load %struct.VERTEX** %767, align 4		; <%struct.VERTEX*> [#uses=1]
+	%846 = call arm_apcscc  %struct.edge_rec* @alloc_edge() nounwind		; <%struct.edge_rec*> [#uses=4]
+	%847 = getelementptr %struct.edge_rec* %846, i32 0, i32 1		; <%struct.edge_rec**> [#uses=7]
+	store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4
+	%848 = getelementptr %struct.edge_rec* %846, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	store %struct.VERTEX* %832, %struct.VERTEX** %848, align 4
+	%849 = ptrtoint %struct.edge_rec* %846 to i32		; <i32> [#uses=6]
+	%850 = add i32 %849, 16		; <i32> [#uses=2]
+	%851 = inttoptr i32 %850 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%852 = add i32 %849, 48		; <i32> [#uses=1]
+	%853 = inttoptr i32 %852 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%854 = getelementptr %struct.edge_rec* %851, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %853, %struct.edge_rec** %854, align 4
+	%855 = add i32 %849, 32		; <i32> [#uses=1]
+	%856 = inttoptr i32 %855 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
+	%857 = getelementptr %struct.edge_rec* %856, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %856, %struct.edge_rec** %857, align 4
+	%858 = getelementptr %struct.edge_rec* %856, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
+	store %struct.VERTEX* %845, %struct.VERTEX** %858, align 4
+	%859 = getelementptr %struct.edge_rec* %853, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %851, %struct.edge_rec** %859, align 4
+	%860 = load %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
+	%861 = ptrtoint %struct.edge_rec* %860 to i32		; <i32> [#uses=2]
+	%862 = add i32 %861, 16		; <i32> [#uses=1]
+	%863 = and i32 %862, 63		; <i32> [#uses=1]
+	%864 = and i32 %861, -64		; <i32> [#uses=1]
+	%865 = or i32 %863, %864		; <i32> [#uses=1]
+	%866 = inttoptr i32 %865 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%867 = getelementptr %struct.edge_rec* %844, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%868 = load %struct.edge_rec** %867, align 4		; <%struct.edge_rec*> [#uses=1]
+	%869 = ptrtoint %struct.edge_rec* %868 to i32		; <i32> [#uses=2]
+	%870 = add i32 %869, 16		; <i32> [#uses=1]
+	%871 = and i32 %870, 63		; <i32> [#uses=1]
+	%872 = and i32 %869, -64		; <i32> [#uses=1]
+	%873 = or i32 %871, %872		; <i32> [#uses=1]
+	%874 = inttoptr i32 %873 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%875 = getelementptr %struct.edge_rec* %874, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%876 = load %struct.edge_rec** %875, align 4		; <%struct.edge_rec*> [#uses=1]
+	%877 = getelementptr %struct.edge_rec* %866, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%878 = load %struct.edge_rec** %877, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %876, %struct.edge_rec** %877, align 4
+	store %struct.edge_rec* %878, %struct.edge_rec** %875, align 4
+	%879 = load %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
+	%880 = load %struct.edge_rec** %867, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %879, %struct.edge_rec** %867, align 4
+	store %struct.edge_rec* %880, %struct.edge_rec** %847, align 4
+	%881 = xor i32 %849, 32		; <i32> [#uses=3]
+	%882 = inttoptr i32 %881 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%883 = getelementptr %struct.edge_rec* %882, i32 0, i32 1		; <%struct.edge_rec**> [#uses=6]
+	%884 = load %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
+	%885 = ptrtoint %struct.edge_rec* %884 to i32		; <i32> [#uses=2]
+	%886 = add i32 %885, 16		; <i32> [#uses=1]
+	%887 = and i32 %886, 63		; <i32> [#uses=1]
+	%888 = and i32 %885, -64		; <i32> [#uses=1]
+	%889 = or i32 %887, %888		; <i32> [#uses=1]
+	%890 = inttoptr i32 %889 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%891 = load %struct.edge_rec** %766, align 4		; <%struct.edge_rec*> [#uses=1]
+	%892 = ptrtoint %struct.edge_rec* %891 to i32		; <i32> [#uses=2]
+	%893 = add i32 %892, 16		; <i32> [#uses=1]
+	%894 = and i32 %893, 63		; <i32> [#uses=1]
+	%895 = and i32 %892, -64		; <i32> [#uses=1]
+	%896 = or i32 %894, %895		; <i32> [#uses=1]
+	%897 = inttoptr i32 %896 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%898 = getelementptr %struct.edge_rec* %897, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%899 = load %struct.edge_rec** %898, align 4		; <%struct.edge_rec*> [#uses=1]
+	%900 = getelementptr %struct.edge_rec* %890, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%901 = load %struct.edge_rec** %900, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %899, %struct.edge_rec** %900, align 4
+	store %struct.edge_rec* %901, %struct.edge_rec** %898, align 4
+	%902 = load %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
+	%903 = load %struct.edge_rec** %766, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %902, %struct.edge_rec** %766, align 4
+	store %struct.edge_rec* %903, %struct.edge_rec** %883, align 4
+	%904 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%905 = load double* %904, align 4		; <double> [#uses=2]
+	%906 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%907 = load double* %906, align 4		; <double> [#uses=2]
+	%908 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%909 = load double* %908, align 4		; <double> [#uses=3]
+	%910 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%911 = load double* %910, align 4		; <double> [#uses=3]
+	%912 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%913 = load double* %912, align 4		; <double> [#uses=3]
+	%914 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%915 = load double* %914, align 4		; <double> [#uses=3]
+	%916 = fsub double %905, %913		; <double> [#uses=1]
+	%917 = fsub double %911, %915		; <double> [#uses=1]
+	%918 = fmul double %916, %917		; <double> [#uses=1]
+	%919 = fsub double %909, %913		; <double> [#uses=1]
+	%920 = fsub double %907, %915		; <double> [#uses=1]
+	%921 = fmul double %919, %920		; <double> [#uses=1]
+	%922 = fsub double %918, %921		; <double> [#uses=1]
+	%923 = fcmp ogt double %922, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %923, label %bb15, label %bb13
+
+bb13:		; preds = %bb11
+	%924 = fsub double %905, %909		; <double> [#uses=1]
+	%925 = fsub double %915, %911		; <double> [#uses=1]
+	%926 = fmul double %924, %925		; <double> [#uses=1]
+	%927 = fsub double %913, %909		; <double> [#uses=1]
+	%928 = fsub double %907, %911		; <double> [#uses=1]
+	%929 = fmul double %927, %928		; <double> [#uses=1]
+	%930 = fsub double %926, %929		; <double> [#uses=1]
+	%931 = fcmp ogt double %930, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %931, label %bb15, label %bb14
+
+bb14:		; preds = %bb13
+	%932 = and i32 %850, 63		; <i32> [#uses=1]
+	%933 = and i32 %849, -64		; <i32> [#uses=3]
+	%934 = or i32 %932, %933		; <i32> [#uses=1]
+	%935 = inttoptr i32 %934 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%936 = getelementptr %struct.edge_rec* %935, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%937 = load %struct.edge_rec** %936, align 4		; <%struct.edge_rec*> [#uses=1]
+	%938 = ptrtoint %struct.edge_rec* %937 to i32		; <i32> [#uses=2]
+	%939 = add i32 %938, 16		; <i32> [#uses=1]
+	%940 = and i32 %939, 63		; <i32> [#uses=1]
+	%941 = and i32 %938, -64		; <i32> [#uses=1]
+	%942 = or i32 %940, %941		; <i32> [#uses=1]
+	%943 = inttoptr i32 %942 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%944 = load %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
+	%945 = ptrtoint %struct.edge_rec* %944 to i32		; <i32> [#uses=2]
+	%946 = add i32 %945, 16		; <i32> [#uses=1]
+	%947 = and i32 %946, 63		; <i32> [#uses=1]
+	%948 = and i32 %945, -64		; <i32> [#uses=1]
+	%949 = or i32 %947, %948		; <i32> [#uses=1]
+	%950 = inttoptr i32 %949 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%951 = getelementptr %struct.edge_rec* %943, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%952 = load %struct.edge_rec** %951, align 4		; <%struct.edge_rec*> [#uses=1]
+	%953 = ptrtoint %struct.edge_rec* %952 to i32		; <i32> [#uses=2]
+	%954 = add i32 %953, 16		; <i32> [#uses=1]
+	%955 = and i32 %954, 63		; <i32> [#uses=1]
+	%956 = and i32 %953, -64		; <i32> [#uses=1]
+	%957 = or i32 %955, %956		; <i32> [#uses=1]
+	%958 = inttoptr i32 %957 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%959 = getelementptr %struct.edge_rec* %958, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%960 = load %struct.edge_rec** %959, align 4		; <%struct.edge_rec*> [#uses=1]
+	%961 = getelementptr %struct.edge_rec* %950, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%962 = load %struct.edge_rec** %961, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %960, %struct.edge_rec** %961, align 4
+	store %struct.edge_rec* %962, %struct.edge_rec** %959, align 4
+	%963 = load %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
+	%964 = load %struct.edge_rec** %951, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %963, %struct.edge_rec** %951, align 4
+	store %struct.edge_rec* %964, %struct.edge_rec** %847, align 4
+	%965 = add i32 %881, 16		; <i32> [#uses=1]
+	%966 = and i32 %965, 63		; <i32> [#uses=1]
+	%967 = or i32 %966, %933		; <i32> [#uses=1]
+	%968 = inttoptr i32 %967 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%969 = getelementptr %struct.edge_rec* %968, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	%970 = load %struct.edge_rec** %969, align 4		; <%struct.edge_rec*> [#uses=1]
+	%971 = ptrtoint %struct.edge_rec* %970 to i32		; <i32> [#uses=2]
+	%972 = add i32 %971, 16		; <i32> [#uses=1]
+	%973 = and i32 %972, 63		; <i32> [#uses=1]
+	%974 = and i32 %971, -64		; <i32> [#uses=1]
+	%975 = or i32 %973, %974		; <i32> [#uses=1]
+	%976 = inttoptr i32 %975 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%977 = load %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
+	%978 = ptrtoint %struct.edge_rec* %977 to i32		; <i32> [#uses=2]
+	%979 = add i32 %978, 16		; <i32> [#uses=1]
+	%980 = and i32 %979, 63		; <i32> [#uses=1]
+	%981 = and i32 %978, -64		; <i32> [#uses=1]
+	%982 = or i32 %980, %981		; <i32> [#uses=1]
+	%983 = inttoptr i32 %982 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%984 = getelementptr %struct.edge_rec* %976, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
+	%985 = load %struct.edge_rec** %984, align 4		; <%struct.edge_rec*> [#uses=1]
+	%986 = ptrtoint %struct.edge_rec* %985 to i32		; <i32> [#uses=2]
+	%987 = add i32 %986, 16		; <i32> [#uses=1]
+	%988 = and i32 %987, 63		; <i32> [#uses=1]
+	%989 = and i32 %986, -64		; <i32> [#uses=1]
+	%990 = or i32 %988, %989		; <i32> [#uses=1]
+	%991 = inttoptr i32 %990 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
+	%992 = getelementptr %struct.edge_rec* %991, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%993 = load %struct.edge_rec** %992, align 4		; <%struct.edge_rec*> [#uses=1]
+	%994 = getelementptr %struct.edge_rec* %983, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
+	%995 = load %struct.edge_rec** %994, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %993, %struct.edge_rec** %994, align 4
+	store %struct.edge_rec* %995, %struct.edge_rec** %992, align 4
+	%996 = load %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
+	%997 = load %struct.edge_rec** %984, align 4		; <%struct.edge_rec*> [#uses=1]
+	store %struct.edge_rec* %996, %struct.edge_rec** %984, align 4
+	store %struct.edge_rec* %997, %struct.edge_rec** %883, align 4
+	%998 = inttoptr i32 %933 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
+	%999 = load %struct.edge_rec** @avail_edge, align 4		; <%struct.edge_rec*> [#uses=1]
+	%1000 = getelementptr %struct.edge_rec* %998, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
+	store %struct.edge_rec* %999, %struct.edge_rec** %1000, align 4
+	store %struct.edge_rec* %998, %struct.edge_rec** @avail_edge, align 4
+	br label %bb15
+
+bb15:		; preds = %bb14, %bb13, %bb11, %bb10, %bb6
+	%retval.1.0 = phi i32 [ %780, %bb10 ], [ %829, %bb13 ], [ %829, %bb14 ], [ %tmp4, %bb6 ], [ %849, %bb11 ]		; <i32> [#uses=1]
+	%retval.0.0 = phi i32 [ %769, %bb10 ], [ %781, %bb13 ], [ %781, %bb14 ], [ %tmp16, %bb6 ], [ %881, %bb11 ]		; <i32> [#uses=1]
+	%agg.result162 = bitcast %struct.EDGE_PAIR* %agg.result to i64*		; <i64*> [#uses=1]
+	%1001 = zext i32 %retval.0.0 to i64		; <i64> [#uses=1]
+	%1002 = zext i32 %retval.1.0 to i64		; <i64> [#uses=1]
+	%1003 = shl i64 %1002, 32		; <i64> [#uses=1]
+	%1004 = or i64 %1003, %1001		; <i64> [#uses=1]
+	store i64 %1004, i64* %agg.result162, align 4
+	ret void
+}
+
+declare arm_apcscc i32 @puts(i8* nocapture) nounwind
+
+declare arm_apcscc void @exit(i32) noreturn nounwind
+
+declare arm_apcscc %struct.edge_rec* @alloc_edge() nounwind
diff --git a/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
new file mode 100644
index 0000000000000..b4b989bf38a45
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10
+
+	%struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+	%struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+	%struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+	%struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 }
+	%struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+declare arm_apcscc i32 @strlen(i8* nocapture) nounwind readonly
+
+define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind {
+entry:
+	br i1 undef, label %bb126, label %bb1
+
+bb1:		; preds = %entry
+	br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit
+
+cli_calloc.exit.thread:		; preds = %bb1
+	ret i32 -114
+
+cli_calloc.exit:		; preds = %bb1
+	store i16 %parts, i16* undef, align 4
+	br i1 undef, label %bb52, label %bb4
+
+bb4:		; preds = %cli_calloc.exit
+	br i1 undef, label %bb.i, label %bb1.i3
+
+bb.i:		; preds = %bb4
+	unreachable
+
+bb1.i3:		; preds = %bb4
+	br i1 undef, label %bb2.i4, label %cli_strdup.exit
+
+bb2.i4:		; preds = %bb1.i3
+	ret i32 -114
+
+cli_strdup.exit:		; preds = %bb1.i3
+	br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54
+
+cli_calloc.exit54.thread:		; preds = %cli_strdup.exit
+	ret i32 -114
+
+cli_calloc.exit54:		; preds = %cli_strdup.exit
+	br label %bb45
+
+cli_calloc.exit70.thread:		; preds = %bb45
+	unreachable
+
+cli_calloc.exit70:		; preds = %bb45
+	br i1 undef, label %bb.i83, label %bb1.i84
+
+bb.i83:		; preds = %cli_calloc.exit70
+	unreachable
+
+bb1.i84:		; preds = %cli_calloc.exit70
+	br i1 undef, label %bb2.i85, label %bb17
+
+bb2.i85:		; preds = %bb1.i84
+	unreachable
+
+bb17:		; preds = %bb1.i84
+	br i1 undef, label %bb22, label %bb.nph
+
+bb.nph:		; preds = %bb17
+	br label %bb18
+
+bb18:		; preds = %bb18, %bb.nph
+	br i1 undef, label %bb18, label %bb22
+
+bb22:		; preds = %bb18, %bb17
+	br i1 undef, label %bb25, label %bb43.preheader
+
+bb43.preheader:		; preds = %bb22
+	br i1 undef, label %bb28, label %bb45
+
+bb25:		; preds = %bb22
+	unreachable
+
+bb28:		; preds = %bb43.preheader
+	unreachable
+
+bb45:		; preds = %bb43.preheader, %cli_calloc.exit54
+	br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
+
+bb52:		; preds = %cli_calloc.exit
+	%0 = load i16* undef, align 4		; <i16> [#uses=1]
+	%1 = icmp eq i16 %0, 0		; <i1> [#uses=1]
+	%iftmp.20.0 = select i1 %1, i8* %hexsig, i8* null		; <i8*> [#uses=1]
+	%2 = tail call arm_apcscc  i32 @strlen(i8* %iftmp.20.0) nounwind readonly		; <i32> [#uses=0]
+	unreachable
+
+bb126:		; preds = %entry
+	ret i32 -117
+}
diff --git a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
new file mode 100644
index 0000000000000..24f499036ce42
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -march=arm
+
+	%struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+	%struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+	%struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+	%struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 }
+	%struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind {
+entry:
+	br i1 undef, label %bb126, label %bb1
+
+bb1:		; preds = %entry
+	br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit
+
+cli_calloc.exit.thread:		; preds = %bb1
+	ret i32 -114
+
+cli_calloc.exit:		; preds = %bb1
+	br i1 undef, label %bb52, label %bb4
+
+bb4:		; preds = %cli_calloc.exit
+	br i1 undef, label %bb.i, label %bb1.i3
+
+bb.i:		; preds = %bb4
+	unreachable
+
+bb1.i3:		; preds = %bb4
+	br i1 undef, label %bb2.i4, label %cli_strdup.exit
+
+bb2.i4:		; preds = %bb1.i3
+	ret i32 -114
+
+cli_strdup.exit:		; preds = %bb1.i3
+	br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54
+
+cli_calloc.exit54.thread:		; preds = %cli_strdup.exit
+	ret i32 -114
+
+cli_calloc.exit54:		; preds = %cli_strdup.exit
+	br label %bb45
+
+cli_calloc.exit70.thread:		; preds = %bb45
+	unreachable
+
+cli_calloc.exit70:		; preds = %bb45
+	br i1 undef, label %bb.i83, label %bb1.i84
+
+bb.i83:		; preds = %cli_calloc.exit70
+	unreachable
+
+bb1.i84:		; preds = %cli_calloc.exit70
+	br i1 undef, label %bb2.i85, label %bb17
+
+bb2.i85:		; preds = %bb1.i84
+	unreachable
+
+bb17:		; preds = %bb1.i84
+	br i1 undef, label %bb22, label %bb.nph
+
+bb.nph:		; preds = %bb17
+	br label %bb18
+
+bb18:		; preds = %bb18, %bb.nph
+	br i1 undef, label %bb18, label %bb22
+
+bb22:		; preds = %bb18, %bb17
+	%0 = getelementptr i8* null, i32 10		; <i8*> [#uses=1]
+	%1 = bitcast i8* %0 to i16*		; <i16*> [#uses=1]
+	%2 = load i16* %1, align 2		; <i16> [#uses=1]
+	%3 = add i16 %2, 1		; <i16> [#uses=1]
+	%4 = zext i16 %3 to i32		; <i32> [#uses=1]
+	%5 = mul i32 %4, 3		; <i32> [#uses=1]
+	%6 = add i32 %5, -1		; <i32> [#uses=1]
+	%7 = icmp eq i32 %6, undef		; <i1> [#uses=1]
+	br i1 %7, label %bb25, label %bb43.preheader
+
+bb43.preheader:		; preds = %bb22
+	br i1 undef, label %bb28, label %bb45
+
+bb25:		; preds = %bb22
+	unreachable
+
+bb28:		; preds = %bb43.preheader
+	unreachable
+
+bb45:		; preds = %bb43.preheader, %cli_calloc.exit54
+	br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
+
+bb52:		; preds = %cli_calloc.exit
+	unreachable
+
+bb126:		; preds = %entry
+	ret i32 -117
+}
diff --git a/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
new file mode 100644
index 0000000000000..e1d19d1ac2ffe
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin10 -mattr=+vfp3
+
+@a = external global double		; <double*> [#uses=1]
+
+declare double @llvm.exp.f64(double) nounwind readonly
+
+define arm_apcscc void @findratio(double* nocapture %res1, double* nocapture %res2) nounwind {
+entry:
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	br i1 undef, label %bb28, label %bb
+
+bb28:		; preds = %bb
+	%0 = load double* @a, align 4		; <double> [#uses=2]
+	%1 = fadd double %0, undef		; <double> [#uses=2]
+	br i1 undef, label %bb59, label %bb60
+
+bb59:		; preds = %bb28
+	%2 = fsub double -0.000000e+00, undef		; <double> [#uses=2]
+	br label %bb61
+
+bb60:		; preds = %bb28
+	%3 = tail call double @llvm.exp.f64(double undef) nounwind		; <double> [#uses=1]
+	%4 = fsub double -0.000000e+00, %3		; <double> [#uses=2]
+	%5 = fsub double -0.000000e+00, undef		; <double> [#uses=1]
+	%6 = fsub double -0.000000e+00, undef		; <double> [#uses=1]
+	br label %bb61
+
+bb61:		; preds = %bb60, %bb59
+	%.pn201 = phi double [ undef, %bb59 ], [ undef, %bb60 ]		; <double> [#uses=1]
+	%.pn111 = phi double [ undef, %bb59 ], [ undef, %bb60 ]		; <double> [#uses=1]
+	%.pn452 = phi double [ undef, %bb59 ], [ undef, %bb60 ]		; <double> [#uses=1]
+	%.pn85 = phi double [ undef, %bb59 ], [ undef, %bb60 ]		; <double> [#uses=1]
+	%.pn238 = phi double [ 0.000000e+00, %bb59 ], [ 0.000000e+00, %bb60 ]		; <double> [#uses=1]
+	%.pn39 = phi double [ undef, %bb59 ], [ undef, %bb60 ]		; <double> [#uses=1]
+	%.pn230 = phi double [ undef, %bb59 ], [ undef, %bb60 ]		; <double> [#uses=1]
+	%.pn228 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ]		; <double> [#uses=1]
+	%.pn224 = phi double [ undef, %bb59 ], [ undef, %bb60 ]		; <double> [#uses=1]
+	%.pn222 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ]		; <double> [#uses=1]
+	%.pn218 = phi double [ %2, %bb59 ], [ %4, %bb60 ]		; <double> [#uses=1]
+	%.pn214 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ]		; <double> [#uses=1]
+	%.pn212 = phi double [ %2, %bb59 ], [ %4, %bb60 ]		; <double> [#uses=1]
+	%.pn213 = phi double [ undef, %bb59 ], [ undef, %bb60 ]		; <double> [#uses=1]
+	%.pn210 = phi double [ undef, %bb59 ], [ %5, %bb60 ]		; <double> [#uses=1]
+	%.pn202 = phi double [ undef, %bb59 ], [ %6, %bb60 ]		; <double> [#uses=0]
+	%.pn390 = fdiv double %.pn452, undef		; <double> [#uses=0]
+	%.pn145 = fdiv double %.pn238, %1		; <double> [#uses=0]
+	%.pn138 = fdiv double %.pn230, undef		; <double> [#uses=1]
+	%.pn139 = fdiv double %.pn228, undef		; <double> [#uses=1]
+	%.pn134 = fdiv double %.pn224, %0		; <double> [#uses=1]
+	%.pn135 = fdiv double %.pn222, %1		; <double> [#uses=1]
+	%.pn133 = fdiv double %.pn218, undef		; <double> [#uses=0]
+	%.pn128 = fdiv double %.pn214, undef		; <double> [#uses=1]
+	%.pn129 = fdiv double %.pn212, %.pn213		; <double> [#uses=1]
+	%.pn126 = fdiv double %.pn210, undef		; <double> [#uses=0]
+	%.pn54.in = fmul double undef, %.pn201		; <double> [#uses=1]
+	%.pn42.in = fmul double undef, undef		; <double> [#uses=1]
+	%.pn76 = fsub double %.pn138, %.pn139		; <double> [#uses=1]
+	%.pn74 = fsub double %.pn134, %.pn135		; <double> [#uses=1]
+	%.pn70 = fsub double %.pn128, %.pn129		; <double> [#uses=1]
+	%.pn54 = fdiv double %.pn54.in, 6.000000e+00		; <double> [#uses=1]
+	%.pn64 = fmul double undef, 0x3FE5555555555555		; <double> [#uses=1]
+	%.pn65 = fmul double undef, undef		; <double> [#uses=1]
+	%.pn50 = fmul double undef, %.pn111		; <double> [#uses=0]
+	%.pn42 = fdiv double %.pn42.in, 6.000000e+00		; <double> [#uses=1]
+	%.pn40 = fmul double undef, %.pn85		; <double> [#uses=0]
+	%.pn56 = fadd double %.pn76, undef		; <double> [#uses=1]
+	%.pn57 = fmul double %.pn74, undef		; <double> [#uses=1]
+	%.pn36 = fadd double undef, undef		; <double> [#uses=1]
+	%.pn37 = fmul double %.pn70, undef		; <double> [#uses=1]
+	%.pn33 = fmul double undef, 0x3FC5555555555555		; <double> [#uses=1]
+	%.pn29 = fsub double %.pn64, %.pn65		; <double> [#uses=1]
+	%.pn21 = fadd double undef, undef		; <double> [#uses=1]
+	%.pn27 = fmul double undef, 0x3FC5555555555555		; <double> [#uses=1]
+	%.pn11 = fadd double %.pn56, %.pn57		; <double> [#uses=1]
+	%.pn32 = fmul double %.pn54, undef		; <double> [#uses=1]
+	%.pn26 = fmul double %.pn42, undef		; <double> [#uses=1]
+	%.pn15 = fmul double 0.000000e+00, %.pn39		; <double> [#uses=1]
+	%.pn7 = fadd double %.pn36, %.pn37		; <double> [#uses=1]
+	%.pn30 = fsub double %.pn32, %.pn33		; <double> [#uses=1]
+	%.pn28 = fadd double %.pn30, 0.000000e+00		; <double> [#uses=1]
+	%.pn24 = fsub double %.pn28, %.pn29		; <double> [#uses=1]
+	%.pn22 = fsub double %.pn26, %.pn27		; <double> [#uses=1]
+	%.pn20 = fadd double %.pn24, undef		; <double> [#uses=1]
+	%.pn18 = fadd double %.pn22, 0.000000e+00		; <double> [#uses=1]
+	%.pn16 = fsub double %.pn20, %.pn21		; <double> [#uses=1]
+	%.pn14 = fsub double %.pn18, undef		; <double> [#uses=1]
+	%.pn12 = fadd double %.pn16, undef		; <double> [#uses=1]
+	%.pn10 = fadd double %.pn14, %.pn15		; <double> [#uses=1]
+	%.pn8 = fsub double %.pn12, undef		; <double> [#uses=1]
+	%.pn6 = fsub double %.pn10, %.pn11		; <double> [#uses=1]
+	%.pn4 = fadd double %.pn8, undef		; <double> [#uses=1]
+	%.pn2 = fadd double %.pn6, %.pn7		; <double> [#uses=1]
+	%N1.0 = fsub double %.pn4, undef		; <double> [#uses=1]
+	%D1.0 = fsub double %.pn2, undef		; <double> [#uses=2]
+	br i1 undef, label %bb62, label %bb64
+
+bb62:		; preds = %bb61
+	%7 = fadd double %D1.0, undef		; <double> [#uses=1]
+	br label %bb64
+
+bb64:		; preds = %bb62, %bb61
+	%.pn = phi double [ undef, %bb62 ], [ %N1.0, %bb61 ]		; <double> [#uses=1]
+	%.pn1 = phi double [ %7, %bb62 ], [ %D1.0, %bb61 ]		; <double> [#uses=1]
+	%x.1 = fdiv double %.pn, %.pn1		; <double> [#uses=0]
+	ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
new file mode 100644
index 0000000000000..2d4e58d63603a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+; PR4657
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+define arm_apcscc <4 x i32> @scale(<4 x i32> %v, i32 %f) nounwind {
+entry:
+	%v_addr = alloca <4 x i32>		; <<4 x i32>*> [#uses=2]
+	%f_addr = alloca i32		; <i32*> [#uses=2]
+	%retval = alloca <4 x i32>		; <<4 x i32>*> [#uses=2]
+	%0 = alloca <4 x i32>		; <<4 x i32>*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store <4 x i32> %v, <4 x i32>* %v_addr
+	store i32 %f, i32* %f_addr
+	%1 = load <4 x i32>* %v_addr, align 16		; <<4 x i32>> [#uses=1]
+	%2 = load i32* %f_addr, align 4		; <i32> [#uses=1]
+	%3 = insertelement <4 x i32> undef, i32 %2, i32 0		; <<4 x i32>> [#uses=1]
+	%4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer		; <<4 x i32>> [#uses=1]
+	%5 = mul <4 x i32> %1, %4		; <<4 x i32>> [#uses=1]
+	store <4 x i32> %5, <4 x i32>* %0, align 16
+	%6 = load <4 x i32>* %0, align 16		; <<4 x i32>> [#uses=1]
+	store <4 x i32> %6, <4 x i32>* %retval, align 16
+	br label %return
+
+return:		; preds = %entry
+	%retval1 = load <4 x i32>* %retval		; <<4 x i32>> [#uses=1]
+	ret <4 x i32> %retval1
+}
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
new file mode 100644
index 0000000000000..65ffed2b80a0b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=armv6-elf
+; PR4528
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv6-elf"
+
+define arm_aapcscc i32 @file_read_actor(i32* nocapture %desc, i32* %page, i32 %offset, i32 %size) nounwind optsize {
+entry:
+	br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
+
+bb5.i:		; preds = %entry
+	%asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind		; <i32> [#uses=1]
+	%0 = icmp eq i32 %asmtmp.i, 0		; <i1> [#uses=1]
+	br i1 %0, label %bb6.i, label %fault_in_pages_writeable.exit
+
+bb6.i:		; preds = %bb5.i
+	br i1 undef, label %fault_in_pages_writeable.exit, label %bb7.i
+
+bb7.i:		; preds = %bb6.i
+	unreachable
+
+fault_in_pages_writeable.exit:		; preds = %bb6.i, %bb5.i, %entry
+	br i1 undef, label %bb2, label %bb3
+
+bb2:		; preds = %fault_in_pages_writeable.exit
+	unreachable
+
+bb3:		; preds = %fault_in_pages_writeable.exit
+	%1 = tail call arm_aapcscc  i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind		; <i32> [#uses=0]
+	unreachable
+}
+
+declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
new file mode 100644
index 0000000000000..9e5372a793522
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=armv6-elf
+; PR4528
+
+define arm_aapcscc i32 @file_read_actor(i32 %desc, i32 %page, i32 %offset, i32 %size) nounwind optsize {
+entry:
+	br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
+
+bb5.i:		; preds = %entry
+	%asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind		; <i32> [#uses=1]
+	br label %fault_in_pages_writeable.exit
+
+fault_in_pages_writeable.exit:		; preds = %bb5.i, %entry
+	%0 = phi i32 [ 0, %entry ], [ %asmtmp.i, %bb5.i ]		; <i32> [#uses=1]
+	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
+	br i1 %1, label %bb2, label %bb3
+
+bb2:		; preds = %fault_in_pages_writeable.exit
+	unreachable
+
+bb3:		; preds = %fault_in_pages_writeable.exit
+	%2 = tail call arm_aapcscc  i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind		; <i32> [#uses=0]
+	unreachable
+}
+
+declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
new file mode 100644
index 0000000000000..18d68f79370ce
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=arm
+; PR4528
+
+; Inline asm is allowed to contain operands "=&r", "0".
+
+%struct.device_dma_parameters = type { i32, i32 }
+%struct.iovec = type { i8*, i32 }
+
+define arm_aapcscc i32 @generic_segment_checks(%struct.iovec* nocapture %iov, i32* nocapture %nr_segs, i32* nocapture %count, i32 %access_flags) nounwind optsize {
+entry:
+  br label %bb8
+
+bb:                                               ; preds = %bb8
+  br i1 undef, label %bb10, label %bb2
+
+bb2:                                              ; preds = %bb
+  %asmtmp = tail call %struct.device_dma_parameters asm "adds $1, $2, $3; sbcccs $1, $1, $0; movcc $0, #0", "=&r,=&r,r,Ir,0,~{cc}"(i8* undef, i32 undef, i32 0) nounwind; <%struct.device_dma_parameters> [#uses=1]
+  %asmresult = extractvalue %struct.device_dma_parameters %asmtmp, 0; <i32> [#uses=1]
+  %0 = icmp eq i32 %asmresult, 0                  ; <i1> [#uses=1]
+  br i1 %0, label %bb7, label %bb4
+
+bb4:                                              ; preds = %bb2
+  br i1 undef, label %bb10, label %bb9
+
+bb7:                                              ; preds = %bb2
+  %1 = add i32 %2, 1                              ; <i32> [#uses=1]
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %entry
+  %2 = phi i32 [ 0, %entry ], [ %1, %bb7 ]        ; <i32> [#uses=3]
+  %scevgep22 = getelementptr %struct.iovec* %iov, i32 %2, i32 0; <i8**> [#uses=0]
+  %3 = load i32* %nr_segs, align 4                ; <i32> [#uses=1]
+  %4 = icmp ult i32 %2, %3                        ; <i1> [#uses=1]
+  br i1 %4, label %bb, label %bb9
+
+bb9:                                              ; preds = %bb8, %bb4
+  store i32 undef, i32* %count, align 4
+  ret i32 0
+
+bb10:                                             ; preds = %bb4, %bb
+  ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
new file mode 100644
index 0000000000000..a46482cc73175
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm
+; PR4716
+
+define arm_aapcscc void @_start() nounwind naked {
+entry:
+  tail call arm_aapcscc  void @exit(i32 undef) noreturn nounwind
+  unreachable
+}
+
+declare arm_aapcscc void @exit(i32) noreturn nounwind
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
new file mode 100644
index 0000000000000..84915c48824a4
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 -post-RA-scheduler -mcpu=cortex-a8
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.tree = type { i32, double, double, %struct.tree*, %struct.tree*, %struct.tree*, %struct.tree* }
+@g = common global %struct.tree* null
+
+define arm_apcscc %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind {
+entry:
+  %t.idx51.val.i = load double* null              ; <double> [#uses=1]
+  br i1 undef, label %bb4.i, label %bb.i
+
+bb.i:                                             ; preds = %entry
+  unreachable
+
+bb4.i:                                            ; preds = %entry
+  %0 = load %struct.tree** @g, align 4         ; <%struct.tree*> [#uses=2]
+  %.idx45.i = getelementptr %struct.tree* %0, i32 0, i32 1 ; <double*> [#uses=1]
+  %.idx45.val.i = load double* %.idx45.i          ; <double> [#uses=1]
+  %.idx46.i = getelementptr %struct.tree* %0, i32 0, i32 2 ; <double*> [#uses=1]
+  %.idx46.val.i = load double* %.idx46.i          ; <double> [#uses=1]
+  %1 = fsub double 0.000000e+00, %.idx45.val.i    ; <double> [#uses=2]
+  %2 = fmul double %1, %1                         ; <double> [#uses=1]
+  %3 = fsub double %t.idx51.val.i, %.idx46.val.i  ; <double> [#uses=2]
+  %4 = fmul double %3, %3                         ; <double> [#uses=1]
+  %5 = fadd double %2, %4                         ; <double> [#uses=1]
+  %6 = tail call double @llvm.sqrt.f64(double %5) nounwind ; <double> [#uses=1]
+  br i1 undef, label %bb7.i4, label %bb6.i
+
+bb6.i:                                            ; preds = %bb4.i
+  br label %bb7.i4
+
+bb7.i4:                                           ; preds = %bb6.i, %bb4.i
+  %tton1.0.i = phi double [ %6, %bb6.i ], [ undef, %bb4.i ] ; <double> [#uses=0]
+  unreachable
+}
+
+declare double @llvm.sqrt.f64(double) nounwind readonly
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
new file mode 100644
index 0000000000000..a21ffc38d09e6
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
+%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
+%struct.icstruct = type { [3 x i32], i16 }
+%struct.node = type { i16, double, [3 x double], i32, i32 }
+
+declare arm_apcscc double @floor(double) nounwind readnone
+
+define void @intcoord(%struct.icstruct* noalias nocapture sret %agg.result, i1 %a, double %b) {
+entry:
+  br i1 %a, label %bb3, label %bb1
+
+bb1:                                              ; preds = %entry
+  unreachable
+
+bb3:                                              ; preds = %entry
+  br i1 %a, label %bb7, label %bb5
+
+bb5:                                              ; preds = %bb3
+  unreachable
+
+bb7:                                              ; preds = %bb3
+  br i1 %a, label %bb11, label %bb9
+
+bb9:                                              ; preds = %bb7
+  %0 = tail call arm_apcscc  double @floor(double %b) nounwind readnone ; <double> [#uses=0]
+  br label %bb11
+
+bb11:                                             ; preds = %bb9, %bb7
+  %1 = getelementptr %struct.icstruct* %agg.result, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
+  store i32 0, i32* %1
+  ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
new file mode 100644
index 0000000000000..e3d8ea60f9927
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+%struct.Patient = type { i32, i32, i32, %struct.Village* }
+%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc %struct.Village* @alloc_tree(i32 %level, i32 %label, %struct.Village* %back, i1 %p) nounwind {
+entry:
+  br i1 %p, label %bb8, label %bb1
+
+bb1:                                              ; preds = %entry
+  %0 = malloc %struct.Village                     ; <%struct.Village*> [#uses=3]
+  %exp2 = call double @ldexp(double 1.000000e+00, i32 %level) nounwind ; <double> [#uses=1]
+  %.c = fptosi double %exp2 to i32                ; <i32> [#uses=1]
+  store i32 %.c, i32* null
+  %1 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 0 ; <%struct.List**> [#uses=1]
+  store %struct.List* null, %struct.List** %1
+  %2 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 2 ; <%struct.List**> [#uses=1]
+  store %struct.List* null, %struct.List** %2
+  ret %struct.Village* %0
+
+bb8:                                              ; preds = %entry
+  ret %struct.Village* null
+}
+
+declare double @ldexp(double, i32)
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
new file mode 100644
index 0000000000000..9123377e7151b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+@.str = external constant [36 x i8], align 1      ; <[36 x i8]*> [#uses=0]
+@.str1 = external constant [31 x i8], align 1     ; <[31 x i8]*> [#uses=1]
+@.str2 = external constant [4 x i8], align 1      ; <[4 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @getUnknown(i32, ...) nounwind
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
+
+declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind
+
+define arm_apcscc i32 @main() nounwind {
+entry:
+  %0 = tail call arm_apcscc  i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
+  %1 = tail call arm_apcscc  i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
+  %2 = tail call arm_apcscc  i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
+  %3 = tail call arm_apcscc  i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+  ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-08-23-linkerprivate.ll b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
new file mode 100644
index 0000000000000..0fad533b6c595
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | FileCheck %s
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16		; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
new file mode 100644
index 0000000000000..c6ef2561490cb
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mattr=+neon | not grep fldmfdd
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+define arm_apcscc void @_ZN6squish10ClusterFit9Compress3EPv(%quuz* %this, i8* %block) {
+entry:
+  %0 = lshr <4 x i32> zeroinitializer, <i32 31, i32 31, i32 31, i32 31> ; <<4 x i32>> [#uses=1]
+  %1 = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; <<2 x i32>> [#uses=1]
+  %2 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> undef, <2 x i32> %1) nounwind ; <<2 x i32>> [#uses=1]
+  %3 = extractelement <2 x i32> %2, i32 0         ; <i32> [#uses=1]
+  %not..i = icmp eq i32 %3, undef                 ; <i1> [#uses=1]
+  br i1 %not..i, label %return, label %bb221
+
+bb221:                                            ; preds = %bb221, %entry
+  br label %bb221
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll
new file mode 100644
index 0000000000000..bc5bfe9f60983
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mattr=+neon | not grep fldmfdd
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+define arm_apcscc void @aaaa(%quuz* %this, i8* %block) {
+entry:
+  br i1 undef, label %bb.nph269, label %bb201
+
+bb.nph269:                                        ; preds = %entry
+  br label %bb12
+
+bb12:                                             ; preds = %bb194, %bb.nph269
+  %0 = fmul <4 x float> undef, undef              ; <<4 x float>> [#uses=1]
+  %1 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+  %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %3 = fadd <4 x float> undef, %2                 ; <<4 x float>> [#uses=1]
+  br i1 undef, label %bb194, label %bb186
+
+bb186:                                            ; preds = %bb12
+  br label %bb194
+
+bb194:                                            ; preds = %bb186, %bb12
+  %besterror.0.0 = phi <4 x float> [ %3, %bb186 ], [ undef, %bb12 ] ; <<4 x float>> [#uses=0]
+  %indvar.next294 = add i32 undef, 1              ; <i32> [#uses=0]
+  br label %bb12
+
+bb201:                                            ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll
new file mode 100644
index 0000000000000..d5178b4bfb3f3
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mattr=+neon
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @foo() nounwind {
+entry:
+  %0 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> undef, <2 x float> undef) nounwind ; <<2 x float>> [#uses=1]
+  %tmp28 = extractelement <2 x float> %0, i32 0   ; <float> [#uses=1]
+  %1 = fcmp une float %tmp28, 4.900000e+01        ; <i1> [#uses=1]
+  br i1 %1, label %bb, label %bb7
+
+bb:                                               ; preds = %entry
+  unreachable
+
+bb7:                                              ; preds = %entry
+  br i1 undef, label %bb8, label %bb9
+
+bb8:                                              ; preds = %bb7
+  unreachable
+
+bb9:                                              ; preds = %bb7
+  ret void
+}
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll
new file mode 100644
index 0000000000000..266fce6e0c5e2
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mattr=+neon
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @aaa() nounwind {
+entry:
+  %0 = fmul <4 x float> undef, <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02, float 0x3EB0C6F7A0000000> ; <<4 x float>> [#uses=1]
+  %tmp31 = extractelement <4 x float> %0, i32 0   ; <float> [#uses=1]
+  %1 = fpext float %tmp31 to double               ; <double> [#uses=1]
+  %2 = fsub double 1.000000e+00, %1               ; <double> [#uses=1]
+  %3 = fdiv double %2, 1.000000e+00               ; <double> [#uses=1]
+  %4 = tail call double @fabs(double %3) nounwind readnone ; <double> [#uses=1]
+  %5 = fcmp ogt double %4, 1.000000e-05           ; <i1> [#uses=1]
+  br i1 %5, label %bb, label %bb7
+
+bb:                                               ; preds = %entry
+  unreachable
+
+bb7:                                              ; preds = %entry
+  unreachable
+}
+
+declare double @fabs(double)
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
new file mode 100644
index 0000000000000..b6cf880a30015
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin9 -march=arm | FileCheck %s
+
+%struct.A = type { i32* }
+
+define arm_apcscc void @"\01-[MyFunction Name:]"() {
+entry:
+  %save_filt.1 = alloca i32                       ; <i32*> [#uses=2]
+  %save_eptr.0 = alloca i8*                       ; <i8**> [#uses=2]
+  %a = alloca %struct.A                           ; <%struct.A*> [#uses=3]
+  %eh_exception = alloca i8*                      ; <i8**> [#uses=5]
+  %eh_selector = alloca i32                       ; <i32*> [#uses=3]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  call arm_apcscc  void @_ZN1AC1Ev(%struct.A* %a)
+  invoke arm_apcscc  void @_Z3barv()
+          to label %invcont unwind label %lpad
+
+invcont:                                          ; preds = %entry
+  call arm_apcscc  void @_ZN1AD1Ev(%struct.A* %a) nounwind
+  br label %return
+
+bb:                                               ; preds = %ppad
+  %eh_select = load i32* %eh_selector             ; <i32> [#uses=1]
+  store i32 %eh_select, i32* %save_filt.1, align 4
+  %eh_value = load i8** %eh_exception             ; <i8*> [#uses=1]
+  store i8* %eh_value, i8** %save_eptr.0, align 4
+  call arm_apcscc  void @_ZN1AD1Ev(%struct.A* %a) nounwind
+  %0 = load i8** %save_eptr.0, align 4            ; <i8*> [#uses=1]
+  store i8* %0, i8** %eh_exception, align 4
+  %1 = load i32* %save_filt.1, align 4            ; <i32> [#uses=1]
+  store i32 %1, i32* %eh_selector, align 4
+  br label %Unwind
+
+return:                                           ; preds = %invcont
+  ret void
+
+lpad:                                             ; preds = %entry
+  %eh_ptr = call i8* @llvm.eh.exception()         ; <i8*> [#uses=1]
+  store i8* %eh_ptr, i8** %eh_exception
+  %eh_ptr1 = load i8** %eh_exception              ; <i8*> [#uses=1]
+  %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0) ; <i32> [#uses=1]
+  store i32 %eh_select2, i32* %eh_selector
+  br label %ppad
+
+ppad:                                             ; preds = %lpad
+  br label %bb
+
+Unwind:                                           ; preds = %bb
+  %eh_ptr3 = load i8** %eh_exception              ; <i8*> [#uses=1]
+  call arm_apcscc  void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
+  unreachable
+}
+
+define linkonce_odr arm_apcscc void @_ZN1AC1Ev(%struct.A* %this) {
+entry:
+  %this_addr = alloca %struct.A*                  ; <%struct.A**> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  store %struct.A* %this, %struct.A** %this_addr
+  %0 = call arm_apcscc  i8* @_Znwm(i32 4)         ; <i8*> [#uses=1]
+  %1 = bitcast i8* %0 to i32*                     ; <i32*> [#uses=1]
+  %2 = load %struct.A** %this_addr, align 4       ; <%struct.A*> [#uses=1]
+  %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0 ; <i32**> [#uses=1]
+  store i32* %1, i32** %3, align 4
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+declare arm_apcscc i8* @_Znwm(i32)
+
+define linkonce_odr arm_apcscc void @_ZN1AD1Ev(%struct.A* %this) nounwind {
+entry:
+  %this_addr = alloca %struct.A*                  ; <%struct.A**> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  store %struct.A* %this, %struct.A** %this_addr
+  %0 = load %struct.A** %this_addr, align 4       ; <%struct.A*> [#uses=1]
+  %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0 ; <i32**> [#uses=1]
+  %2 = load i32** %1, align 4                     ; <i32*> [#uses=1]
+  %3 = bitcast i32* %2 to i8*                     ; <i8*> [#uses=1]
+  call arm_apcscc  void @_ZdlPv(i8* %3) nounwind
+  br label %bb
+
+bb:                                               ; preds = %entry
+  br label %return
+
+return:                                           ; preds = %bb
+  ret void
+}
+;CHECK: L_LSDA_1:
+
+declare arm_apcscc void @_ZdlPv(i8*) nounwind
+
+declare arm_apcscc void @_Z3barv()
+
+declare i8* @llvm.eh.exception() nounwind
+
+declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
+
+declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
+
+declare arm_apcscc i32 @__gxx_personality_sj0(...)
+
+declare arm_apcscc void @_Unwind_SjLj_Resume(i8*)
diff --git a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
new file mode 100644
index 0000000000000..e1e60e6317a69
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; pr4843
+define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind {
+;CHECK: v2regbug:
+;CHECK: vzip.16
+	%tmp1 = load <4 x i16>* %B
+	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32><i32 0, i32 0, i32 1, i32 1>
+	ret <4 x i16> %tmp2
+}
diff --git a/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
new file mode 100644
index 0000000000000..bf91fe099e6b6
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
@@ -0,0 +1,106 @@
+; RUN: llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-apple-darwin9"
+
+@history = internal global [2 x [56 x i32]] [[56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0], [56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0]] ; <[2 x [56 x i32]]*> [#uses=3]
+@nodes = internal global i64 0                    ; <i64*> [#uses=4]
+@.str = private constant [9 x i8] c"##-<=>+#\00", align 1 ; <[9 x i8]*> [#uses=2]
+@.str1 = private constant [6 x i8] c"%c%d\0A\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str2 = private constant [16 x i8] c"Fhourstones 2.0\00", align 1 ; <[16 x i8]*> [#uses=1]
+@.str3 = private constant [54 x i8] c"Using %d transposition table entries with %d probes.\0A\00", align 1 ; <[54 x i8]*> [#uses=1]
+@.str4 = private constant [31 x i8] c"Solving %d-ply position after \00", align 1 ; <[31 x i8]*> [#uses=1]
+@.str5 = private constant [7 x i8] c" . . .\00", align 1 ; <[7 x i8]*> [#uses=1]
+@.str6 = private constant [28 x i8] c"score = %d (%c)  work = %d\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
+@.str7 = private constant [36 x i8] c"%lu pos / %lu msec = %.1f Kpos/sec\0A\00", align 1 ; <[36 x i8]*> [#uses=1]
+@plycnt = internal global i32 0                   ; <i32*> [#uses=21]
+@dias = internal global [19 x i32] zeroinitializer ; <[19 x i32]*> [#uses=43]
+@columns = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=18]
+@height = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=21]
+@rows = internal global [8 x i32] zeroinitializer ; <[8 x i32]*> [#uses=20]
+@colthr = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=5]
+@moves = internal global [44 x i32] zeroinitializer ; <[44 x i32]*> [#uses=9]
+@.str8 = private constant [3 x i8] c"%d\00", align 1 ; <[3 x i8]*> [#uses=1]
+@he = internal global i8* null                    ; <i8**> [#uses=9]
+@hits = internal global i64 0                     ; <i64*> [#uses=8]
+@posed = internal global i64 0                    ; <i64*> [#uses=7]
+@ht = internal global i32* null                   ; <i32**> [#uses=5]
+@.str16 = private constant [19 x i8] c"store rate = %.3f\0A\00", align 1 ; <[19 x i8]*> [#uses=1]
+@.str117 = private constant [45 x i8] c"- %5.3f  < %5.3f  = %5.3f  > %5.3f  + %5.3f\0A\00", align 1 ; <[45 x i8]*> [#uses=1]
+@.str218 = private constant [6 x i8] c"%7d%c\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str319 = private constant [30 x i8] c"Failed to allocate %u bytes.\0A\00", align 1 ; <[30 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @puts(i8* nocapture) nounwind
+
+declare arm_apcscc i32 @getchar() nounwind
+
+define internal arm_apcscc i32 @transpose() nounwind readonly {
+; CHECK: push
+entry:
+  %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1]
+  %1 = shl i32 %0, 7                              ; <i32> [#uses=1]
+  %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1]
+  %3 = or i32 %1, %2                              ; <i32> [#uses=1]
+  %4 = shl i32 %3, 7                              ; <i32> [#uses=1]
+  %5 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1]
+  %6 = or i32 %4, %5                              ; <i32> [#uses=3]
+  %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1]
+  %8 = shl i32 %7, 7                              ; <i32> [#uses=1]
+  %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1]
+  %10 = or i32 %8, %9                             ; <i32> [#uses=1]
+  %11 = shl i32 %10, 7                            ; <i32> [#uses=1]
+  %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1]
+  %13 = or i32 %11, %12                           ; <i32> [#uses=3]
+  %14 = icmp ugt i32 %6, %13                      ; <i1> [#uses=2]
+  %.pn2.in.i = select i1 %14, i32 %6, i32 %13     ; <i32> [#uses=1]
+  %.pn1.in.i = select i1 %14, i32 %13, i32 %6     ; <i32> [#uses=1]
+  %.pn2.i = shl i32 %.pn2.in.i, 7                 ; <i32> [#uses=1]
+  %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1]
+  %.pn.in.in.i = or i32 %.pn2.i, %.pn3.i          ; <i32> [#uses=1]
+  %.pn.in.i = zext i32 %.pn.in.in.i to i64        ; <i64> [#uses=1]
+  %.pn.i = shl i64 %.pn.in.i, 21                  ; <i64> [#uses=1]
+  %.pn1.i = zext i32 %.pn1.in.i to i64            ; <i64> [#uses=1]
+  %iftmp.22.0.i = or i64 %.pn.i, %.pn1.i          ; <i64> [#uses=2]
+  %15 = lshr i64 %iftmp.22.0.i, 17                ; <i64> [#uses=1]
+  %16 = trunc i64 %15 to i32                      ; <i32> [#uses=2]
+  %17 = urem i64 %iftmp.22.0.i, 1050011           ; <i64> [#uses=1]
+  %18 = trunc i64 %17 to i32                      ; <i32> [#uses=1]
+  %19 = urem i32 %16, 179                         ; <i32> [#uses=1]
+  %20 = or i32 %19, 131072                        ; <i32> [#uses=1]
+  %21 = load i32** @ht, align 4                   ; <i32*> [#uses=1]
+  br label %bb5
+
+bb:                                               ; preds = %bb5
+  %22 = getelementptr inbounds i32* %21, i32 %x.0 ; <i32*> [#uses=1]
+  %23 = load i32* %22, align 4                    ; <i32> [#uses=1]
+  %24 = icmp eq i32 %23, %16                      ; <i1> [#uses=1]
+  br i1 %24, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  %25 = load i8** @he, align 4                    ; <i8*> [#uses=1]
+  %26 = getelementptr inbounds i8* %25, i32 %x.0  ; <i8*> [#uses=1]
+  %27 = load i8* %26, align 1                     ; <i8> [#uses=1]
+  %28 = sext i8 %27 to i32                        ; <i32> [#uses=1]
+  ret i32 %28
+
+bb2:                                              ; preds = %bb
+  %29 = add nsw i32 %20, %x.0                     ; <i32> [#uses=3]
+  %30 = add i32 %29, -1050011                     ; <i32> [#uses=1]
+  %31 = icmp sgt i32 %29, 1050010                 ; <i1> [#uses=1]
+  %. = select i1 %31, i32 %30, i32 %29            ; <i32> [#uses=1]
+  %32 = add i32 %33, 1                            ; <i32> [#uses=1]
+  br label %bb5
+
+bb5:                                              ; preds = %bb2, %entry
+  %33 = phi i32 [ 0, %entry ], [ %32, %bb2 ]      ; <i32> [#uses=2]
+  %x.0 = phi i32 [ %18, %entry ], [ %., %bb2 ]    ; <i32> [#uses=3]
+  %34 = icmp sgt i32 %33, 7                       ; <i1> [#uses=1]
+  br i1 %34, label %bb7, label %bb
+
+bb7:                                              ; preds = %bb5
+  ret i32 -128
+}
+
+declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/test/CodeGen/ARM/2009-09-09-AllOnes.ll b/test/CodeGen/ARM/2009-09-09-AllOnes.ll
new file mode 100644
index 0000000000000..f654a1664c8b4
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-09-AllOnes.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mattr=+neon < %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_apcscc void @foo() {
+entry:
+  %0 = insertelement <4 x i32> undef, i32 -1, i32 3
+  store <4 x i32> %0, <4 x i32>* undef, align 16
+  unreachable
+}
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
new file mode 100644
index 0000000000000..98cab9a9149ea
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O1 -march=arm -mattr=+vfp2 < %s | FileCheck %s
+; pr4939
+
+define void @test(double* %x, double* %y) nounwind {
+  %1 = load double* %x, align 4
+  %2 = load double* %y, align 4
+  %3 = fsub double -0.000000e+00, %1
+  %4 = fcmp ugt double %2, %3
+  br i1 %4, label %bb1, label %bb2
+
+bb1:
+;CHECK: fstdhi
+  store double %1, double* %y, align 4
+  br label %bb2
+
+bb2:
+  ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-10-postdec.ll b/test/CodeGen/ARM/2009-09-10-postdec.ll
new file mode 100644
index 0000000000000..10653b51c1463
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-10-postdec.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=arm < %s | FileCheck %s
+; Radar 7213850
+
+define i32 @test(i8* %d, i32 %x, i32 %y) nounwind {
+  %1 = ptrtoint i8* %d to i32
+;CHECK: sub
+  %2 = sub i32 %x, %1
+  %3 = add nsw i32 %2, %y
+  store i8 0, i8* %d, align 1
+  ret i32 %3
+}
diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
new file mode 100644
index 0000000000000..13adb24e2f6fd
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mattr=+neon < %s
+; PR4965
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+%struct.fr = type { [6 x %struct.pl] }
+%struct.obb = type { %"struct.m4", %"struct.p3" }
+%struct.pl = type { %"struct.p3" }
+%"struct.m4" = type { %"struct.p3", %"struct.p3", %"struct.p3", %"struct.p3" }
+%"struct.p3" = type { <4 x float> }
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define arm_aapcs_vfpcc i8 @foo(%struct.fr* nocapture %this, %struct.obb* %box) nounwind {
+entry:
+  %val.i.i = load <4 x float>* undef              ; <<4 x float>> [#uses=1]
+  %val2.i.i = load <4 x float>* null              ; <<4 x float>> [#uses=1]
+  %elt3.i.i = getelementptr inbounds %struct.obb* %box, i32 0, i32 0, i32 2, i32 0 ; <<4 x float>*> [#uses=1]
+  %val4.i.i = load <4 x float>* %elt3.i.i         ; <<4 x float>> [#uses=1]
+  %0 = shufflevector <2 x float> undef, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %1 = fadd <4 x float> undef, zeroinitializer    ; <<4 x float>> [#uses=1]
+  br label %bb33
+
+bb:                                               ; preds = %bb33
+  %2 = fmul <4 x float> %val.i.i, undef           ; <<4 x float>> [#uses=1]
+  %3 = fmul <4 x float> %val2.i.i, undef          ; <<4 x float>> [#uses=1]
+  %4 = fadd <4 x float> %3, %2                    ; <<4 x float>> [#uses=1]
+  %5 = fmul <4 x float> %val4.i.i, undef          ; <<4 x float>> [#uses=1]
+  %6 = fadd <4 x float> %5, %4                    ; <<4 x float>> [#uses=1]
+  %7 = bitcast <4 x float> %6 to <4 x i32>        ; <<4 x i32>> [#uses=1]
+  %8 = and <4 x i32> %7, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> ; <<4 x i32>> [#uses=1]
+  %9 = or <4 x i32> %8, undef                     ; <<4 x i32>> [#uses=1]
+  %10 = bitcast <4 x i32> %9 to <4 x float>       ; <<4 x float>> [#uses=1]
+  %11 = shufflevector <4 x float> %10, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+  %12 = shufflevector <2 x float> %11, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %13 = fmul <4 x float> undef, %12               ; <<4 x float>> [#uses=1]
+  %14 = fmul <4 x float> %0, undef                ; <<4 x float>> [#uses=1]
+  %15 = fadd <4 x float> %14, %13                 ; <<4 x float>> [#uses=1]
+  %16 = fadd <4 x float> undef, %15               ; <<4 x float>> [#uses=1]
+  %17 = fadd <4 x float> %1, %16                  ; <<4 x float>> [#uses=1]
+  %18 = fmul <4 x float> zeroinitializer, %17     ; <<4 x float>> [#uses=1]
+  %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=2]
+  %20 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+  %21 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+  %22 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %20, <2 x float> %21) nounwind ; <<2 x float>> [#uses=2]
+  %23 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %22, <2 x float> %22) nounwind ; <<2 x float>> [#uses=2]
+  %24 = shufflevector <2 x float> %23, <2 x float> %23, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %25 = fadd <4 x float> %24, zeroinitializer     ; <<4 x float>> [#uses=1]
+  %tmp46 = extractelement <4 x float> %25, i32 0  ; <float> [#uses=1]
+  %26 = fcmp olt float %tmp46, 0.000000e+00       ; <i1> [#uses=1]
+  br i1 %26, label %bb41, label %bb33
+
+bb33:                                             ; preds = %bb, %entry
+  br i1 undef, label %bb34, label %bb
+
+bb34:                                             ; preds = %bb33
+  ret i8 undef
+
+bb41:                                             ; preds = %bb
+  ret i8 1
+}
diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
new file mode 100644
index 0000000000000..758b59a4638d5
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a9
+
+define arm_aapcs_vfpcc <4 x float> @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+  %1 = ptrtoint i8* %pBuffer to i32
+
+  %lsr.iv2641 = inttoptr i32 %1 to float*
+  %tmp29 = add i32 %1, 4
+  %tmp2930 = inttoptr i32 %tmp29 to float*
+  %tmp31 = add i32 %1, 8
+  %tmp3132 = inttoptr i32 %tmp31 to float*
+  %tmp33 = add i32 %1, 12
+  %tmp3334 = inttoptr i32 %tmp33 to float*
+  %tmp35 = add i32 %1, 16
+  %tmp3536 = inttoptr i32 %tmp35 to float*
+  %tmp37 = add i32 %1, 20
+  %tmp3738 = inttoptr i32 %tmp37 to float*
+  %tmp39 = add i32 %1, 24
+  %tmp3940 = inttoptr i32 %tmp39 to float*
+  %2 = load float* %lsr.iv2641, align 4
+  %3 = load float* %tmp2930, align 4
+  %4 = load float* %tmp3132, align 4
+  %5 = load float* %tmp3334, align 4
+  %6 = load float* %tmp3536, align 4
+  %7 = load float* %tmp3738, align 4
+  %8 = load float* %tmp3940, align 4
+  %9 = insertelement <4 x float> undef, float %6, i32 0
+  %10 = shufflevector <4 x float> %9, <4 x float> undef, <4 x i32> zeroinitializer
+  %11 = insertelement <4 x float> %10, float %7, i32 1
+  %12 = insertelement <4 x float> %11, float %8, i32 2
+  %13 = insertelement <4 x float> undef, float %2, i32 0
+  %14 = shufflevector <4 x float> %13, <4 x float> undef, <4 x i32> zeroinitializer
+  %15 = insertelement <4 x float> %14, float %3, i32 1
+  %16 = insertelement <4 x float> %15, float %4, i32 2
+  %17 = insertelement <4 x float> %16, float %5, i32 3
+  %18 = fsub <4 x float> zeroinitializer, %12
+  %19 = shufflevector <4 x float> %18, <4 x float> undef, <4 x i32> zeroinitializer
+  %20 = shufflevector <4 x float> %17, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %21 = shufflevector <2 x float> %20, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+
+  ret <4 x float> %21
+}
diff --git a/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll b/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll
new file mode 100644
index 0000000000000..980f8ce6fa1b2
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a9
+
+; PR4986
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+  br i1 undef, label %return, label %bb.preheader
+
+bb.preheader:                                     ; preds = %entry
+  br label %bb
+
+bb:                                               ; preds = %bb, %bb.preheader
+  %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
+  %2 = insertelement <4 x float> %1, float undef, i32 2 ; <<4 x float>> [#uses=1]
+  %3 = insertelement <4 x float> %2, float undef, i32 3 ; <<4 x float>> [#uses=1]
+  %4 = fmul <4 x float> undef, %3                 ; <<4 x float>> [#uses=1]
+  %5 = extractelement <4 x float> %4, i32 3       ; <float> [#uses=1]
+  store float %5, float* undef, align 4
+  br i1 undef, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
+
+define arm_aapcs_vfpcc <4 x float> @bar(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+  %1 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %2 = insertelement <4 x float> %1, float undef, i32 1 ; <<4 x float>> [#uses=1]
+  %3 = insertelement <4 x float> %2, float undef, i32 2 ; <<4 x float>> [#uses=1]
+  %4 = insertelement <4 x float> %3, float undef, i32 3 ; <<4 x float>> [#uses=1]
+  %5 = shufflevector <4 x float> %4, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+  %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+  ret <4 x float> %6
+}
diff --git a/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll
new file mode 100644
index 0000000000000..aace4751915d1
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%bar = type { <4 x float> }
+%foo = type { %bar, %bar, %bar, %bar }
+
+declare arm_aapcs_vfpcc <4 x float> @bbb(%bar*) nounwind
+
+define arm_aapcs_vfpcc void @aaa(%foo* noalias sret %agg.result, %foo* %tfrm) nounwind {
+entry:
+  %0 = call arm_aapcs_vfpcc  <4 x float> @bbb(%bar* undef) nounwind ; <<4 x float>> [#uses=0]
+  ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll
new file mode 100644
index 0000000000000..30931a2ffb66d
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%bar = type { %foo, %foo }
+%foo = type { <4 x float> }
+
+declare arm_aapcs_vfpcc float @aaa(%foo* nocapture) nounwind readonly
+
+declare arm_aapcs_vfpcc %bar* @bbb(%bar*, <4 x float>, <4 x float>) nounwind
+
+define arm_aapcs_vfpcc void @ccc(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+  br i1 undef, label %return, label %bb.nph
+
+bb.nph:                                           ; preds = %entry
+  %0 = call arm_aapcs_vfpcc  %bar* @bbb(%bar* undef, <4 x float> undef, <4 x float> undef) nounwind ; <%bar*> [#uses=0]
+  %1 = call arm_aapcs_vfpcc  float @aaa(%foo* undef) nounwind ; <float> [#uses=0]
+  unreachable
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
new file mode 100644
index 0000000000000..2ff479b217818
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon
+
+; PR5024
+
+%struct.1 = type { %struct.4, %struct.4 }
+%struct.4 = type { <4 x float> }
+
+define arm_aapcs_vfpcc %struct.1* @hhh3(%struct.1* %this, <4 x float> %lenation.0, <4 x float> %legalation.0) nounwind {
+entry:
+  %0 = call arm_aapcs_vfpcc  %struct.4* @sss1(%struct.4* undef, float 0.000000e+00) nounwind ; <%struct.4*> [#uses=0]
+  %1 = call arm_aapcs_vfpcc  %struct.4* @qqq1(%struct.4* null, float 5.000000e-01) nounwind ; <%struct.4*> [#uses=0]
+  %val92 = load <4 x float>* null                 ; <<4 x float>> [#uses=1]
+  %2 = call arm_aapcs_vfpcc  %struct.4* @zzz2(%struct.4* undef, <4 x float> %val92) nounwind ; <%struct.4*> [#uses=0]
+  ret %struct.1* %this
+}
+
+declare arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4*, float) nounwind
+
+declare arm_aapcs_vfpcc %struct.4* @sss1(%struct.4*, float) nounwind
+
+declare arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4*, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
new file mode 100644
index 0000000000000..6281775d0616f
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; pr4926
+
+define arm_apcscc void @test_vget_lanep16() nounwind {
+entry:
+  %arg0_poly16x4_t = alloca <4 x i16>             ; <<4 x i16>*> [#uses=1]
+  %out_poly16_t = alloca i16                      ; <i16*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+; CHECK: fldd
+  %0 = load <4 x i16>* %arg0_poly16x4_t, align 8  ; <<4 x i16>> [#uses=1]
+  %1 = extractelement <4 x i16> %0, i32 1         ; <i16> [#uses=1]
+  store i16 %1, i16* %out_poly16_t, align 2
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll b/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll
new file mode 100644
index 0000000000000..ea2693ac2e408
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8
+; PR5055
+
+module asm ".globl\09__aeabi_f2lz"
+module asm ".set\09__aeabi_f2lz, __fixsfdi"
+module asm ""
+
+define arm_aapcs_vfpcc i64 @__fixsfdi(float %a) nounwind {
+entry:
+  %0 = fcmp olt float %a, 0.000000e+00            ; <i1> [#uses=1]
+  br i1 %0, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  %1 = fsub float -0.000000e+00, %a               ; <float> [#uses=1]
+  %2 = tail call arm_aapcs_vfpcc  i64 @__fixunssfdi(float %1) nounwind ; <i64> [#uses=1]
+  %3 = sub i64 0, %2                              ; <i64> [#uses=1]
+  ret i64 %3
+
+bb1:                                              ; preds = %entry
+  %4 = tail call arm_aapcs_vfpcc  i64 @__fixunssfdi(float %a) nounwind ; <i64> [#uses=1]
+  ret i64 %4
+}
+
+declare arm_aapcs_vfpcc i64 @__fixunssfdi(float)
diff --git a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
new file mode 100644
index 0000000000000..53bd668259538
--- /dev/null
+++ b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -mcpu=arm10tdmi | FileCheck %s
+; PR4687
+
+%0 = type { double, double }
+
+define arm_aapcscc void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.0) nounwind {
+; CHECK: foo:
+; CHECK: bl __adddf3
+; CHECK-NOT: strd
+; CHECK: mov
+  %x76 = fmul double %y.0, 0.000000e+00           ; <double> [#uses=1]
+  %x77 = fadd double %y.0, 0.000000e+00           ; <double> [#uses=1]
+  %tmpr = fadd double %x.0, %x76                  ; <double> [#uses=1]
+  %agg.result.0 = getelementptr %0* %agg.result, i32 0, i32 0 ; <double*> [#uses=1]
+  store double %tmpr, double* %agg.result.0, align 8
+  %agg.result.1 = getelementptr %0* %agg.result, i32 0, i32 1 ; <double*> [#uses=1]
+  store double %x77, double* %agg.result.1, align 8
+  ret void
+}
diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll
index a3832c0ea3dba..9ccff07d456bb 100644
--- a/test/CodeGen/ARM/addrmode.ll
+++ b/test/CodeGen/ARM/addrmode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -stats |& grep asm-printer | grep 4
+; RUN: llc < %s -march=arm -stats |& grep asm-printer | grep 4
 
 define i32 @t1(i32 %a) {
 	%b = mul i32 %a, 9
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index 70b2c4d4195bf..b2c03147740ba 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=arm-linux-gnueabi -o %t -f
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t
 ; RUN: grep set %t   | count 5
 ; RUN: grep globl %t | count 4
 ; RUN: grep weak %t  | count 1
diff --git a/test/CodeGen/ARM/align.ll b/test/CodeGen/ARM/align.ll
index bb336ceebbabd..d73abe6a560c3 100644
--- a/test/CodeGen/ARM/align.ll
+++ b/test/CodeGen/ARM/align.ll
@@ -1,9 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm | grep align.*1 | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm | grep align.*1 | count 1
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | \
 ; RUN:   grep align.*2 | count 2
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | \
 ; RUN:   grep align.*3 | count 2
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -mtriple=arm-apple-darwin | \
 ; RUN:   grep align.*2 | count 4
 
 @a = global i1 true
diff --git a/test/CodeGen/ARM/alloca.ll b/test/CodeGen/ARM/alloca.ll
index f7e450f593242..15cf67734cb2d 100644
--- a/test/CodeGen/ARM/alloca.ll
+++ b/test/CodeGen/ARM/alloca.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | \
 ; RUN:   grep {mov r11, sp}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | \
 ; RUN:   grep {mov sp, r11}
 
 define void @f(i32 %a) {
diff --git a/test/CodeGen/ARM/argaddr.ll b/test/CodeGen/ARM/argaddr.ll
index 080827d7f42e0..116a32f9c74d3 100644
--- a/test/CodeGen/ARM/argaddr.ll
+++ b/test/CodeGen/ARM/argaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define void @f(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
 entry:
diff --git a/test/CodeGen/ARM/arguments-nosplit-double.ll b/test/CodeGen/ARM/arguments-nosplit-double.ll
index 57ff95c0cb6d0..770e41df2c246 100644
--- a/test/CodeGen/ARM/arguments-nosplit-double.ll
+++ b/test/CodeGen/ARM/arguments-nosplit-double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | not grep r3
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3
 ; PR4059
 
 define i32 @f(i64 %z, i32 %a, double %b) {
diff --git a/test/CodeGen/ARM/arguments-nosplit-i64.ll b/test/CodeGen/ARM/arguments-nosplit-i64.ll
index 5464674dbca5f..815edfd845ad5 100644
--- a/test/CodeGen/ARM/arguments-nosplit-i64.ll
+++ b/test/CodeGen/ARM/arguments-nosplit-i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | not grep r3
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3
 ; PR4058
 
 define i32 @f(i64 %z, i32 %a, i64 %b) {
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index 833e22dc269d1..ad5b2d69fab92 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | \
 ; RUN:   grep {mov r0, r2} | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -mtriple=arm-apple-darwin | \
 ; RUN:   grep {mov r0, r1} | count 1
 
 define i32 @f(i32 %a, i64 %b) {
diff --git a/test/CodeGen/ARM/arguments2.ll b/test/CodeGen/ARM/arguments2.ll
index eb7e45b4f3664..a515ad75a6694 100644
--- a/test/CodeGen/ARM/arguments2.ll
+++ b/test/CodeGen/ARM/arguments2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 define i32 @f(i32 %a, i128 %b) {
         %tmp = call i32 @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments3.ll b/test/CodeGen/ARM/arguments3.ll
index 97c040521d8b6..58f64c6c2f108 100644
--- a/test/CodeGen/ARM/arguments3.ll
+++ b/test/CodeGen/ARM/arguments3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 define i64 @f(i32 %a, i128 %b) {
         %tmp = call i64 @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments4.ll b/test/CodeGen/ARM/arguments4.ll
index 63ba64b27f1f2..f5f4207b7b372 100644
--- a/test/CodeGen/ARM/arguments4.ll
+++ b/test/CodeGen/ARM/arguments4.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 define float @f(i32 %a, i128 %b) {
         %tmp = call float @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments5.ll b/test/CodeGen/ARM/arguments5.ll
index 2000ff7b4a857..388a8ebee670d 100644
--- a/test/CodeGen/ARM/arguments5.ll
+++ b/test/CodeGen/ARM/arguments5.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 define double @f(i32 %a, i128 %b) {
         %tmp = call double @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments6.ll b/test/CodeGen/ARM/arguments6.ll
index a18c621d14374..3f757fee45e46 100644
--- a/test/CodeGen/ARM/arguments6.ll
+++ b/test/CodeGen/ARM/arguments6.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 define i128 @f(i32 %a, i128 %b) {
         %tmp = call i128 @g(i128 %b)
diff --git a/test/CodeGen/ARM/arguments7.ll b/test/CodeGen/ARM/arguments7.ll
index 489ffd41604d7..038e417b333ae 100644
--- a/test/CodeGen/ARM/arguments7.ll
+++ b/test/CodeGen/ARM/arguments7.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 define double @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b) {
         %tmp = call double @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b)
diff --git a/test/CodeGen/ARM/arguments8.ll b/test/CodeGen/ARM/arguments8.ll
index 5ff7e09548ea8..6999a4d4f6566 100644
--- a/test/CodeGen/ARM/arguments8.ll
+++ b/test/CodeGen/ARM/arguments8.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-apple-darwin
 
 define i64 @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b) {
         %tmp = call i64 @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b)
diff --git a/test/CodeGen/ARM/arguments_f64_backfill.ll b/test/CodeGen/ARM/arguments_f64_backfill.ll
index 07d928abe81f9..690f488d8483d 100644
--- a/test/CodeGen/ARM/arguments_f64_backfill.ll
+++ b/test/CodeGen/ARM/arguments_f64_backfill.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | grep {fcpys s0, s1}
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | grep {fcpys s0, s1}
 
 define float @f(float %z, double %a, float %b) {
         %tmp = call float @g(float %b)
diff --git a/test/CodeGen/ARM/arm-asm.ll b/test/CodeGen/ARM/arm-asm.ll
index b260b1312daff..2e35e3953f7e9 100644
--- a/test/CodeGen/ARM/arm-asm.ll
+++ b/test/CodeGen/ARM/arm-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define void @frame_dummy() {
 entry:
diff --git a/test/CodeGen/ARM/arm-frameaddr.ll b/test/CodeGen/ARM/arm-frameaddr.ll
index f1e4c2aeb7fbe..273986034c9b7 100644
--- a/test/CodeGen/ARM/arm-frameaddr.ll
+++ b/test/CodeGen/ARM/arm-frameaddr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin  | grep mov | grep r7
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep mov | grep r11
+; RUN: llc < %s -mtriple=arm-apple-darwin  | grep mov | grep r7
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | grep r11
 ; PR4344
 ; PR4416
 
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll
index 553c2fb646710..c4b4ec613ee55 100644
--- a/test/CodeGen/ARM/arm-negative-stride.ll
+++ b/test/CodeGen/ARM/arm-negative-stride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {str r1, \\\[r.*, -r.*, lsl #2\}
+; RUN: llc < %s -march=arm | grep {str r1, \\\[r.*, -r.*, lsl #2\}
 
 define void @test(i32* %P, i32 %A, i32 %i) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/bfc.ll b/test/CodeGen/ARM/bfc.ll
new file mode 100644
index 0000000000000..53392de73fcf8
--- /dev/null
+++ b/test/CodeGen/ARM/bfc.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | grep "bfc " | count 3
+
+; 4278190095 = 0xff00000f
+define i32 @f1(i32 %a) {
+    %tmp = and i32 %a, 4278190095
+    ret i32 %tmp
+}
+
+; 4286578688 = 0xff800000
+define i32 @f2(i32 %a) {
+    %tmp = and i32 %a, 4286578688
+    ret i32 %tmp
+}
+
+; 4095 = 0x00000fff
+define i32 @f3(i32 %a) {
+    %tmp = and i32 %a, 4095
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/bic.ll b/test/CodeGen/ARM/bic.ll
index b4ea433c40cc0..b16dcc6755b1f 100644
--- a/test/CodeGen/ARM/bic.ll
+++ b/test/CodeGen/ARM/bic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 2
+; RUN: llc < %s -march=arm | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 2
 
 define i32 @f1(i32 %a, i32 %b) {
     %tmp = xor i32 %b, 4294967295
diff --git a/test/CodeGen/ARM/bits.ll b/test/CodeGen/ARM/bits.ll
index 0ac4f9a3833dc..9e94efe3f9dbb 100644
--- a/test/CodeGen/ARM/bits.ll
+++ b/test/CodeGen/ARM/bits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
 ; RUN: grep and      %t | count 1
 ; RUN: grep orr      %t | count 1
 ; RUN: grep eor      %t | count 1
diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll
index 437b3189141dc..0e3e070a818fa 100644
--- a/test/CodeGen/ARM/bx_fold.ll
+++ b/test/CodeGen/ARM/bx_fold.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | not grep bx
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | not grep bx
 
 define void @test(i32 %Ptr, i8* %L) {
 entry:
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index 6b196653e05ac..52246c3f0cd77 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {mov lr, pc}
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5t | grep blx
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi\
+; RUN: llc < %s -march=arm | grep {mov lr, pc}
+; RUN: llc < %s -march=arm -mattr=+v5t | grep blx
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
 ; RUN:   -relocation-model=pic | grep {PLT}
 
 @t = weak global i32 ()* null           ; <i32 ()**> [#uses=1]
diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll
index 1af6fad099b4e..efe29d857d235 100644
--- a/test/CodeGen/ARM/call_nolink.ll
+++ b/test/CodeGen/ARM/call_nolink.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN:   not grep {bx lr}
 
 	%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
index 3bf2dc0b4f03a..294de5ff72780 100644
--- a/test/CodeGen/ARM/carry.ll
+++ b/test/CodeGen/ARM/carry.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | grep "subs r" | count 2
-; RUN: llvm-as < %s | llc -march=arm | grep "adc r"
-; RUN: llvm-as < %s | llc -march=arm | grep "sbc r"  | count 2
+; RUN: llc < %s -march=arm | grep "subs r" | count 2
+; RUN: llc < %s -march=arm | grep "adc r"
+; RUN: llc < %s -march=arm | grep "sbc r"  | count 2
 
 define i64 @f1(i64 %a, i64 %b) {
 entry:
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
index 389fb2ce1ee8f..d2235c9221cef 100644
--- a/test/CodeGen/ARM/clz.ll
+++ b/test/CodeGen/ARM/clz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5t | grep clz
+; RUN: llc < %s -march=arm -mattr=+v5t | grep clz
 
 declare i32 @llvm.ctlz.i32(i32)
 
diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll
index fcb8b179c803d..5f3ed1d2743c0 100644
--- a/test/CodeGen/ARM/compare-call.ll
+++ b/test/CodeGen/ARM/compare-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | \
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
 ; RUN:   grep fcmpes
 
 define void @test3(float* %glob, i32 %X) {
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
index 095157b592bf7..e2d8ddc63fcf4 100644
--- a/test/CodeGen/ARM/constants.ll
+++ b/test/CodeGen/ARM/constants.ll
@@ -1,13 +1,13 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep {mov r0, #0} | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep {mov r0, #255$} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | \
+; RUN: llc < %s -march=arm -asm-verbose | \
 ; RUN:   grep {mov r0.*256} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {orr.*256} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {mov r0, .*-1073741761} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {mov r0, .*1008} | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep {cmp r0, #1, 16} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {orr.*256} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {mov r0, .*-1073741761} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {mov r0, .*1008} | count 1
+; RUN: llc < %s -march=arm | grep {cmp r0, #1, 16} | count 1
 
 define i32 @f1() {
         ret i32 0
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 4f4091af4837c..0dcf9ddc0bb17 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {bl.\*__ltdf} | count 1
+; RUN: llc < %s -march=arm | grep {bl.\*__ltdf} | count 1
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 
diff --git a/test/CodeGen/ARM/ctors_dtors.ll b/test/CodeGen/ARM/ctors_dtors.ll
index 5caa5b1266dac..fb94626ab7dd0 100644
--- a/test/CodeGen/ARM/ctors_dtors.ll
+++ b/test/CodeGen/ARM/ctors_dtors.ll
@@ -1,15 +1,15 @@
-; RUN: llvm-as <  %s | llc -mtriple=arm-apple-darwin | \
-; RUN:   grep {\\.mod_init_func}
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
-; RUN:   grep {\\.mod_term_func} 
-; RUN: llvm-as  < %s | llc -mtriple=arm-linux-gnu | \
-; RUN:   grep {\\.section \\.ctors,"aw",.progbits}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | \
-; RUN:   grep {\\.section \\.dtors,"aw",.progbits}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
-; RUN:   grep {\\.section \\.init_array,"aw",.init_array}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
-; RUN:   grep {\\.section \\.fini_array,"aw",.fini_array}
+; RUN: llc < %s -mtriple=arm-apple-darwin  | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-linux-gnu     | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI
+
+; DARWIN: .section	__DATA,__mod_init_func,mod_init_funcs
+; DARWIN: .section	__DATA,__mod_term_func,mod_term_funcs
+
+; ELF: .section .ctors,"aw",%progbits
+; ELF: .section .dtors,"aw",%progbits
+
+; GNUEABI: .section .init_array,"aw",%init_array
+; GNUEABI: .section .fini_array,"aw",%fini_array
 
 @llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_init } ]                ; <[1 x { i32, void ()* }]*> [#uses=0]
 @llvm.global_dtors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_fini } ]                ; <[1 x { i32, void ()* }]*> [#uses=0]
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index 1085ec7fa624e..2f724e79f104e 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
 ; RUN: grep __divsi3  %t
 ; RUN: grep __udivsi3 %t
 ; RUN: grep __modsi3  %t
diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll
index e0cd4e15f4e3e..92e2d136af68d 100644
--- a/test/CodeGen/ARM/dyn-stackalloc.ll
+++ b/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 	%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
 	%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/ARM/extloadi1.ll b/test/CodeGen/ARM/extloadi1.ll
index 2e9041c6ecab1..dc45ce705f444 100644
--- a/test/CodeGen/ARM/extloadi1.ll
+++ b/test/CodeGen/ARM/extloadi1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 @handler_installed.6144.b = external global i1          ; <i1*> [#uses=1]
 
 define void @__mf_sigusr1_respond() {
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
new file mode 100644
index 0000000000000..5690a01d750b4
--- /dev/null
+++ b/test/CodeGen/ARM/fabss.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+        %dum = fadd float %a, %b
+	%0 = tail call float @fabsf(float %dum)
+        %dum1 = fadd float %0, %b
+	ret float %dum1
+}
+
+declare float @fabsf(float)
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
new file mode 100644
index 0000000000000..a01f868d18b24
--- /dev/null
+++ b/test/CodeGen/ARM/fadds.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+	%0 = fadd float %a, %b
+	ret float %0
+}
+
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index 66acda9c9b913..bf7c305c89599 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep bic | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | \
+; RUN: llc < %s -march=arm | grep bic | count 2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
 ; RUN:   grep fneg | count 2
 
 define float @test1(float %x, double %y) {
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
new file mode 100644
index 0000000000000..2af250d121d14
--- /dev/null
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+	%0 = fdiv float %a, %b
+	ret float %0
+}
+
diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll
index 777a3d69a1913..ebf1d84536e3b 100644
--- a/test/CodeGen/ARM/fixunsdfdi.ll
+++ b/test/CodeGen/ARM/fixunsdfdi.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
-; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fstd
+; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fstd
 
 define hidden i64 @__fixunsdfdi(double %x) nounwind readnone {
 entry:
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
new file mode 100644
index 0000000000000..1a1cd0747b498
--- /dev/null
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+	%0 = fmul float %a, %b
+        %1 = fadd float %acc, %0
+	ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fmdrr-fmrrd.ll b/test/CodeGen/ARM/fmdrr-fmrrd.ll
index 315e6238732fc..eb72faf8d811b 100644
--- a/test/CodeGen/ARM/fmdrr-fmrrd.ll
+++ b/test/CodeGen/ARM/fmdrr-fmrrd.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fmdrr
-; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fmrrd
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmdrr
+; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmrrd
 
 ; naive codegen for this is:
 ; _i:
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
new file mode 100644
index 0000000000000..c6e6d40604028
--- /dev/null
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+	%0 = fmul float %a, %b
+        %1 = fsub float %0, %acc
+	ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
new file mode 100644
index 0000000000000..cb5dadeb21044
--- /dev/null
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+	%0 = fmul float %a, %b
+	ret float %0
+}
+
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
new file mode 100644
index 0000000000000..7da443dd93f50
--- /dev/null
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+
+define float @test1(float* %a) {
+entry:
+	%0 = load float* %a, align 4		; <float> [#uses=2]
+	%1 = fsub float -0.000000e+00, %0		; <float> [#uses=2]
+	%2 = fpext float %1 to double		; <double> [#uses=1]
+	%3 = fcmp olt double %2, 1.234000e+00		; <i1> [#uses=1]
+	%retval = select i1 %3, float %1, float %0		; <float> [#uses=1]
+	ret float %retval
+}
+
+define float @test2(float* %a) {
+entry:
+	%0 = load float* %a, align 4		; <float> [#uses=2]
+	%1 = fmul float -1.000000e+00, %0		; <float> [#uses=2]
+	%2 = fpext float %1 to double		; <double> [#uses=1]
+	%3 = fcmp olt double %2, 1.234000e+00		; <i1> [#uses=1]
+	%retval = select i1 %3, float %1, float %0		; <float> [#uses=1]
+	ret float %retval
+}
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
new file mode 100644
index 0000000000000..e57bbbba3b384
--- /dev/null
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %acc, float %a, float %b) {
+entry:
+	%0 = fmul float %a, %b
+        %1 = fsub float %acc, %0
+	ret float %1
+}
+
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
new file mode 100644
index 0000000000000..3ae437d69db18
--- /dev/null
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+define float @test1(float %acc, float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0 
+entry:
+	%0 = fmul float %a, %b
+	%1 = fsub float -0.0, %0
+        %2 = fsub float %1, %acc
+	ret float %2
+}
+
+define float @test2(float %acc, float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0 
+entry:
+	%0 = fmul float %a, %b
+	%1 = fmul float -1.0, %0
+        %2 = fsub float %1, %acc
+	ret float %2
+}
+
diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll
index 7bbda2d76d5d2..613b347cdbf20 100644
--- a/test/CodeGen/ARM/fnmul.ll
+++ b/test/CodeGen/ARM/fnmul.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | grep fnmuld
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fnmuld
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul
 
 
 define double @t1(double %a, double %b) {
diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll
new file mode 100644
index 0000000000000..efd87d2dcb896
--- /dev/null
+++ b/test/CodeGen/ARM/fnmuls.ll
@@ -0,0 +1,23 @@
+; XFAIL: *
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+define float @test1(float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0 
+entry:
+	%0 = fmul float %a, %b
+        %1 = fsub float -0.0, %0
+	ret float %1
+}
+
+define float @test2(float %a, float %b) nounwind {
+; CHECK: fnmscs s2, s1, s0 
+entry:
+	%0 = fmul float %a, %b
+        %1 = fmul float -1.0, %0
+	ret float %1
+}
+
diff --git a/test/CodeGen/ARM/formal.ll b/test/CodeGen/ARM/formal.ll
index 6d6d108f32835..4ac10badea976 100644
--- a/test/CodeGen/ARM/formal.ll
+++ b/test/CodeGen/ARM/formal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
 
 declare void @bar(i64 %x, i64 %y)
 
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index ba199dbf56080..4e4ef722f97e6 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -1,55 +1,71 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep fmsr %t | count 4
-; RUN: grep fsitos %t
-; RUN: grep fmrs %t | count 2
-; RUN: grep fsitod %t
-; RUN: grep fmrrd %t | count 3
-; RUN: not grep fmdrr %t 
-; RUN: grep fldd %t
-; RUN: grep fuitod %t
-; RUN: grep fuitos %t
-; RUN: grep 1065353216 %t
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
 
 define float @f(i32 %a) {
+;CHECK: f:
+;CHECK: fmsr
+;CHECK-NEXT: fsitos
+;CHECK-NEXT: fmrs
 entry:
         %tmp = sitofp i32 %a to float           ; <float> [#uses=1]
         ret float %tmp
 }
 
 define double @g(i32 %a) {
+;CHECK: g:
+;CHECK: fmsr
+;CHECK-NEXT: fsitod
+;CHECK-NEXT: fmrrd
 entry:
         %tmp = sitofp i32 %a to double          ; <double> [#uses=1]
         ret double %tmp
 }
 
 define double @uint_to_double(i32 %a) {
+;CHECK: uint_to_double:
+;CHECK: fmsr
+;CHECK-NEXT: fuitod
+;CHECK-NEXT: fmrrd
 entry:
         %tmp = uitofp i32 %a to double          ; <double> [#uses=1]
         ret double %tmp
 }
 
 define float @uint_to_float(i32 %a) {
+;CHECK: uint_to_float:
+;CHECK: fmsr
+;CHECK-NEXT: fuitos
+;CHECK-NEXT: fmrs
 entry:
         %tmp = uitofp i32 %a to float           ; <float> [#uses=1]
         ret float %tmp
 }
 
 define double @h(double* %v) {
+;CHECK: h:
+;CHECK: fldd
+;CHECK-NEXT: fmrrd
 entry:
         %tmp = load double* %v          ; <double> [#uses=1]
         ret double %tmp
 }
 
 define float @h2() {
+;CHECK: h2:
+;CHECK: 1065353216
 entry:
         ret float 1.000000e+00
 }
 
 define double @f2(double %a) {
+;CHECK: f2:
+;CHECK-NOT: fmdrr
         ret double %a
 }
 
 define void @f3() {
+;CHECK: f3:
+;CHECK-NOT: fmdrr
+;CHECK: f4
 entry:
         %tmp = call double @f5( )               ; <double> [#uses=1]
         call void @f4( double %tmp )
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
new file mode 100644
index 0000000000000..9ce2ac549b571
--- /dev/null
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
+
+define i32 @test1(float %a, float %b) {
+; VFP2: test1:
+; VFP2: ftosizs s0, s0
+; NEON: test1:
+; NEON: vcvt.s32.f32 d0, d0
+entry:
+        %0 = fadd float %a, %b
+        %1 = fptosi float %0 to i32
+	ret i32 %1
+}
+
+define i32 @test2(float %a, float %b) {
+; VFP2: test2:
+; VFP2: ftouizs s0, s0
+; NEON: test2:
+; NEON: vcvt.u32.f32 d0, d0
+entry:
+        %0 = fadd float %a, %b
+        %1 = fptoui float %0 to i32
+	ret i32 %1
+}
+
+define float @test3(i32 %a, i32 %b) {
+; VFP2: test3:
+; VFP2: fuitos s0, s0
+; NEON: test3:
+; NEON: vcvt.f32.u32 d0, d0
+entry:
+        %0 = add i32 %a, %b
+        %1 = uitofp i32 %0 to float
+	ret float %1
+}
+
+define float @test4(i32 %a, i32 %b) {
+; VFP2: test4:
+; VFP2: fsitos s0, s0
+; NEON: test4:
+; NEON: vcvt.f32.s32 d0, d0
+entry:
+        %0 = add i32 %a, %b
+        %1 = sitofp i32 %0 to float
+	ret float %1
+}
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index 568a6c41a0dde..ebeeb184121b9 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -1,74 +1,88 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep fadds %t
-; RUN: grep faddd %t
-; RUN: grep fmuls %t
-; RUN: grep fmuld %t
-; RUN: grep eor %t
-; RUN: grep fnegd %t
-; RUN: grep fdivs %t
-; RUN: grep fdivd %t
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
 
 define float @f1(float %a, float %b) {
+;CHECK: f1:
+;CHECK: fadds
 entry:
 	%tmp = fadd float %a, %b		; <float> [#uses=1]
 	ret float %tmp
 }
 
 define double @f2(double %a, double %b) {
+;CHECK: f2:
+;CHECK: faddd
 entry:
 	%tmp = fadd double %a, %b		; <double> [#uses=1]
 	ret double %tmp
 }
 
 define float @f3(float %a, float %b) {
+;CHECK: f3:
+;CHECK: fmuls
 entry:
 	%tmp = fmul float %a, %b		; <float> [#uses=1]
 	ret float %tmp
 }
 
 define double @f4(double %a, double %b) {
+;CHECK: f4:
+;CHECK: fmuld
 entry:
 	%tmp = fmul double %a, %b		; <double> [#uses=1]
 	ret double %tmp
 }
 
 define float @f5(float %a, float %b) {
+;CHECK: f5:
+;CHECK: fsubs
 entry:
 	%tmp = fsub float %a, %b		; <float> [#uses=1]
 	ret float %tmp
 }
 
 define double @f6(double %a, double %b) {
+;CHECK: f6:
+;CHECK: fsubd
 entry:
 	%tmp = fsub double %a, %b		; <double> [#uses=1]
 	ret double %tmp
 }
 
 define float @f7(float %a) {
+;CHECK: f7:
+;CHECK: eor
 entry:
 	%tmp1 = fsub float -0.000000e+00, %a		; <float> [#uses=1]
 	ret float %tmp1
 }
 
 define double @f8(double %a) {
+;CHECK: f8:
+;CHECK: fnegd
 entry:
 	%tmp1 = fsub double -0.000000e+00, %a		; <double> [#uses=1]
 	ret double %tmp1
 }
 
 define float @f9(float %a, float %b) {
+;CHECK: f9:
+;CHECK: fdivs
 entry:
 	%tmp1 = fdiv float %a, %b		; <float> [#uses=1]
 	ret float %tmp1
 }
 
 define double @f10(double %a, double %b) {
+;CHECK: f10:
+;CHECK: fdivd
 entry:
 	%tmp1 = fdiv double %a, %b		; <double> [#uses=1]
 	ret double %tmp1
 }
 
 define float @f11(float %a) {
+;CHECK: f11:
+;CHECK: bic
 entry:
 	%tmp1 = call float @fabsf( float %a )		; <float> [#uses=1]
 	ret float %tmp1
@@ -77,6 +91,8 @@ entry:
 declare float @fabsf(float)
 
 define double @f12(double %a) {
+;CHECK: f12:
+;CHECK: fabsd
 entry:
 	%tmp1 = call double @fabs( double %a )		; <double> [#uses=1]
 	ret double %tmp1
diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll
index ce0f4029589d5..2c9591ca5429c 100644
--- a/test/CodeGen/ARM/fpcmp.ll
+++ b/test/CodeGen/ARM/fpcmp.ll
@@ -1,13 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep movmi %t
-; RUN: grep moveq %t
-; RUN: grep movgt %t
-; RUN: grep movge %t
-; RUN: grep movne %t
-; RUN: grep fcmped %t | count 1
-; RUN: grep fcmpes %t | count 6
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
 
 define i32 @f1(float %a) {
+;CHECK: f1:
+;CHECK: fcmpes
+;CHECK: movmi
 entry:
         %tmp = fcmp olt float %a, 1.000000e+00          ; <i1> [#uses=1]
         %tmp1 = zext i1 %tmp to i32              ; <i32> [#uses=1]
@@ -15,6 +11,9 @@ entry:
 }
 
 define i32 @f2(float %a) {
+;CHECK: f2:
+;CHECK: fcmpes
+;CHECK: moveq
 entry:
         %tmp = fcmp oeq float %a, 1.000000e+00          ; <i1> [#uses=1]
         %tmp2 = zext i1 %tmp to i32              ; <i32> [#uses=1]
@@ -22,6 +21,9 @@ entry:
 }
 
 define i32 @f3(float %a) {
+;CHECK: f3:
+;CHECK: fcmpes
+;CHECK: movgt
 entry:
         %tmp = fcmp ogt float %a, 1.000000e+00          ; <i1> [#uses=1]
         %tmp3 = zext i1 %tmp to i32              ; <i32> [#uses=1]
@@ -29,6 +31,9 @@ entry:
 }
 
 define i32 @f4(float %a) {
+;CHECK: f4:
+;CHECK: fcmpes
+;CHECK: movge
 entry:
         %tmp = fcmp oge float %a, 1.000000e+00          ; <i1> [#uses=1]
         %tmp4 = zext i1 %tmp to i32              ; <i32> [#uses=1]
@@ -36,6 +41,9 @@ entry:
 }
 
 define i32 @f5(float %a) {
+;CHECK: f5:
+;CHECK: fcmpes
+;CHECK: movls
 entry:
         %tmp = fcmp ole float %a, 1.000000e+00          ; <i1> [#uses=1]
         %tmp5 = zext i1 %tmp to i32              ; <i32> [#uses=1]
@@ -43,6 +51,9 @@ entry:
 }
 
 define i32 @f6(float %a) {
+;CHECK: f6:
+;CHECK: fcmpes
+;CHECK: movne
 entry:
         %tmp = fcmp une float %a, 1.000000e+00          ; <i1> [#uses=1]
         %tmp6 = zext i1 %tmp to i32              ; <i32> [#uses=1]
@@ -50,6 +61,9 @@ entry:
 }
 
 define i32 @g1(double %a) {
+;CHECK: g1:
+;CHECK: fcmped
+;CHECK: movmi
 entry:
         %tmp = fcmp olt double %a, 1.000000e+00         ; <i1> [#uses=1]
         %tmp7 = zext i1 %tmp to i32              ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index 3e749afb400cb..67f70e9eb5ed2 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep moveq 
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep movvs
+; RUN: llc < %s -march=arm | grep moveq 
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep movvs
 
 define i32 @f7(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
index 218b25f9c1b1b..ee3c338e3b301 100644
--- a/test/CodeGen/ARM/fpconv.ll
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -1,81 +1,101 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t
-; RUN: grep fcvtsd %t
-; RUN: grep fcvtds %t
-; RUN: grep ftosizs %t
-; RUN: grep ftouizs %t
-; RUN: grep ftosizd %t
-; RUN: grep ftouizd %t
-; RUN: grep fsitos %t
-; RUN: grep fsitod %t
-; RUN: grep fuitos %t
-; RUN: grep fuitod %t
-; RUN: llvm-as < %s | llc -march=arm > %t
-; RUN: grep truncdfsf2 %t
-; RUN: grep extendsfdf2 %t
-; RUN: grep fixsfsi %t
-; RUN: grep fixunssfsi %t
-; RUN: grep fixdfsi %t
-; RUN: grep fixunsdfsi %t
-; RUN: grep floatsisf %t
-; RUN: grep floatsidf %t
-; RUN: grep floatunsisf %t
-; RUN: grep floatunsidf %t
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
+; RUN: llc < %s -march=arm | FileCheck %s
 
 define float @f1(double %x) {
+;CHECK-VFP: f1:
+;CHECK-VFP: fcvtsd
+;CHECK: f1:
+;CHECK: truncdfsf2
 entry:
 	%tmp1 = fptrunc double %x to float		; <float> [#uses=1]
 	ret float %tmp1
 }
 
 define double @f2(float %x) {
+;CHECK-VFP: f2:
+;CHECK-VFP: fcvtds
+;CHECK: f2:
+;CHECK: extendsfdf2
 entry:
 	%tmp1 = fpext float %x to double		; <double> [#uses=1]
 	ret double %tmp1
 }
 
 define i32 @f3(float %x) {
+;CHECK-VFP: f3:
+;CHECK-VFP: ftosizs
+;CHECK: f3:
+;CHECK: fixsfsi
 entry:
 	%tmp = fptosi float %x to i32		; <i32> [#uses=1]
 	ret i32 %tmp
 }
 
 define i32 @f4(float %x) {
+;CHECK-VFP: f4:
+;CHECK-VFP: ftouizs
+;CHECK: f4:
+;CHECK: fixunssfsi
 entry:
 	%tmp = fptoui float %x to i32		; <i32> [#uses=1]
 	ret i32 %tmp
 }
 
 define i32 @f5(double %x) {
+;CHECK-VFP: f5:
+;CHECK-VFP: ftosizd
+;CHECK: f5:
+;CHECK: fixdfsi
 entry:
 	%tmp = fptosi double %x to i32		; <i32> [#uses=1]
 	ret i32 %tmp
 }
 
 define i32 @f6(double %x) {
+;CHECK-VFP: f6:
+;CHECK-VFP: ftouizd
+;CHECK: f6:
+;CHECK: fixunsdfsi
 entry:
 	%tmp = fptoui double %x to i32		; <i32> [#uses=1]
 	ret i32 %tmp
 }
 
 define float @f7(i32 %a) {
+;CHECK-VFP: f7:
+;CHECK-VFP: fsitos
+;CHECK: f7:
+;CHECK: floatsisf
 entry:
 	%tmp = sitofp i32 %a to float		; <float> [#uses=1]
 	ret float %tmp
 }
 
 define double @f8(i32 %a) {
+;CHECK-VFP: f8:
+;CHECK-VFP: fsitod
+;CHECK: f8:
+;CHECK: floatsidf
 entry:
 	%tmp = sitofp i32 %a to double		; <double> [#uses=1]
 	ret double %tmp
 }
 
 define float @f9(i32 %a) {
+;CHECK-VFP: f9:
+;CHECK-VFP: fuitos
+;CHECK: f9:
+;CHECK: floatunsisf
 entry:
 	%tmp = uitofp i32 %a to float		; <float> [#uses=1]
 	ret float %tmp
 }
 
 define double @f10(i32 %a) {
+;CHECK-VFP: f10:
+;CHECK-VFP: fuitod
+;CHECK: f10:
+;CHECK: floatunsidf
 entry:
 	%tmp = uitofp i32 %a to double		; <double> [#uses=1]
 	ret double %tmp
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 13653bbe6aa02..fa897bf83f3a4 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep {mov r0, #0} | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
+; RUN: llc < %s -march=arm -mattr=+vfp2 | \
 ; RUN:   grep {flds.*\\\[} | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
+; RUN: llc < %s -march=arm -mattr=+vfp2 | \
 ; RUN:   grep {fsts.*\\\[} | count 1
 
 define float @f1(float %a) {
diff --git a/test/CodeGen/ARM/fpow.ll b/test/CodeGen/ARM/fpow.ll
index 461a2c966ec49..6d487927ee616 100644
--- a/test/CodeGen/ARM/fpow.ll
+++ b/test/CodeGen/ARM/fpow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define double @t(double %x, double %y) nounwind optsize {
 entry:
diff --git a/test/CodeGen/ARM/fpowi.ll b/test/CodeGen/ARM/fpowi.ll
index ab09ffff6b36a..174106bf4fafb 100644
--- a/test/CodeGen/ARM/fpowi.ll
+++ b/test/CodeGen/ARM/fpowi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep powidf2
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep powidf2
 ; PR1287
 
 ; ModuleID = '<stdin>'
diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll
index 41168acc42a55..0d270b0c0568b 100644
--- a/test/CodeGen/ARM/fptoint.ll
+++ b/test/CodeGen/ARM/fptoint.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | not grep fmrrd
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | not grep fmrrd
 
 @i = weak global i32 0		; <i32*> [#uses=2]
 @u = weak global i32 0		; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
new file mode 100644
index 0000000000000..060dd464f1b8d
--- /dev/null
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+	%0 = fsub float %a, %b
+	ret float %0
+}
+
diff --git a/test/CodeGen/ARM/hardfloat_neon.ll b/test/CodeGen/ARM/hardfloat_neon.ll
new file mode 100644
index 0000000000000..4abf04b0a4b6c
--- /dev/null
+++ b/test/CodeGen/ARM/hardfloat_neon.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+neon -float-abi=hard
+
+define <16 x i8> @vmulQi8_reg(<16 x i8> %A, <16 x i8> %B) nounwind {
+        %tmp1 = mul <16 x i8> %A, %B
+        ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @f(<16 x i8> %a, <16 x i8> %b) {
+        %tmp = call <16 x i8> @g(<16 x i8> %b)
+        ret <16 x i8> %tmp
+}
+
+declare <16 x i8> @g(<16 x i8>)
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index 16231da39b7cf..ccdc7bf4c1408 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep mov | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu --disable-fp-elim | \
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | count 1
+; RUN: llc < %s -mtriple=arm-linux-gnu --disable-fp-elim | \
 ; RUN:   grep mov | count 3
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep mov | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | count 2
 
 @str = internal constant [12 x i8] c"Hello World\00"
 
diff --git a/test/CodeGen/ARM/hidden-vis-2.ll b/test/CodeGen/ARM/hidden-vis-2.ll
index 6cf69aa486d5c..90f5308d5ff01 100644
--- a/test/CodeGen/ARM/hidden-vis-2.ll
+++ b/test/CodeGen/ARM/hidden-vis-2.ll
@@ -1,9 +1,12 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldr | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
 
 @x = weak hidden global i32 0		; <i32*> [#uses=1]
 
 define i32 @t() nounwind readonly {
 entry:
+; CHECK: t:
+; CHECK: ldr
+; CHECK-NEXT: ldr
 	%0 = load i32* @x, align 4		; <i32> [#uses=1]
 	ret i32 %0
 }
diff --git a/test/CodeGen/ARM/hidden-vis-3.ll b/test/CodeGen/ARM/hidden-vis-3.ll
index 4477f2a441a15..3bd710ae949fa 100644
--- a/test/CodeGen/ARM/hidden-vis-3.ll
+++ b/test/CodeGen/ARM/hidden-vis-3.ll
@@ -1,12 +1,15 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldr | count 6
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep non_lazy_ptr
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep long | count 4
+; RUN: llc < %s -mtriple=arm-apple-darwin9   | FileCheck %s
 
 @x = external hidden global i32		; <i32*> [#uses=1]
 @y = extern_weak hidden global i32	; <i32*> [#uses=1]
 
 define i32 @t() nounwind readonly {
 entry:
+; CHECK: LCPI1_0:
+; CHECK-NEXT: .long _x
+; CHECK: LCPI1_1:
+; CHECK-NEXT: .long _y
+
 	%0 = load i32* @x, align 4		; <i32> [#uses=1]
 	%1 = load i32* @y, align 4		; <i32> [#uses=1]
 	%2 = add i32 %1, %0		; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/hidden-vis.ll b/test/CodeGen/ARM/hidden-vis.ll
index 93f81ecdae053..3544ae81a0a45 100644
--- a/test/CodeGen/ARM/hidden-vis.ll
+++ b/test/CodeGen/ARM/hidden-vis.ll
@@ -1,18 +1,23 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
-; RUN:   grep .private_extern | count 2
+; RUN: llc < %s -mtriple=arm-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
 
-%struct.Person = type { i32 }
 @a = hidden global i32 0
 @b = external global i32
 
+define weak hidden void @t1() nounwind {
+; LINUX: .hidden t1
+; LINUX: t1:
 
-define weak hidden void @_ZN6Person13privateMethodEv(%struct.Person* %this) {
+; DARWIN: .private_extern _t1
+; DARWIN: t1:
   ret void
 }
 
-declare void @function(i32)
+define weak void @t2() nounwind {
+; LINUX: t2:
+; LINUX: .hidden a
 
-define weak void @_ZN6PersonC1Ei(%struct.Person* %this, i32 %_c) {
+; DARWIN: t2:
+; DARWIN: .private_extern _a
   ret void
 }
-
diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll
index ede6d74553309..1054f27dbe302 100644
--- a/test/CodeGen/ARM/iabs.ll
+++ b/test/CodeGen/ARM/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -stats |& \
+; RUN: llc < %s -march=arm -stats |& \
 ; RUN:   grep {3 .*Number of machine instrs printed}
 
 ;; Integer absolute value, should produce something as good as: ARM:
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index 7d429550b3add..e6aa044564a2b 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep bx | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep bx | count 1
 
 define i32 @t1(i32 %a, i32 %b) {
 	%tmp2 = icmp eq i32 %a, 0
diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll
index 3942061212182..ce57d736c1677 100644
--- a/test/CodeGen/ARM/ifcvt2.ll
+++ b/test/CodeGen/ARM/ifcvt2.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep bxlt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bxgt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bxge | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep bxlt | count 1
+; RUN: llc < %s -march=arm | grep bxgt | count 1
+; RUN: llc < %s -march=arm | grep bxge | count 1
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
 	%tmp2 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index 620bcbea1f27e..f7ebac6f2bac9 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep cmpne | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep bx | count 2
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep cmpne | count 1
+; RUN: llc < %s -march=arm | grep bx | count 2
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
 	switch i32 %c, label %cond_next [
diff --git a/test/CodeGen/ARM/ifcvt4.ll b/test/CodeGen/ARM/ifcvt4.ll
index ce5a679196c59..f28c61b9787ff 100644
--- a/test/CodeGen/ARM/ifcvt4.ll
+++ b/test/CodeGen/ARM/ifcvt4.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep subgt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep suble | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep subgt | count 1
+; RUN: llc < %s -march=arm | grep suble | count 1
 ; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
 
 define i32 @t(i32 %a, i32 %b) {
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index f8d4f82bbe28b..e9145ac36ddfb 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep blge | count 1
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
 
 @x = external global i32*		; <i32**> [#uses=1]
 
@@ -11,6 +10,8 @@ entry:
 }
 
 define void @t1(i32 %a, i32 %b) {
+; CHECK: t1:
+; CHECK: ldmltfd sp!, {r7, pc}
 entry:
 	%tmp1 = icmp sgt i32 %a, 10		; <i1> [#uses=1]
 	br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 63c4a0819dbf3..58241157580cc 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,10 +1,6 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep cmpne | count 1
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep ldmhi | count 1
 
 define void @foo(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index 6bb4b5609a580..f9cf88f7292e5 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -1,13 +1,8 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep cmpeq | count 1
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep moveq | count 1
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep ldmeq | count 1
 ; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
 
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index 85bd8c7bf1fc8..6cb8e7bb69fd6 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -1,7 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN:   grep ldmne | count 1
 
 	%struct.SString = type { i8*, i32, i32 }
diff --git a/test/CodeGen/ARM/ifcvt9.ll b/test/CodeGen/ARM/ifcvt9.ll
index bbd2f2ed62139..05bdc459c83fe 100644
--- a/test/CodeGen/ARM/ifcvt9.ll
+++ b/test/CodeGen/ARM/ifcvt9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define fastcc void @t() nounwind {
 entry:
diff --git a/test/CodeGen/ARM/illegal-vector-bitcast.ll b/test/CodeGen/ARM/illegal-vector-bitcast.ll
index ad24eb5dad716..febe6f56b66cd 100644
--- a/test/CodeGen/ARM/illegal-vector-bitcast.ll
+++ b/test/CodeGen/ARM/illegal-vector-bitcast.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -mtriple=arm-linux
 
 define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y)
 {
diff --git a/test/CodeGen/ARM/imm.ll b/test/CodeGen/ARM/imm.ll
index 998adbae5c94f..6f25f9dcb323d 100644
--- a/test/CodeGen/ARM/imm.ll
+++ b/test/CodeGen/ARM/imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep CPI
+; RUN: llc < %s -march=arm | not grep CPI
 
 define i32 @test1(i32 %A) {
         %B = add i32 %A, -268435441             ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/inlineasm-imm-arm.ll b/test/CodeGen/ARM/inlineasm-imm-arm.ll
index 2ceceae0d9d17..45dfcf0b82a5a 100644
--- a/test/CodeGen/ARM/inlineasm-imm-arm.ll
+++ b/test/CodeGen/ARM/inlineasm-imm-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 ; Test ARM-mode "I" constraint, for any Data Processing immediate.
 define i32 @testI(i32 %x) {
diff --git a/test/CodeGen/ARM/inlineasm.ll b/test/CodeGen/ARM/inlineasm.ll
index 2f7332a5f4805..d522348ba9993 100644
--- a/test/CodeGen/ARM/inlineasm.ll
+++ b/test/CodeGen/ARM/inlineasm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6
 
 define i32 @test1(i32 %tmp54) {
 	%tmp56 = tail call i32 asm "uxtb16 $0,$1", "=r,r"( i32 %tmp54 )		; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/inlineasm2.ll b/test/CodeGen/ARM/inlineasm2.ll
index 69394eb5bd494..a99bccf5a6541 100644
--- a/test/CodeGen/ARM/inlineasm2.ll
+++ b/test/CodeGen/ARM/inlineasm2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
 
 define double @__ieee754_sqrt(double %x) {
 	%tmp2 = tail call double asm "fsqrtd ${0:P}, ${1:P}", "=w,w"( double %x )
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
index f20344301e997..59f0d538d47cc 100644
--- a/test/CodeGen/ARM/insn-sched1.ll
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
 ; RUN:   grep mov | count 3
 
 define i32 @test(i32 %x) {
diff --git a/test/CodeGen/ARM/ispositive.ll b/test/CodeGen/ARM/ispositive.ll
index 7e8eb42b690fe..5116ac82862a4 100644
--- a/test/CodeGen/ARM/ispositive.ll
+++ b/test/CodeGen/ARM/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {mov r0, r0, lsr #31}
+; RUN: llc < %s -march=arm | grep {mov r0, r0, lsr #31}
 
 define i32 @test1(i32 %X) {
 entry:
diff --git a/test/CodeGen/ARM/large-stack.ll b/test/CodeGen/ARM/large-stack.ll
index b1738a4a38a69..ddf0f0ec7cc0f 100644
--- a/test/CodeGen/ARM/large-stack.ll
+++ b/test/CodeGen/ARM/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define void @test1() {
     %tmp = alloca [ 64 x i32 ] , align 4
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index 6a054577fc8bf..774b3c09bed42 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep ldmia | count 2
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep ldmib | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -mtriple=arm-apple-darwin | \
 ; RUN:   grep {ldmfd sp\!} | count 3
 
 @X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
diff --git a/test/CodeGen/ARM/ldr.ll b/test/CodeGen/ARM/ldr.ll
index ea99655723190..954fb5b8ad311 100644
--- a/test/CodeGen/ARM/ldr.ll
+++ b/test/CodeGen/ARM/ldr.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {ldr r0} | count 7
-; RUN: llvm-as < %s | llc -march=arm | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=arm | not grep mvn
-; RUN: llvm-as < %s | llc -march=arm | grep ldr | grep lsl
-; RUN: llvm-as < %s | llc -march=arm | grep ldr | grep lsr
+; RUN: llc < %s -march=arm | grep {ldr r0} | count 7
+; RUN: llc < %s -march=arm | grep mov | grep 1
+; RUN: llc < %s -march=arm | not grep mvn
+; RUN: llc < %s -march=arm | grep ldr | grep lsl
+; RUN: llc < %s -march=arm | grep ldr | grep lsr
 
 define i32 @f1(i32* %v) {
 entry:
diff --git a/test/CodeGen/ARM/ldr_ext.ll b/test/CodeGen/ARM/ldr_ext.ll
index b99c72197740e..d29eb022bacee 100644
--- a/test/CodeGen/ARM/ldr_ext.ll
+++ b/test/CodeGen/ARM/ldr_ext.ll
@@ -1,27 +1,36 @@
-; RUN: llvm-as < %s | llc -march=arm | grep ldrb | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrh | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrsb | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrsh | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
 
-define i32 @test1(i8* %v.pntr.s0.u1) {
-    %tmp.u = load i8* %v.pntr.s0.u1
+define i32 @test1(i8* %t1) nounwind {
+; CHECK: ldrb
+    %tmp.u = load i8* %t1
     %tmp1.s = zext i8 %tmp.u to i32
     ret i32 %tmp1.s
 }
 
-define i32 @test2(i16* %v.pntr.s0.u1) {
-    %tmp.u = load i16* %v.pntr.s0.u1
+define i32 @test2(i16* %t1) nounwind {
+; CHECK: ldrh
+    %tmp.u = load i16* %t1
     %tmp1.s = zext i16 %tmp.u to i32
     ret i32 %tmp1.s
 }
 
-define i32 @test3(i8* %v.pntr.s1.u0) {
-    %tmp.s = load i8* %v.pntr.s1.u0
+define i32 @test3(i8* %t0) nounwind {
+; CHECK: ldrsb
+    %tmp.s = load i8* %t0
     %tmp1.s = sext i8 %tmp.s to i32
     ret i32 %tmp1.s
 }
 
-define i32 @test4() {
+define i32 @test4(i16* %t0) nounwind {
+; CHECK: ldrsh
+    %tmp.s = load i16* %t0
+    %tmp1.s = sext i16 %tmp.s to i32
+    ret i32 %tmp1.s
+}
+
+define i32 @test5() nounwind {
+; CHECK: mov r0, #0
+; CHECK: ldrsh
     %tmp.s = load i16* null
     %tmp1.s = sext i16 %tmp.s to i32
     ret i32 %tmp1.s
diff --git a/test/CodeGen/ARM/ldr_frame.ll b/test/CodeGen/ARM/ldr_frame.ll
index 44315066c4c00..a3abdb603fa79 100644
--- a/test/CodeGen/ARM/ldr_frame.ll
+++ b/test/CodeGen/ARM/ldr_frame.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep mov
+; RUN: llc < %s -march=arm | not grep mov
 
 define i32 @f1() {
 	%buf = alloca [32 x i32], align 4
diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll
index 0491563fc6a69..97a48e1377e55 100644
--- a/test/CodeGen/ARM/ldr_post.ll
+++ b/test/CodeGen/ARM/ldr_post.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep {ldr.*\\\[.*\],} | count 1
 
 define i32 @test(i32 %a, i32 %b, i32 %c) {
diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll
index 7e447422361ea..7c442845682ec 100644
--- a/test/CodeGen/ARM/ldr_pre.ll
+++ b/test/CodeGen/ARM/ldr_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep {ldr.*\\!} | count 2
 
 define i32* @test1(i32* %X, i32* %dest) {
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index f1bee058a0fc7..8f7ae55c6eaf1 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,12 +1,20 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | grep ldrd
-; RUN: llvm-as < %s | llc -mtriple=armv5-apple-darwin | not grep ldrd
-; RUN: llvm-as < %s | llc -mtriple=armv6-eabi | not grep ldrd
+; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=V6
+; RUN: llc < %s -mtriple=armv5-apple-darwin | FileCheck %s -check-prefix=V5
+; RUN: llc < %s -mtriple=armv6-eabi | FileCheck %s -check-prefix=EABI
 ; rdar://r6949835
 
 @b = external global i64*
 
 define i64 @t(i64 %a) nounwind readonly {
 entry:
+;V6:      ldrd r2, [r2]
+
+;V5:      ldr r3, [r2]
+;V5-NEXT: ldr r2, [r2, #+4]
+
+;EABI:      ldr r3, [r2]
+;EABI-NEXT: ldr r2, [r2, #+4]
+
 	%0 = load i64** @b, align 4
 	%1 = load i64* %0, align 4
 	%2 = mul i64 %1, %a
diff --git a/test/CodeGen/ARM/load-global.ll b/test/CodeGen/ARM/load-global.ll
index 8896ead5a51cd..56a4a477f510c 100644
--- a/test/CodeGen/ARM/load-global.ll
+++ b/test/CodeGen/ARM/load-global.ll
@@ -1,14 +1,10 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=arm-apple-darwin -relocation-model=static | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=static | \
 ; RUN:   not grep {L_G\$non_lazy_ptr}
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | \
 ; RUN:   grep {L_G\$non_lazy_ptr} | count 2
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=arm-apple-darwin -relocation-model=pic | \
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | \
 ; RUN:   grep {ldr.*pc} | count 1
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=arm-linux-gnueabi -relocation-model=pic | \
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -relocation-model=pic | \
 ; RUN:   grep {GOT} | count 1
 
 @G = external global i32
diff --git a/test/CodeGen/ARM/load.ll b/test/CodeGen/ARM/load.ll
index 05097328102c1..253b0e145f811 100644
--- a/test/CodeGen/ARM/load.ll
+++ b/test/CodeGen/ARM/load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
 ; RUN: grep ldrsb %t
 ; RUN: grep ldrb %t
 ; RUN: grep ldrsh %t
diff --git a/test/CodeGen/ARM/long-setcc.ll b/test/CodeGen/ARM/long-setcc.ll
index 4bab330c73604..c76a5e4d4d1f8 100644
--- a/test/CodeGen/ARM/long-setcc.ll
+++ b/test/CodeGen/ARM/long-setcc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep cmp | count 1
+; RUN: llc < %s -march=arm | grep cmp | count 1
 
 
 define i1 @t1(i64 %x) {
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index fe0ee5473305b..2fcaac0d9c982 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -1,13 +1,13 @@
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | \
+; RUN: llc < %s -march=arm -asm-verbose | \
 ; RUN:   grep -- {-2147483648} | count 3
-; RUN: llvm-as < %s | llc -march=arm | grep mvn | count 3
-; RUN: llvm-as < %s | llc -march=arm | grep adds | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep adc | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep {subs } | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep sbc | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | grep mvn | count 3
+; RUN: llc < %s -march=arm | grep adds | count 1
+; RUN: llc < %s -march=arm | grep adc | count 1
+; RUN: llc < %s -march=arm | grep {subs } | count 1
+; RUN: llc < %s -march=arm | grep sbc | count 1
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep smull | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep umull | count 1
 
 define i64 @f1() {
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 55d0cdc54151c..057b5f067f803 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm > %t
+; RUN: llc < %s -march=arm > %t
 ; RUN: grep rrx %t | count 1
 ; RUN: grep __ashldi3 %t
 ; RUN: grep __ashrdi3 %t
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index 3881e91453b47..507ec2c7bd3e2 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -stats |& grep {39.*Number of machine instrs printed}
-; RUN: llvm-as < %s | llc -stats |& grep {.*Number of re-materialization}
+; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& grep {.*Number of re-materialization}
 ; This test really wants to check that the resultant "cond_true" block only 
 ; has a single store in it, and that cond_true55 only has code to materialize 
 ; the constant and do a store.  We do *not* want something like this:
diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
index 02902f2debd39..8130019cbfd95 100644
--- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll
+++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep lsl | grep -F {lsl #2\]}
+; RUN: llc < %s -march=arm | grep lsl | grep -F {lsl #2\]}
 ; Should use scaled addressing mode.
 
 define void @sintzero(i32* %a) nounwind {
diff --git a/test/CodeGen/ARM/mem.ll b/test/CodeGen/ARM/mem.ll
index e98316576d8a1..f46c7a5857ab9 100644
--- a/test/CodeGen/ARM/mem.ll
+++ b/test/CodeGen/ARM/mem.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep strb
-; RUN: llvm-as < %s | llc -march=arm | grep strh
+; RUN: llc < %s -march=arm | grep strb
+; RUN: llc < %s -march=arm | grep strh
 
 define void @f1() {
 entry:
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 4bf0b4f6f3b19..ed20c32dc0d59 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldmia
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep stmia
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrb
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrh
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldmia
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep stmia
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrb
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrh
 
 	%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
 @src = external global %struct.x
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 0b58bf680157a..41d5944cb83e5 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define void @f() {
 entry:
diff --git a/test/CodeGen/ARM/mls.ll b/test/CodeGen/ARM/mls.ll
new file mode 100644
index 0000000000000..85407fa254b08
--- /dev/null
+++ b/test/CodeGen/ARM/mls.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+    %tmp1 = mul i32 %a, %b
+    %tmp2 = sub i32 %c, %tmp1
+    ret i32 %tmp2
+}
+
+; sub doesn't commute, so no mls for this one
+define i32 @f2(i32 %a, i32 %b, i32 %c) {
+    %tmp1 = mul i32 %a, %b
+    %tmp2 = sub i32 %tmp1, %c
+    ret i32 %tmp2
+}
diff --git a/test/CodeGen/ARM/mul.ll b/test/CodeGen/ARM/mul.ll
index 3543b5de55db7..466a8020accec 100644
--- a/test/CodeGen/ARM/mul.ll
+++ b/test/CodeGen/ARM/mul.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep mul | count 2
-; RUN: llvm-as < %s | llc -march=arm | grep lsl | count 2
+; RUN: llc < %s -march=arm | grep mul | count 2
+; RUN: llc < %s -march=arm | grep lsl | count 2
 
 define i32 @f1(i32 %u) {
     %tmp = mul i32 %u, %u
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
new file mode 100644
index 0000000000000..93188cdd883f6
--- /dev/null
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+
+define i32 @t1(i32 %v) nounwind readnone {
+entry:
+; CHECK: t1:
+; CHECK: add r0, r0, r0, lsl #3
+	%0 = mul i32 %v, 9
+	ret i32 %0
+}
+
+define i32 @t2(i32 %v) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK: rsb r0, r0, r0, lsl #3
+	%0 = mul i32 %v, 7
+	ret i32 %0
+}
diff --git a/test/CodeGen/ARM/mulhi.ll b/test/CodeGen/ARM/mulhi.ll
index de75e96b87046..148f291e551d7 100644
--- a/test/CodeGen/ARM/mulhi.ll
+++ b/test/CodeGen/ARM/mulhi.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc < %s -march=arm -mattr=+v6 | \
 ; RUN:   grep smmul | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep umull | count 1
+; RUN: llc < %s -march=arm | grep umull | count 1
 
 define i32 @smulhi(i32 %x, i32 %y) {
         %tmp = sext i32 %x to i64               ; <i64> [#uses=1]
diff --git a/test/CodeGen/ARM/mvn.ll b/test/CodeGen/ARM/mvn.ll
index a7ef907033de3..571c21a833ecd 100644
--- a/test/CodeGen/ARM/mvn.ll
+++ b/test/CodeGen/ARM/mvn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep mvn | count 8
+; RUN: llc < %s -march=arm | grep mvn | count 8
 
 define i32 @f1() {
 entry:
diff --git a/test/CodeGen/ARM/neon_arith1.ll b/test/CodeGen/ARM/neon_arith1.ll
index 18b516fc1a8c6..58927374177a0 100644
--- a/test/CodeGen/ARM/neon_arith1.ll
+++ b/test/CodeGen/ARM/neon_arith1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vadd
+; RUN: llc < %s -march=arm -mattr=+neon | grep vadd
 
 define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index 8901ba177dac6..2796dec5b9705 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fldd | count 4
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fstd
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fmrrd
+; RUN: llc < %s -march=arm -mattr=+neon | grep fldd | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | grep fstd
+; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd
 
 define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index a26904afca30b..547bab76356b9 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vldmia | count 4
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vstmia | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fmrrd  | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd  | count 2
 
 define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll
index 151beac3efce0..1e2e7aa0c8ff1 100644
--- a/test/CodeGen/ARM/pack.ll
+++ b/test/CodeGen/ARM/pack.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
 ; RUN:   grep pkhbt | count 5
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
 ; RUN:   grep pkhtb | count 4
 
 define i32 @test1(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/ARM/pr3502.ll b/test/CodeGen/ARM/pr3502.ll
index dee3fc43f9733..606d9698b977d 100644
--- a/test/CodeGen/ARM/pr3502.ll
+++ b/test/CodeGen/ARM/pr3502.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-none-linux-gnueabi
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi
 ;pr3502
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
index e5eeccb356a5b..03376a4c61b7c 100644
--- a/test/CodeGen/ARM/private.ll
+++ b/test/CodeGen/ARM/private.ll
@@ -1,6 +1,6 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi > %t
+; RUN: llc < %s -mtriple=arm-linux-gnueabi > %t
 ; RUN: grep .Lfoo: %t
 ; RUN: egrep bl.*\.Lfoo %t
 ; RUN: grep .Lbaz: %t
diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll
index 454d36b46f29d..ba9699efd5973 100644
--- a/test/CodeGen/ARM/remat.ll
+++ b/test/CodeGen/ARM/remat.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin 
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 2
+; RUN: llc < %s -mtriple=arm-apple-darwin 
+; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 4
 
 	%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
 	%struct.LOCBOX = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/ret0.ll b/test/CodeGen/ARM/ret0.ll
index 792b1690add28..5c312eb98a327 100644
--- a/test/CodeGen/ARM/ret0.ll
+++ b/test/CodeGen/ARM/ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define i32 @test() {
         ret i32 0
diff --git a/test/CodeGen/ARM/ret_arg1.ll b/test/CodeGen/ARM/ret_arg1.ll
index 48a1fda35b331..1ab947b1e20d0 100644
--- a/test/CodeGen/ARM/ret_arg1.ll
+++ b/test/CodeGen/ARM/ret_arg1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define i32 @test(i32 %a1) {
         ret i32 %a1
diff --git a/test/CodeGen/ARM/ret_arg2.ll b/test/CodeGen/ARM/ret_arg2.ll
index a74870f85870a..84477d042c749 100644
--- a/test/CodeGen/ARM/ret_arg2.ll
+++ b/test/CodeGen/ARM/ret_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define i32 @test(i32 %a1, i32 %a2) {
         ret i32 %a2
diff --git a/test/CodeGen/ARM/ret_arg3.ll b/test/CodeGen/ARM/ret_arg3.ll
index 9210e7b09f58c..f7f9057432d10 100644
--- a/test/CodeGen/ARM/ret_arg3.ll
+++ b/test/CodeGen/ARM/ret_arg3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 define i32 @test(i32 %a1, i32 %a2, i32 %a3) {
         ret i32 %a3
 }
diff --git a/test/CodeGen/ARM/ret_arg4.ll b/test/CodeGen/ARM/ret_arg4.ll
index a9c66e9e98d1a..f7b3e4a282b2b 100644
--- a/test/CodeGen/ARM/ret_arg4.ll
+++ b/test/CodeGen/ARM/ret_arg4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
         ret i32 %a4
diff --git a/test/CodeGen/ARM/ret_arg5.ll b/test/CodeGen/ARM/ret_arg5.ll
index 620a0175e0728..c4f9fb5e0a9b7 100644
--- a/test/CodeGen/ARM/ret_arg5.ll
+++ b/test/CodeGen/ARM/ret_arg5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
         ret i32 %a5
diff --git a/test/CodeGen/ARM/ret_f32_arg2.ll b/test/CodeGen/ARM/ret_f32_arg2.ll
index 287d92b9eb6e3..2bafea6755318 100644
--- a/test/CodeGen/ARM/ret_f32_arg2.ll
+++ b/test/CodeGen/ARM/ret_f32_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
 
 define float @test_f32(float %a1, float %a2) {
         ret float %a2
diff --git a/test/CodeGen/ARM/ret_f32_arg5.ll b/test/CodeGen/ARM/ret_f32_arg5.ll
index 3418be93e1e87..c6ce60ecb9c86 100644
--- a/test/CodeGen/ARM/ret_f32_arg5.ll
+++ b/test/CodeGen/ARM/ret_f32_arg5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
 
 define float @test_f32_arg5(float %a1, float %a2, float %a3, float %a4, float %a5) {
         ret float %a5
diff --git a/test/CodeGen/ARM/ret_f64_arg2.ll b/test/CodeGen/ARM/ret_f64_arg2.ll
index 66848d5fb49b1..386e85f4b9a55 100644
--- a/test/CodeGen/ARM/ret_f64_arg2.ll
+++ b/test/CodeGen/ARM/ret_f64_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
 
 define double @test_f64(double %a1, double %a2) {
         ret double %a2
diff --git a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
index 626ee6fb13749..bdb0a606227b6 100644
--- a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mcpu=arm8 -mattr=+vfp2
+; RUN: llc < %s -march=arm -mcpu=arm8 -mattr=+vfp2
 
 define double @test_double_arg_reg_split(i32 %a1, double %a2) {
         ret double %a2
diff --git a/test/CodeGen/ARM/ret_f64_arg_split.ll b/test/CodeGen/ARM/ret_f64_arg_split.ll
index b03b604beee75..4f841a3cde7b6 100644
--- a/test/CodeGen/ARM/ret_f64_arg_split.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
 
 define double @test_double_arg_split(i64 %a1, i32 %a2, double %a3) {
         ret double %a3
diff --git a/test/CodeGen/ARM/ret_f64_arg_stack.ll b/test/CodeGen/ARM/ret_f64_arg_stack.ll
index ba3ec7fb75173..21443177d3de6 100644
--- a/test/CodeGen/ARM/ret_f64_arg_stack.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
 
 define double @test_double_arg_stack(i64 %a1, i32 %a2, i32 %a3, double %a4) {
         ret double %a4
diff --git a/test/CodeGen/ARM/ret_i128_arg2.ll b/test/CodeGen/ARM/ret_i128_arg2.ll
index 0fe98e6b70fc4..908c34f8cda68 100644
--- a/test/CodeGen/ARM/ret_i128_arg2.ll
+++ b/test/CodeGen/ARM/ret_i128_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
 
 define i128 @test_i128(i128 %a1, i128 %a2, i128 %a3) {
         ret i128 %a3
diff --git a/test/CodeGen/ARM/ret_i64_arg2.ll b/test/CodeGen/ARM/ret_i64_arg2.ll
index b015a96e0bf0a..b1a1024acaf1c 100644
--- a/test/CodeGen/ARM/ret_i64_arg2.ll
+++ b/test/CodeGen/ARM/ret_i64_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
 
 define i64 @test_i64(i64 %a1, i64 %a2) {
         ret i64 %a2
diff --git a/test/CodeGen/ARM/ret_i64_arg3.ll b/test/CodeGen/ARM/ret_i64_arg3.ll
index 5dfecca319a17..ffc1d2f4b52af 100644
--- a/test/CodeGen/ARM/ret_i64_arg3.ll
+++ b/test/CodeGen/ARM/ret_i64_arg3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
 
 define i64 @test_i64_arg3(i64 %a1, i64 %a2, i64 %a3) {
         ret i64 %a3
diff --git a/test/CodeGen/ARM/ret_i64_arg_split.ll b/test/CodeGen/ARM/ret_i64_arg_split.ll
index 5bd5cb2a230be..956bce558fc5e 100644
--- a/test/CodeGen/ARM/ret_i64_arg_split.ll
+++ b/test/CodeGen/ARM/ret_i64_arg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2
 
 define i64 @test_i64_arg_split(i64 %a1, i32 %a2, i64 %a3) {
         ret i64 %a3
diff --git a/test/CodeGen/ARM/ret_void.ll b/test/CodeGen/ARM/ret_void.ll
index 68db8c423461e..2b7ae05628843 100644
--- a/test/CodeGen/ARM/ret_void.ll
+++ b/test/CodeGen/ARM/ret_void.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 
 define void @test() {
         ret void
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index 68f6264e8a063..1c12268ef86c8 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep rev16
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep revsh
+; RUN: llc < %s -march=arm -mattr=+v6 | grep rev16
+; RUN: llc < %s -march=arm -mattr=+v6 | grep revsh
 
 define i32 @test1(i32 %X) {
         %tmp1 = lshr i32 %X, 8          ; <i32> [#uses=3]
diff --git a/test/CodeGen/ARM/sbfx.ll b/test/CodeGen/ARM/sbfx.ll
new file mode 100644
index 0000000000000..923f52a868626
--- /dev/null
+++ b/test/CodeGen/ARM/sbfx.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+
+define i32 @f1(i32 %a) {
+entry:
+; CHECK: f1:
+; CHECK: sbfx r0, r0, #0, #20
+    %tmp = shl i32 %a, 12
+    %tmp2 = ashr i32 %tmp, 12
+    ret i32 %tmp2
+}
+
+define i32 @f2(i32 %a) {
+entry:
+; CHECK: f2:
+; CHECK: ubfx r0, r0, #0, #20
+    %tmp = shl i32 %a, 12
+    %tmp2 = lshr i32 %tmp, 12
+    ret i32 %tmp2
+}
+
+define i32 @f3(i32 %a) {
+entry:
+; CHECK: f3:
+; CHECK: sbfx r0, r0, #5, #3
+    %tmp = shl i32 %a, 24
+    %tmp2 = ashr i32 %tmp, 29
+    ret i32 %tmp2
+}
+
+define i32 @f4(i32 %a) {
+entry:
+; CHECK: f4:
+; CHECK: ubfx r0, r0, #5, #3
+    %tmp = shl i32 %a, 24
+    %tmp2 = lshr i32 %tmp, 29
+    ret i32 %tmp2
+}
diff --git a/test/CodeGen/ARM/section.ll b/test/CodeGen/ARM/section.ll
index aa658451675bc..7a566d49d322d 100644
--- a/test/CodeGen/ARM/section.ll
+++ b/test/CodeGen/ARM/section.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux | \
+; RUN: llc < %s -mtriple=arm-linux | \
 ; RUN:   grep {__DTOR_END__:}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux | \
+; RUN: llc < %s -mtriple=arm-linux | \
 ; RUN:   grep {\\.section.\\.dtors,"aw",.progbits}
 
 @__DTOR_END__ = internal global [1 x i32] zeroinitializer, section ".dtors"       ; <[1 x i32]*> [#uses=0]
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 5148a5b86998c..85c8b5b8477f8 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -1,13 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm | grep moveq | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movgt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movlt | count 3
-; RUN: llvm-as < %s | llc -march=arm | grep movle | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movls | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movhi | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fcpydmi | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
 
 define i32 @f1(i32 %a.s) {
+;CHECK: f1:
+;CHECK: moveq
 entry:
     %tmp = icmp eq i32 %a.s, 4
     %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -15,6 +11,8 @@ entry:
 }
 
 define i32 @f2(i32 %a.s) {
+;CHECK: f2:
+;CHECK: movgt
 entry:
     %tmp = icmp sgt i32 %a.s, 4
     %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -22,6 +20,8 @@ entry:
 }
 
 define i32 @f3(i32 %a.s, i32 %b.s) {
+;CHECK: f3:
+;CHECK: movlt
 entry:
     %tmp = icmp slt i32 %a.s, %b.s
     %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -29,6 +29,8 @@ entry:
 }
 
 define i32 @f4(i32 %a.s, i32 %b.s) {
+;CHECK: f4:
+;CHECK: movle
 entry:
     %tmp = icmp sle i32 %a.s, %b.s
     %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -36,6 +38,8 @@ entry:
 }
 
 define i32 @f5(i32 %a.u, i32 %b.u) {
+;CHECK: f5:
+;CHECK: movls
 entry:
     %tmp = icmp ule i32 %a.u, %b.u
     %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -43,6 +47,8 @@ entry:
 }
 
 define i32 @f6(i32 %a.u, i32 %b.u) {
+;CHECK: f6:
+;CHECK: movhi
 entry:
     %tmp = icmp ugt i32 %a.u, %b.u
     %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -50,6 +56,11 @@ entry:
 }
 
 define double @f7(double %a, double %b) {
+;CHECK: f7:
+;CHECK: movlt
+;CHECK: movlt
+;CHECK-VFP: f7:
+;CHECK-VFP: fcpydmi
     %tmp = fcmp olt double %a, 1.234e+00
     %tmp1 = select i1 %tmp, double -1.000e+00, double %b
     ret double %tmp1
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 6855e3227b991..7fd91ceea5ad7 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep mov | count 2
+; RUN: llc < %s -march=arm | grep mov | count 2
 
 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
         %tmp1 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index cae1c44a729d2..2bbe9fd2602c2 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep add | grep lsl
-; RUN: llvm-as < %s | llc -march=arm | grep bic | grep asr
+; RUN: llc < %s -march=arm | grep add | grep lsl
+; RUN: llc < %s -march=arm | grep bic | grep asr
 
 
 define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll
index 7a4e4887cc7b6..b7ab2e796f8a4 100644
--- a/test/CodeGen/ARM/smul.ll
+++ b/test/CodeGen/ARM/smul.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm -mattr=+v5TE
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
 ; RUN:   grep smulbt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
 ; RUN:   grep smultt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
 ; RUN:   grep smlabt | count 1
 
 @x = weak global i16 0          ; <i16*> [#uses=1]
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
new file mode 100644
index 0000000000000..f4b27a7603e52
--- /dev/null
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon | FileCheck %s
+; PR4789
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+
+define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
+; CHECK: aaa:
+; CHECK: vstmia sp
+; CHECK: vldmia sp
+entry:
+  %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+  store float 6.300000e+01, float* undef, align 4
+  %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+  store float 0.000000e+00, float* undef, align 4
+  %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+  %val173 = load <4 x float>* undef               ; <<4 x float>> [#uses=1]
+  br label %bb4
+
+bb4:                                              ; preds = %bb193, %entry
+  %besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
+  %part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
+  %3 = fmul <4 x float> zeroinitializer, %0       ; <<4 x float>> [#uses=2]
+  %4 = fadd <4 x float> %3, %part0.0.0261         ; <<4 x float>> [#uses=1]
+  %5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+  %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+  %7 = fmul <4 x float> %1, undef                 ; <<4 x float>> [#uses=1]
+  %8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
+  %9 = fptosi <4 x float> %8 to <4 x i32>         ; <<4 x i32>> [#uses=1]
+  %10 = sitofp <4 x i32> %9 to <4 x float>        ; <<4 x float>> [#uses=1]
+  %11 = fmul <4 x float> %10, %2                  ; <<4 x float>> [#uses=1]
+  %12 = fmul <4 x float> undef, %6                ; <<4 x float>> [#uses=1]
+  %13 = fmul <4 x float> %11, %4                  ; <<4 x float>> [#uses=1]
+  %14 = fsub <4 x float> %12, %13                 ; <<4 x float>> [#uses=1]
+  %15 = fsub <4 x float> %14, undef               ; <<4 x float>> [#uses=1]
+  %16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
+  %17 = fadd <4 x float> %16, undef               ; <<4 x float>> [#uses=1]
+  %18 = fmul <4 x float> %17, %val173             ; <<4 x float>> [#uses=1]
+  %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+  %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %21 = fadd <4 x float> zeroinitializer, %20     ; <<4 x float>> [#uses=2]
+  %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
+  br i1 undef, label %bb193, label %bb186
+
+bb186:                                            ; preds = %bb4
+  br label %bb193
+
+bb193:                                            ; preds = %bb186, %bb4
+  %besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
+  %23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
+  br label %bb4
+}
diff --git a/test/CodeGen/ARM/stack-frame.ll b/test/CodeGen/ARM/stack-frame.ll
index c3dd65a594d67..1dd57ddb9f2fc 100644
--- a/test/CodeGen/ARM/stack-frame.ll
+++ b/test/CodeGen/ARM/stack-frame.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep add | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep add | count 1
 
 define void @f1() {
 	%c = alloca i8, align 1
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
index ed5e4c5f59433..22a7ecb4aa283 100644
--- a/test/CodeGen/ARM/stm.ll
+++ b/test/CodeGen/ARM/stm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2
 
 @"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals"		; <[32 x i8]*> [#uses=1]
 @"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals"		; <[26 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/str_post.ll b/test/CodeGen/ARM/str_post.ll
index ba813805bacc1..801b9cee37d69 100644
--- a/test/CodeGen/ARM/str_post.ll
+++ b/test/CodeGen/ARM/str_post.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep {strh .*\\\[.*\], #-4} | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep {str .*\\\[.*\],} | count 1
 
 define i16 @test1(i32* %X, i16* %A) {
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index e9f194574e43e..f8d3df29c4089 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {str.*\\!}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}
+; RUN: llc < %s -mtriple=arm-linux-gnu | grep {str.*\\!}
+; RUN: llc < %s -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}
 
 @b = external global i64*
 
diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll
index c02663fa4040e..e56e3f253e63a 100644
--- a/test/CodeGen/ARM/str_pre.ll
+++ b/test/CodeGen/ARM/str_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep {str.*\\!} | count 2
 
 define void @test1(i32* %X, i32* %A, i32** %dest) {
diff --git a/test/CodeGen/ARM/str_trunc.ll b/test/CodeGen/ARM/str_trunc.ll
index 77c66ec2c7e0c..2f1166b64b59b 100644
--- a/test/CodeGen/ARM/str_trunc.ll
+++ b/test/CodeGen/ARM/str_trunc.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep strb | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN:   grep strh | count 1
 
 define void @test1(i32 %v, i16* %ptr) {
diff --git a/test/CodeGen/ARM/sxt_rot.ll b/test/CodeGen/ARM/sxt_rot.ll
index e9f302c88d1c0..4752f17f1e1c3 100644
--- a/test/CodeGen/ARM/sxt_rot.ll
+++ b/test/CodeGen/ARM/sxt_rot.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
 ; RUN:   grep sxtb | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
 ; RUN:   grep sxtb | grep ror | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
 ; RUN:   grep sxtab | count 1
 
 define i32 @test0(i8 %A) {
diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll
new file mode 100644
index 0000000000000..848a4dfed0542
--- /dev/null
+++ b/test/CodeGen/ARM/t2-imm.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s
+
+define i32 @f6(i32 %a) {
+; CHECK:f6
+; CHECK: movw r0, #:lower16:65537123
+; CHECK: movt r0, #:upper16:65537123
+    %tmp = add i32 0, 65537123
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/thread_pointer.ll b/test/CodeGen/ARM/thread_pointer.ll
index 6476b483d7d9e..3143387ead657 100644
--- a/test/CodeGen/ARM/thread_pointer.ll
+++ b/test/CodeGen/ARM/thread_pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN:     grep {__aeabi_read_tp}
 
 define i8* @test() {
diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll
index 6866a42db4951..1087094e5798a 100644
--- a/test/CodeGen/ARM/tls1.ll
+++ b/test/CodeGen/ARM/tls1.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN:     grep {i(tpoff)}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN:     grep {__aeabi_read_tp}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
 ; RUN:     -relocation-model=pic | grep {__tls_get_addr}
 
 
diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll
index 90e3bcf9040b5..328472081e197 100644
--- a/test/CodeGen/ARM/tls2.ll
+++ b/test/CodeGen/ARM/tls2.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN:     grep {i(gottpoff)}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN:     grep {ldr r., \[pc, r.\]}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
 ; RUN:     -relocation-model=pic | grep {__tls_get_addr}
 
 @i = external thread_local global i32		; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll
index df2913b61cda9..df7a4ca02db8e 100644
--- a/test/CodeGen/ARM/tls3.ll
+++ b/test/CodeGen/ARM/tls3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN:     grep {tbss}
 
 %struct.anon = type { i32, i32 }
diff --git a/test/CodeGen/ARM/trunc_ldr.ll b/test/CodeGen/ARM/trunc_ldr.ll
index 6111ec9d2f48b..3033c2ba3e252 100644
--- a/test/CodeGen/ARM/trunc_ldr.ll
+++ b/test/CodeGen/ARM/trunc_ldr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep ldrb.*7 | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrsb.*7 | count 1
+; RUN: llc < %s -march=arm | grep ldrb.*7 | count 1
+; RUN: llc < %s -march=arm | grep ldrsb.*7 | count 1
 
 	%struct.A = type { i8, i8, i8, i8, i16, i8, i8, %struct.B** }
 	%struct.B = type { float, float, i32, i32, i32, [0 x i8] }
diff --git a/test/CodeGen/ARM/truncstore-dag-combine.ll b/test/CodeGen/ARM/truncstore-dag-combine.ll
index 0e85fb69eb3ae..2da08b60e86cd 100644
--- a/test/CodeGen/ARM/truncstore-dag-combine.ll
+++ b/test/CodeGen/ARM/truncstore-dag-combine.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep orr
-; RUN: llvm-as < %s | llc -march=arm | not grep mov
+; RUN: llc < %s -march=arm | not grep orr
+; RUN: llc < %s -march=arm | not grep mov
 
 define void @bar(i8* %P, i16* %Q) {
 entry:
diff --git a/test/CodeGen/ARM/tst_teq.ll b/test/CodeGen/ARM/tst_teq.ll
index bdeee3fa43fee..c83111e69937f 100644
--- a/test/CodeGen/ARM/tst_teq.ll
+++ b/test/CodeGen/ARM/tst_teq.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep tst
-; RUN: llvm-as < %s | llc -march=arm | grep teq
+; RUN: llc < %s -march=arm | grep tst
+; RUN: llc < %s -march=arm | grep teq
 
 define i32 @f(i32 %a) {
 entry:
diff --git a/test/CodeGen/ARM/uint64tof64.ll b/test/CodeGen/ARM/uint64tof64.ll
index 055c3c370ee69..32eb225a2ad6b 100644
--- a/test/CodeGen/ARM/uint64tof64.ll
+++ b/test/CodeGen/ARM/uint64tof64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2
 
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 	%struct.__sFILEX = type opaque
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index dad1897463a68..fcaa2b3103e93 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -1,16 +1,31 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -o %t -f
-; RUN: grep ldrb %t | count 4
-; RUN: grep strb %t | count 4
+; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=GENERIC
+; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
+; RUN: llc < %s -march=arm -mattr=+v7a | FileCheck %s -check-prefix=V7
 
+; rdar://7113725
 
-	%struct.p = type <{ i8, i32 }>
-@t = global %struct.p <{ i8 1, i32 10 }>		; <%struct.p*> [#uses=1]
-@u = weak global %struct.p zeroinitializer		; <%struct.p*> [#uses=1]
-
-define i32 @main() {
+define arm_apcscc void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
 entry:
-	%tmp3 = load i32* getelementptr (%struct.p* @t, i32 0, i32 1), align 1		; <i32> [#uses=2]
-	store i32 %tmp3, i32* getelementptr (%struct.p* @u, i32 0, i32 1), align 1
-	ret i32 %tmp3
+; GENERIC: t:
+; GENERIC: ldrb r2
+; GENERIC: ldrb r3
+; GENERIC: ldrb r12
+; GENERIC: ldrb r1
+; GENERIC: strb r1
+; GENERIC: strb r12
+; GENERIC: strb r3
+; GENERIC: strb r2
+
+; DARWIN_V6: t:
+; DARWIN_V6: ldr r1
+; DARWIN_V6: str r1
+
+; V7: t:
+; V7: ldr r1
+; V7: str r1
+  %__src1.i = bitcast i8* %b to i32*              ; <i32*> [#uses=1]
+  %__dest2.i = bitcast i8* %a to i32*             ; <i32*> [#uses=1]
+  %tmp.i = load i32* %__src1.i, align 1           ; <i32> [#uses=1]
+  store i32 %tmp.i, i32* %__dest2.i, align 1
+  ret void
 }
diff --git a/test/CodeGen/ARM/unord.ll b/test/CodeGen/ARM/unord.ll
index 149afc4abafe9..bd28034b3adb9 100644
--- a/test/CodeGen/ARM/unord.ll
+++ b/test/CodeGen/ARM/unord.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep movne | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep moveq | count 1
+; RUN: llc < %s -march=arm | grep movne | count 1
+; RUN: llc < %s -march=arm | grep moveq | count 1
 
 define i32 @f1(float %X, float %Y) {
 	%tmp = fcmp uno float %X, %Y
diff --git a/test/CodeGen/ARM/uxt_rot.ll b/test/CodeGen/ARM/uxt_rot.ll
index 09c74ebbb7765..6307795499bf4 100644
--- a/test/CodeGen/ARM/uxt_rot.ll
+++ b/test/CodeGen/ARM/uxt_rot.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxtb | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxtab | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxth | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtb | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtab | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxth | count 1
 
 define i8 @test1(i32 %A.u) zeroext {
     %B.u = trunc i32 %A.u to i8
diff --git a/test/CodeGen/ARM/uxtb.ll b/test/CodeGen/ARM/uxtb.ll
index 73e918b7a5d3b..9d6e4bd4dfce1 100644
--- a/test/CodeGen/ARM/uxtb.ll
+++ b/test/CodeGen/ARM/uxtb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | \
+; RUN: llc < %s -mtriple=armv6-apple-darwin | \
 ; RUN:   grep uxt | count 10
 
 define i32 @test1(i32 %x) {
diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll
index 98ee1e155ba81..e2dca4647bce6 100644
--- a/test/CodeGen/ARM/vaba.ll
+++ b/test/CodeGen/ARM/vaba.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vaba\\.s8} %t | count 2
-; RUN: grep {vaba\\.s16} %t | count 2
-; RUN: grep {vaba\\.s32} %t | count 2
-; RUN: grep {vaba\\.u8} %t | count 2
-; RUN: grep {vaba\\.u16} %t | count 2
-; RUN: grep {vaba\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabas8:
+;CHECK: vaba.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = load <8 x i8>* %C
@@ -15,6 +11,8 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabas16:
+;CHECK: vaba.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = load <4 x i16>* %C
@@ -23,6 +21,8 @@ define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabas32:
+;CHECK: vaba.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = load <2 x i32>* %C
@@ -31,6 +31,8 @@ define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabau8:
+;CHECK: vaba.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = load <8 x i8>* %C
@@ -39,6 +41,8 @@ define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabau16:
+;CHECK: vaba.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = load <4 x i16>* %C
@@ -47,6 +51,8 @@ define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabau32:
+;CHECK: vaba.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = load <2 x i32>* %C
@@ -55,6 +61,8 @@ define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: vabaQs8:
+;CHECK: vaba.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = load <16 x i8>* %C
@@ -63,6 +71,8 @@ define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }
 
 define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vabaQs16:
+;CHECK: vaba.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = load <8 x i16>* %C
@@ -71,6 +81,8 @@ define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }
 
 define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vabaQs32:
+;CHECK: vaba.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = load <4 x i32>* %C
@@ -79,6 +91,8 @@ define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
 }
 
 define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: vabaQu8:
+;CHECK: vaba.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = load <16 x i8>* %C
@@ -87,6 +101,8 @@ define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }
 
 define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vabaQu16:
+;CHECK: vaba.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = load <8 x i16>* %C
@@ -95,6 +111,8 @@ define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }
 
 define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vabaQu32:
+;CHECK: vaba.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = load <4 x i32>* %C
@@ -117,3 +135,71 @@ declare <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) no
 declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabals8:
+;CHECK: vabal.s8
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = load <8 x i8>* %C
+	%tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+	ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabals16:
+;CHECK: vabal.s16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = load <4 x i16>* %C
+	%tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabals32:
+;CHECK: vabal.s32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = load <2 x i32>* %C
+	%tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+	ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabalu8:
+;CHECK: vabal.u8
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = load <8 x i8>* %C
+	%tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+	ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabalu16:
+;CHECK: vabal.u16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = load <4 x i16>* %C
+	%tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabalu32:
+;CHECK: vabal.u32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = load <2 x i32>* %C
+	%tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+	ret <2 x i64> %tmp4
+}
+
+declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll
index 0fe5ddb94ba49..2b4539361459b 100644
--- a/test/CodeGen/ARM/vabd.ll
+++ b/test/CodeGen/ARM/vabd.ll
@@ -1,13 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vabd\\.s8} %t | count 2
-; RUN: grep {vabd\\.s16} %t | count 2
-; RUN: grep {vabd\\.s32} %t | count 2
-; RUN: grep {vabd\\.u8} %t | count 2
-; RUN: grep {vabd\\.u16} %t | count 2
-; RUN: grep {vabd\\.u32} %t | count 2
-; RUN: grep {vabd\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabds8:
+;CHECK: vabd.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -15,6 +10,8 @@ define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabds16:
+;CHECK: vabd.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -22,6 +19,8 @@ define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabds32:
+;CHECK: vabd.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -29,6 +28,8 @@ define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdu8:
+;CHECK: vabd.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -36,6 +37,8 @@ define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdu16:
+;CHECK: vabd.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -43,6 +46,8 @@ define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdu32:
+;CHECK: vabd.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -50,13 +55,17 @@ define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vabdf32:
+;CHECK: vabd.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
 
 define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vabdQs8:
+;CHECK: vabd.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -64,6 +73,8 @@ define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vabdQs16:
+;CHECK: vabd.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -71,6 +82,8 @@ define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vabdQs32:
+;CHECK: vabd.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -78,6 +91,8 @@ define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vabdQu8:
+;CHECK: vabd.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -85,6 +100,8 @@ define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vabdQu16:
+;CHECK: vabd.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -92,6 +109,8 @@ define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vabdQu32:
+;CHECK: vabd.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -99,9 +118,11 @@ define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vabdQf32:
+;CHECK: vabd.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = call <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	%tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
 
@@ -113,7 +134,7 @@ declare <8 x i8>  @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnon
 declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 
-declare <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) nounwind readnone
 
 declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@@ -123,4 +144,66 @@ declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind read
 declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
-declare <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdls8:
+;CHECK: vabdl.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdls16:
+;CHECK: vabdl.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdls32:
+;CHECK: vabdl.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdlu8:
+;CHECK: vabdl.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdlu16:
+;CHECK: vabdl.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdlu32:
+;CHECK: vabdl.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll
index 629baa762a00c..18ba61f81e658 100644
--- a/test/CodeGen/ARM/vabs.ll
+++ b/test/CodeGen/ARM/vabs.ll
@@ -1,64 +1,131 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vabs\\.s8} %t | count 2
-; RUN: grep {vabs\\.s16} %t | count 2
-; RUN: grep {vabs\\.s32} %t | count 2
-; RUN: grep {vabs\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vabss8(<8 x i8>* %A) nounwind {
+;CHECK: vabss8:
+;CHECK: vabs.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vabss16(<4 x i16>* %A) nounwind {
+;CHECK: vabss16:
+;CHECK: vabs.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %tmp1)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vabss32(<2 x i32>* %A) nounwind {
+;CHECK: vabss32:
+;CHECK: vabs.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp2
 }
 
 define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
+;CHECK: vabsf32:
+;CHECK: vabs.f32
 	%tmp1 = load <2 x float>* %A
-	%tmp2 = call <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float> %tmp1)
+	%tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp2
 }
 
 define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vabsQs8:
+;CHECK: vabs.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vabsQs16:
+;CHECK: vabs.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %tmp1)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vabsQs32:
+;CHECK: vabs.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp2
 }
 
 define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
+;CHECK: vabsQf32:
+;CHECK: vabs.f32
 	%tmp1 = load <4 x float>* %A
-	%tmp2 = call <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float> %tmp1)
+	%tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp2
 }
 
 declare <8 x i8>  @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float>) nounwind readnone
 
 declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
 
+define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
+;CHECK: vqabss8:
+;CHECK: vqabs.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
+;CHECK: vqabss16:
+;CHECK: vqabs.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
+;CHECK: vqabss32:
+;CHECK: vqabs.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqabsQs8:
+;CHECK: vqabs.s8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1)
+	ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqabsQs16:
+;CHECK: vqabs.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqabsQs32:
+;CHECK: vqabs.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+declare <8 x i8>  @llvm.arm.neon.vqabs.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll
index b2b0e2397c72f..9fa530750aa16 100644
--- a/test/CodeGen/ARM/vadd.ll
+++ b/test/CodeGen/ARM/vadd.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vadd\\.i8} %t | count 2
-; RUN: grep {vadd\\.i16} %t | count 2
-; RUN: grep {vadd\\.i32} %t | count 2
-; RUN: grep {vadd\\.i64} %t | count 2
-; RUN: grep {vadd\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddi8:
+;CHECK: vadd.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = add <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddi16:
+;CHECK: vadd.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = add <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddi32:
+;CHECK: vadd.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = add <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vaddi64:
+;CHECK: vadd.i64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = add <1 x i64> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vaddf32:
+;CHECK: vadd.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
 	%tmp3 = add <2 x float> %tmp1, %tmp2
@@ -41,6 +46,8 @@ define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vaddQi8:
+;CHECK: vadd.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = add <16 x i8> %tmp1, %tmp2
@@ -48,6 +55,8 @@ define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vaddQi16:
+;CHECK: vadd.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = add <8 x i16> %tmp1, %tmp2
@@ -55,6 +64,8 @@ define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vaddQi32:
+;CHECK: vadd.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = add <4 x i32> %tmp1, %tmp2
@@ -62,6 +73,8 @@ define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vaddQi64:
+;CHECK: vadd.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = add <2 x i64> %tmp1, %tmp2
@@ -69,8 +82,196 @@ define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vaddQf32:
+;CHECK: vadd.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
 	%tmp3 = add <4 x float> %tmp1, %tmp2
 	ret <4 x float> %tmp3
 }
+
+define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vaddhni16:
+;CHECK: vaddhn.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vaddhni32:
+;CHECK: vaddhn.i32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vaddhni64:
+;CHECK: vaddhn.i64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+declare <8 x i8>  @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vraddhni16:
+;CHECK: vraddhn.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vraddhni32:
+;CHECK: vraddhn.i32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vraddhni64:
+;CHECK: vraddhn.i64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+declare <8 x i8>  @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddls8:
+;CHECK: vaddl.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddls16:
+;CHECK: vaddl.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddls32:
+;CHECK: vaddl.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddlu8:
+;CHECK: vaddl.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddlu16:
+;CHECK: vaddl.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddlu32:
+;CHECK: vaddl.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddws8:
+;CHECK: vaddw.s8
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddws16:
+;CHECK: vaddw.s16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddws32:
+;CHECK: vaddw.s32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddwu8:
+;CHECK: vaddw.u8
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddwu16:
+;CHECK: vaddw.u16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddwu32:
+;CHECK: vaddw.u32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vargs.ll b/test/CodeGen/ARM/vargs.ll
index 4bf79c0419225..5f3536cbb9a39 100644
--- a/test/CodeGen/ARM/vargs.ll
+++ b/test/CodeGen/ARM/vargs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 @str = internal constant [43 x i8] c"Hello World %d %d %d %d %d %d %d %d %d %d\0A\00"           ; <[43 x i8]*> [#uses=1]
 
 define i32 @main() {
diff --git a/test/CodeGen/ARM/vargs_align.ll b/test/CodeGen/ARM/vargs_align.ll
index 1f2f05bd6086f..e4ef9e3c36c12 100644
--- a/test/CodeGen/ARM/vargs_align.ll
+++ b/test/CodeGen/ARM/vargs_align.ll
@@ -1,7 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN:   grep {add sp, sp, #16} | count 1
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \
-; RUN:   grep {add sp, sp, #12} | count 2
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=OABI
 
 define i32 @f(i32 %a, ...) {
 entry:
@@ -18,4 +16,8 @@ entry:
 return:		; preds = %entry
 	%retval2 = load i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval2
+; EABI: add sp, sp, #12
+; EABI: add sp, sp, #16
+; OABI: add sp, sp, #12
+; OABI: add sp, sp, #12
 }
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
new file mode 100644
index 0000000000000..e1d23a17b4cbf
--- /dev/null
+++ b/test/CodeGen/ARM/vbits.ll
@@ -0,0 +1,507 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_andi8:
+;CHECK: vand
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = and <8 x i8> %tmp1, %tmp2
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_andi16:
+;CHECK: vand
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = and <4 x i16> %tmp1, %tmp2
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_andi32:
+;CHECK: vand
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = and <2 x i32> %tmp1, %tmp2
+	ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_andi64:
+;CHECK: vand
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = load <1 x i64>* %B
+	%tmp3 = and <1 x i64> %tmp1, %tmp2
+	ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_andQi8:
+;CHECK: vand
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = and <16 x i8> %tmp1, %tmp2
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_andQi16:
+;CHECK: vand
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = and <8 x i16> %tmp1, %tmp2
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_andQi32:
+;CHECK: vand
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = and <4 x i32> %tmp1, %tmp2
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_andQi64:
+;CHECK: vand
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = and <2 x i64> %tmp1, %tmp2
+	ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_bici8:
+;CHECK: vbic
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+	%tmp4 = and <8 x i8> %tmp1, %tmp3
+	ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_bici16:
+;CHECK: vbic
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
+	%tmp4 = and <4 x i16> %tmp1, %tmp3
+	ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_bici32:
+;CHECK: vbic
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
+	%tmp4 = and <2 x i32> %tmp1, %tmp3
+	ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_bici64:
+;CHECK: vbic
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = load <1 x i64>* %B
+	%tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
+	%tmp4 = and <1 x i64> %tmp1, %tmp3
+	ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_bicQi8:
+;CHECK: vbic
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+	%tmp4 = and <16 x i8> %tmp1, %tmp3
+	ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_bicQi16:
+;CHECK: vbic
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+	%tmp4 = and <8 x i16> %tmp1, %tmp3
+	ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_bicQi32:
+;CHECK: vbic
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+	%tmp4 = and <4 x i32> %tmp1, %tmp3
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_bicQi64:
+;CHECK: vbic
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
+	%tmp4 = and <2 x i64> %tmp1, %tmp3
+	ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_eori8:
+;CHECK: veor
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = xor <8 x i8> %tmp1, %tmp2
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_eori16:
+;CHECK: veor
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = xor <4 x i16> %tmp1, %tmp2
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_eori32:
+;CHECK: veor
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = xor <2 x i32> %tmp1, %tmp2
+	ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_eori64:
+;CHECK: veor
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = load <1 x i64>* %B
+	%tmp3 = xor <1 x i64> %tmp1, %tmp2
+	ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_eorQi8:
+;CHECK: veor
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = xor <16 x i8> %tmp1, %tmp2
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_eorQi16:
+;CHECK: veor
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = xor <8 x i16> %tmp1, %tmp2
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_eorQi32:
+;CHECK: veor
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = xor <4 x i32> %tmp1, %tmp2
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_eorQi64:
+;CHECK: veor
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = xor <2 x i64> %tmp1, %tmp2
+	ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
+;CHECK: v_mvni8:
+;CHECK: vmvn
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
+;CHECK: v_mvni16:
+;CHECK: vmvn
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
+;CHECK: v_mvni32:
+;CHECK: vmvn
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
+	ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
+;CHECK: v_mvni64:
+;CHECK: vmvn
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
+	ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
+;CHECK: v_mvnQi8:
+;CHECK: vmvn
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+	ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
+;CHECK: v_mvnQi16:
+;CHECK: vmvn
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
+;CHECK: v_mvnQi32:
+;CHECK: vmvn
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
+	ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
+;CHECK: v_mvnQi64:
+;CHECK: vmvn
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
+	ret <2 x i64> %tmp2
+}
+
+define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_orri8:
+;CHECK: vorr
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = or <8 x i8> %tmp1, %tmp2
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_orri16:
+;CHECK: vorr
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = or <4 x i16> %tmp1, %tmp2
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_orri32:
+;CHECK: vorr
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = or <2 x i32> %tmp1, %tmp2
+	ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_orri64:
+;CHECK: vorr
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = load <1 x i64>* %B
+	%tmp3 = or <1 x i64> %tmp1, %tmp2
+	ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_orrQi8:
+;CHECK: vorr
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = or <16 x i8> %tmp1, %tmp2
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_orrQi16:
+;CHECK: vorr
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = or <8 x i16> %tmp1, %tmp2
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_orrQi32:
+;CHECK: vorr
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = or <4 x i32> %tmp1, %tmp2
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_orrQi64:
+;CHECK: vorr
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = or <2 x i64> %tmp1, %tmp2
+	ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_orni8:
+;CHECK: vorn
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+	%tmp4 = or <8 x i8> %tmp1, %tmp3
+	ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_orni16:
+;CHECK: vorn
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
+	%tmp4 = or <4 x i16> %tmp1, %tmp3
+	ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_orni32:
+;CHECK: vorn
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
+	%tmp4 = or <2 x i32> %tmp1, %tmp3
+	ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_orni64:
+;CHECK: vorn
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = load <1 x i64>* %B
+	%tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
+	%tmp4 = or <1 x i64> %tmp1, %tmp3
+	ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_ornQi8:
+;CHECK: vorn
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+	%tmp4 = or <16 x i8> %tmp1, %tmp3
+	ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_ornQi16:
+;CHECK: vorn
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+	%tmp4 = or <8 x i16> %tmp1, %tmp3
+	ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_ornQi32:
+;CHECK: vorn
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+	%tmp4 = or <4 x i32> %tmp1, %tmp3
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_ornQi64:
+;CHECK: vorn
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
+	%tmp4 = or <2 x i64> %tmp1, %tmp3
+	ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtsti8:
+;CHECK: vtst.i8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = and <8 x i8> %tmp1, %tmp2
+	%tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
+        %tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
+	ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vtsti16:
+;CHECK: vtst.i16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = and <4 x i16> %tmp1, %tmp2
+	%tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
+        %tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
+	ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vtsti32:
+;CHECK: vtst.i32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = and <2 x i32> %tmp1, %tmp2
+	%tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
+        %tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
+	ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vtstQi8:
+;CHECK: vtst.i8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = and <16 x i8> %tmp1, %tmp2
+	%tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
+        %tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
+	ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtstQi16:
+;CHECK: vtst.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = and <8 x i16> %tmp1, %tmp2
+	%tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
+        %tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
+	ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vtstQi32:
+;CHECK: vtst.i32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = and <4 x i32> %tmp1, %tmp2
+	%tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
+        %tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
+	ret <4 x i32> %tmp5
+}
diff --git a/test/CodeGen/ARM/vbsl.ll b/test/CodeGen/ARM/vbsl.ll
index 37ddf4de6d329..9f3bb4e1030c7 100644
--- a/test/CodeGen/ARM/vbsl.ll
+++ b/test/CodeGen/ARM/vbsl.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vbsl %t | count 8
-; Note: function names do not include "vbsl" to allow simple grep for opcodes
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: v_bsli8:
+;CHECK: vbsl
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = load <8 x i8>* %C
@@ -14,6 +14,8 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: v_bsli16:
+;CHECK: vbsl
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = load <4 x i16>* %C
@@ -25,6 +27,8 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: v_bsli32:
+;CHECK: vbsl
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = load <2 x i32>* %C
@@ -36,6 +40,8 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
+;CHECK: v_bsli64:
+;CHECK: vbsl
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = load <1 x i64>* %C
@@ -47,6 +53,8 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind
 }
 
 define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: v_bslQi8:
+;CHECK: vbsl
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = load <16 x i8>* %C
@@ -58,6 +66,8 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }
 
 define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: v_bslQi16:
+;CHECK: vbsl
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = load <8 x i16>* %C
@@ -69,6 +79,8 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin
 }
 
 define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: v_bslQi32:
+;CHECK: vbsl
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = load <4 x i32>* %C
@@ -80,6 +92,8 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin
 }
 
 define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
+;CHECK: v_bslQi64:
+;CHECK: vbsl
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = load <2 x i64>* %C
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
index 77f1890d08650..e4787518e731c 100644
--- a/test/CodeGen/ARM/vceq.ll
+++ b/test/CodeGen/ARM/vceq.ll
@@ -1,61 +1,81 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.i8} %t | count 2
-; RUN: grep {vceq\\.i16} %t | count 2
-; RUN: grep {vceq\\.i32} %t | count 2
-; RUN: grep {vceq\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vceqi8:
+;CHECK: vceq.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = vicmp eq <8 x i8> %tmp1, %tmp2
-	ret <8 x i8> %tmp3
+	%tmp3 = icmp eq <8 x i8> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
 }
 
 define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vceqi16:
+;CHECK: vceq.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = vicmp eq <4 x i16> %tmp1, %tmp2
-	ret <4 x i16> %tmp3
+	%tmp3 = icmp eq <4 x i16> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
 }
 
 define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vceqi32:
+;CHECK: vceq.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = vicmp eq <2 x i32> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = icmp eq <2 x i32> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vceqf32:
+;CHECK: vceq.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp oeq <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp oeq <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vceqQi8:
+;CHECK: vceq.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = vicmp eq <16 x i8> %tmp1, %tmp2
-	ret <16 x i8> %tmp3
+	%tmp3 = icmp eq <16 x i8> %tmp1, %tmp2
+        %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vceqQi16:
+;CHECK: vceq.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = vicmp eq <8 x i16> %tmp1, %tmp2
-	ret <8 x i16> %tmp3
+	%tmp3 = icmp eq <8 x i16> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+	ret <8 x i16> %tmp4
 }
 
 define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vceqQi32:
+;CHECK: vceq.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = vicmp eq <4 x i32> %tmp1, %tmp2
-	ret <4 x i32> %tmp3
+	%tmp3 = icmp eq <4 x i32> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
 }
 
 define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vceqQf32:
+;CHECK: vceq.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = vfcmp oeq <4 x float> %tmp1, %tmp2
-	ret <4 x i32> %tmp3
+	%tmp3 = fcmp oeq <4 x float> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
 }
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index 14c623ea082f2..2c161113c1130 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -1,106 +1,162 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcge\\.s8} %t | count 2
-; RUN: grep {vcge\\.s16} %t | count 2
-; RUN: grep {vcge\\.s32} %t | count 2
-; RUN: grep {vcge\\.u8} %t | count 2
-; RUN: grep {vcge\\.u16} %t | count 2
-; RUN: grep {vcge\\.u32} %t | count 2
-; RUN: grep {vcge\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcges8:
+;CHECK: vcge.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = vicmp sge <8 x i8> %tmp1, %tmp2
-	ret <8 x i8> %tmp3
+	%tmp3 = icmp sge <8 x i8> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
 }
 
 define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcges16:
+;CHECK: vcge.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = vicmp sge <4 x i16> %tmp1, %tmp2
-	ret <4 x i16> %tmp3
+	%tmp3 = icmp sge <4 x i16> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
 }
 
 define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcges32:
+;CHECK: vcge.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = vicmp sge <2 x i32> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = icmp sge <2 x i32> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgeu8:
+;CHECK: vcge.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = vicmp uge <8 x i8> %tmp1, %tmp2
-	ret <8 x i8> %tmp3
+	%tmp3 = icmp uge <8 x i8> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
 }
 
 define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgeu16:
+;CHECK: vcge.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = vicmp uge <4 x i16> %tmp1, %tmp2
-	ret <4 x i16> %tmp3
+	%tmp3 = icmp uge <4 x i16> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
 }
 
 define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgeu32:
+;CHECK: vcge.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = vicmp uge <2 x i32> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = icmp uge <2 x i32> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcgef32:
+;CHECK: vcge.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp oge <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp oge <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgeQs8:
+;CHECK: vcge.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = vicmp sge <16 x i8> %tmp1, %tmp2
-	ret <16 x i8> %tmp3
+	%tmp3 = icmp sge <16 x i8> %tmp1, %tmp2
+        %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgeQs16:
+;CHECK: vcge.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = vicmp sge <8 x i16> %tmp1, %tmp2
-	ret <8 x i16> %tmp3
+	%tmp3 = icmp sge <8 x i16> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+	ret <8 x i16> %tmp4
 }
 
 define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgeQs32:
+;CHECK: vcge.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = vicmp sge <4 x i32> %tmp1, %tmp2
-	ret <4 x i32> %tmp3
+	%tmp3 = icmp sge <4 x i32> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
 }
 
 define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgeQu8:
+;CHECK: vcge.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = vicmp uge <16 x i8> %tmp1, %tmp2
-	ret <16 x i8> %tmp3
+	%tmp3 = icmp uge <16 x i8> %tmp1, %tmp2
+        %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgeQu16:
+;CHECK: vcge.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = vicmp uge <8 x i16> %tmp1, %tmp2
-	ret <8 x i16> %tmp3
+	%tmp3 = icmp uge <8 x i16> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+	ret <8 x i16> %tmp4
 }
 
 define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgeQu32:
+;CHECK: vcge.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = vicmp uge <4 x i32> %tmp1, %tmp2
-	ret <4 x i32> %tmp3
+	%tmp3 = icmp uge <4 x i32> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
 }
 
 define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vcgeQf32:
+;CHECK: vcge.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = vfcmp oge <4 x float> %tmp1, %tmp2
+	%tmp3 = fcmp oge <4 x float> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgef32:
+;CHECK: vacge.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = load <2 x float>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgeQf32:
+;CHECK: vacge.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = load <4 x float>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x i32> %tmp3
 }
+
+declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index 3f7e55078733c..6b11ba5ce6933 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -1,106 +1,162 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcgt\\.s8} %t | count 2
-; RUN: grep {vcgt\\.s16} %t | count 2
-; RUN: grep {vcgt\\.s32} %t | count 2
-; RUN: grep {vcgt\\.u8} %t | count 2
-; RUN: grep {vcgt\\.u16} %t | count 2
-; RUN: grep {vcgt\\.u32} %t | count 2
-; RUN: grep {vcgt\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgts8:
+;CHECK: vcgt.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = vicmp sgt <8 x i8> %tmp1, %tmp2
-	ret <8 x i8> %tmp3
+	%tmp3 = icmp sgt <8 x i8> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
 }
 
 define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgts16:
+;CHECK: vcgt.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = vicmp sgt <4 x i16> %tmp1, %tmp2
-	ret <4 x i16> %tmp3
+	%tmp3 = icmp sgt <4 x i16> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
 }
 
 define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgts32:
+;CHECK: vcgt.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = vicmp sgt <2 x i32> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = icmp sgt <2 x i32> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgtu8:
+;CHECK: vcgt.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = vicmp ugt <8 x i8> %tmp1, %tmp2
-	ret <8 x i8> %tmp3
+	%tmp3 = icmp ugt <8 x i8> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
 }
 
 define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgtu16:
+;CHECK: vcgt.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = vicmp ugt <4 x i16> %tmp1, %tmp2
-	ret <4 x i16> %tmp3
+	%tmp3 = icmp ugt <4 x i16> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
 }
 
 define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgtu32:
+;CHECK: vcgt.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = vicmp ugt <2 x i32> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = icmp ugt <2 x i32> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcgtf32:
+;CHECK: vcgt.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp ogt <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp ogt <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgtQs8:
+;CHECK: vcgt.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = vicmp sgt <16 x i8> %tmp1, %tmp2
-	ret <16 x i8> %tmp3
+	%tmp3 = icmp sgt <16 x i8> %tmp1, %tmp2
+        %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgtQs16:
+;CHECK: vcgt.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = vicmp sgt <8 x i16> %tmp1, %tmp2
-	ret <8 x i16> %tmp3
+	%tmp3 = icmp sgt <8 x i16> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+	ret <8 x i16> %tmp4
 }
 
 define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgtQs32:
+;CHECK: vcgt.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = vicmp sgt <4 x i32> %tmp1, %tmp2
-	ret <4 x i32> %tmp3
+	%tmp3 = icmp sgt <4 x i32> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
 }
 
 define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgtQu8:
+;CHECK: vcgt.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = vicmp ugt <16 x i8> %tmp1, %tmp2
-	ret <16 x i8> %tmp3
+	%tmp3 = icmp ugt <16 x i8> %tmp1, %tmp2
+        %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgtQu16:
+;CHECK: vcgt.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = vicmp ugt <8 x i16> %tmp1, %tmp2
-	ret <8 x i16> %tmp3
+	%tmp3 = icmp ugt <8 x i16> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+	ret <8 x i16> %tmp4
 }
 
 define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgtQu32:
+;CHECK: vcgt.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = vicmp ugt <4 x i32> %tmp1, %tmp2
-	ret <4 x i32> %tmp3
+	%tmp3 = icmp ugt <4 x i32> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
 }
 
 define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vcgtQf32:
+;CHECK: vcgt.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = vfcmp ogt <4 x float> %tmp1, %tmp2
+	%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgtf32:
+;CHECK: vacgt.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = load <2 x float>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgtQf32:
+;CHECK: vacgt.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = load <4 x float>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x i32> %tmp3
 }
+
+declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll
index 981716895894e..450f90d03dfe7 100644
--- a/test/CodeGen/ARM/vcnt.ll
+++ b/test/CodeGen/ARM/vcnt.ll
@@ -1,13 +1,16 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcnt\\.8} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
+;CHECK: vcnt8:
+;CHECK: vcnt.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp2
 }
 
 define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
+;CHECK: vcntQ8:
+;CHECK: vcnt.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp2
@@ -15,3 +18,115 @@ define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
 
 declare <8 x i8>  @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
 declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone
+
+define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
+;CHECK: vclz8:
+;CHECK: vclz.i8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
+;CHECK: vclz16:
+;CHECK: vclz.i16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
+;CHECK: vclz32:
+;CHECK: vclz.i32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
+;CHECK: vclzQ8:
+;CHECK: vclz.i8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
+	ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
+;CHECK: vclzQ16:
+;CHECK: vclz.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
+;CHECK: vclzQ32:
+;CHECK: vclz.i32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+declare <8 x i8>  @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
+
+define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
+;CHECK: vclss8:
+;CHECK: vcls.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
+;CHECK: vclss16:
+;CHECK: vcls.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
+;CHECK: vclss32:
+;CHECK: vcls.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vclsQs8:
+;CHECK: vcls.s8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
+	ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vclsQs16:
+;CHECK: vcls.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vclsQs32:
+;CHECK: vcls.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+declare <8 x i8>  @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll
new file mode 100644
index 0000000000000..e6733051f269c
--- /dev/null
+++ b/test/CodeGen/ARM/vcombine.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+
+define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+	ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = load <2 x float>* %B
+	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+	ret <4 x float> %tmp3
+}
+
+define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = load <1 x i64>* %B
+	%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
+	ret <2 x i64> %tmp3
+}
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index 1cb42bf155cb7..f4cc5368d9aaa 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -1,53 +1,140 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcvt\\.s32\\.f32} %t | count 2
-; RUN: grep {vcvt\\.u32\\.f32} %t | count 2
-; RUN: grep {vcvt\\.f32\\.s32} %t | count 2
-; RUN: grep {vcvt\\.f32\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_f32tos32:
+;CHECK: vcvt.s32.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
 	ret <2 x i32> %tmp2
 }
 
 define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_f32tou32:
+;CHECK: vcvt.u32.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
 	ret <2 x i32> %tmp2
 }
 
 define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_s32tof32:
+;CHECK: vcvt.f32.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = sitofp <2 x i32> %tmp1 to <2 x float>
 	ret <2 x float> %tmp2
 }
 
 define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_u32tof32:
+;CHECK: vcvt.f32.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = uitofp <2 x i32> %tmp1 to <2 x float>
 	ret <2 x float> %tmp2
 }
 
 define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_f32tos32:
+;CHECK: vcvt.s32.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
 	ret <4 x i32> %tmp2
 }
 
 define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_f32tou32:
+;CHECK: vcvt.u32.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
 	ret <4 x i32> %tmp2
 }
 
 define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_s32tof32:
+;CHECK: vcvt.f32.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = sitofp <4 x i32> %tmp1 to <4 x float>
 	ret <4 x float> %tmp2
 }
 
 define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_u32tof32:
+;CHECK: vcvt.f32.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
 	ret <4 x float> %tmp2
 }
+
+define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tos32:
+;CHECK: vcvt.s32.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+	ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tou32:
+;CHECK: vcvt.u32.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+	ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_s32tof32:
+;CHECK: vcvt.f32.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+	ret <2 x float> %tmp2
+}
+
+define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_u32tof32:
+;CHECK: vcvt.f32.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+	ret <2 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+
+define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tos32:
+;CHECK: vcvt.s32.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+	ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tou32:
+;CHECK: vcvt.u32.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+	ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_s32tof32:
+;CHECK: vcvt.f32.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+	ret <4 x float> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_u32tof32:
+;CHECK: vcvt.f32.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+	ret <4 x float> %tmp2
+}
+
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index 1c0887a2492df..c9a68cabbc42b 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -1,9 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vdup.8 %t | count 4
-; RUN: grep vdup.16 %t | count 4
-; RUN: grep vdup.32 %t | count 8
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @v_dup8(i8 %A) nounwind {
+;CHECK: v_dup8:
+;CHECK: vdup.8
 	%tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
 	%tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
 	%tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
@@ -16,6 +15,8 @@ define <8 x i8> @v_dup8(i8 %A) nounwind {
 }
 
 define <4 x i16> @v_dup16(i16 %A) nounwind {
+;CHECK: v_dup16:
+;CHECK: vdup.16
 	%tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
 	%tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
 	%tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
@@ -24,18 +25,24 @@ define <4 x i16> @v_dup16(i16 %A) nounwind {
 }
 
 define <2 x i32> @v_dup32(i32 %A) nounwind {
+;CHECK: v_dup32:
+;CHECK: vdup.32
 	%tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
 	%tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
 	ret <2 x i32> %tmp2
 }
 
 define <2 x float> @v_dupfloat(float %A) nounwind {
+;CHECK: v_dupfloat:
+;CHECK: vdup.32
 	%tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
 	%tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
 	ret <2 x float> %tmp2
 }
 
 define <16 x i8> @v_dupQ8(i8 %A) nounwind {
+;CHECK: v_dupQ8:
+;CHECK: vdup.8
 	%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
 	%tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
 	%tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
@@ -56,6 +63,8 @@ define <16 x i8> @v_dupQ8(i8 %A) nounwind {
 }
 
 define <8 x i16> @v_dupQ16(i16 %A) nounwind {
+;CHECK: v_dupQ16:
+;CHECK: vdup.16
 	%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
 	%tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
 	%tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
@@ -68,6 +77,8 @@ define <8 x i16> @v_dupQ16(i16 %A) nounwind {
 }
 
 define <4 x i32> @v_dupQ32(i32 %A) nounwind {
+;CHECK: v_dupQ32:
+;CHECK: vdup.32
 	%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
 	%tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
 	%tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
@@ -76,6 +87,8 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind {
 }
 
 define <4 x float> @v_dupQfloat(float %A) nounwind {
+;CHECK: v_dupQfloat:
+;CHECK: vdup.32
 	%tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
 	%tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
 	%tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
@@ -86,49 +99,171 @@ define <4 x float> @v_dupQfloat(float %A) nounwind {
 ; Check to make sure it works with shuffles, too.
 
 define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
+;CHECK: v_shuffledup8:
+;CHECK: vdup.8
 	%tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
+;CHECK: v_shuffledup16:
+;CHECK: vdup.16
 	%tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
+;CHECK: v_shuffledup32:
+;CHECK: vdup.32
 	%tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
 	ret <2 x i32> %tmp2
 }
 
 define <2 x float> @v_shuffledupfloat(float %A) nounwind {
+;CHECK: v_shuffledupfloat:
+;CHECK: vdup.32
 	%tmp1 = insertelement <2 x float> undef, float %A, i32 0
 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
 	ret <2 x float> %tmp2
 }
 
 define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
+;CHECK: v_shuffledupQ8:
+;CHECK: vdup.8
 	%tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
+;CHECK: v_shuffledupQ16:
+;CHECK: vdup.16
 	%tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
+;CHECK: v_shuffledupQ32:
+;CHECK: vdup.32
 	%tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
 	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
 	ret <4 x i32> %tmp2
 }
 
 define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
+;CHECK: v_shuffledupQfloat:
+;CHECK: vdup.32
 	%tmp1 = insertelement <4 x float> undef, float %A, i32 0
 	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
 	ret <4 x float> %tmp2
 }
+
+define <2 x float> @v_shuffledupfloat2(float* %A) nounwind {
+;CHECK: v_shuffledupfloat2:
+;CHECK: vdup.32
+	%tmp0 = load float* %A
+        %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
+        %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
+        ret <2 x float> %tmp2
+}
+
+define <4 x float> @v_shuffledupQfloat2(float* %A) nounwind {
+;CHECK: v_shuffledupQfloat2:
+;CHECK: vdup.32
+        %tmp0 = load float* %A
+        %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
+        %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
+        ret <4 x float> %tmp2
+}
+
+define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
+;CHECK: vduplane8:
+;CHECK: vdup.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
+;CHECK: vduplane16:
+;CHECK: vdup.16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
+;CHECK: vduplane32:
+;CHECK: vdup.32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
+	ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
+;CHECK: vduplanefloat:
+;CHECK: vdup.32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
+	ret <2 x float> %tmp2
+}
+
+define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
+;CHECK: vduplaneQ8:
+;CHECK: vdup.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+	ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
+;CHECK: vduplaneQ16:
+;CHECK: vdup.16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
+;CHECK: vduplaneQ32:
+;CHECK: vdup.32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+	ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
+;CHECK: vduplaneQfloat:
+;CHECK: vdup.32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+	ret <4 x float> %tmp2
+}
+
+define arm_apcscc <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x double> %0
+}
+
+define arm_apcscc <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+  ret <2 x double> %0
+}
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
new file mode 100644
index 0000000000000..20d953bfb4a07
--- /dev/null
+++ b/test/CodeGen/ARM/vext.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define arm_apcscc <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextd:
+;CHECK: vext
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+	ret <8 x i8> %tmp3
+}
+
+define arm_apcscc <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextRd:
+;CHECK: vext
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
+	ret <8 x i8> %tmp3
+}
+
+define arm_apcscc <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextq:
+;CHECK: vext
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+	ret <16 x i8> %tmp3
+}
+
+define arm_apcscc <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextRq:
+;CHECK: vext
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
+	ret <16 x i8> %tmp3
+}
+
+define arm_apcscc <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: test_vextd16:
+;CHECK: vext
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+	ret <4 x i16> %tmp3
+}
+
+define arm_apcscc <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: test_vextq32:
+;CHECK: vext
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+	ret <4 x i32> %tmp3
+}
+
diff --git a/test/CodeGen/ARM/vfcmp.ll b/test/CodeGen/ARM/vfcmp.ll
index 58c2068bc8f4a..6946d02637ea8 100644
--- a/test/CodeGen/ARM/vfcmp.ll
+++ b/test/CodeGen/ARM/vfcmp.ll
@@ -1,96 +1,139 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.f32} %t | count 1
-; RUN: grep {vcgt\\.f32} %t | count 9
-; RUN: grep {vcge\\.f32} %t | count 5
-; RUN: grep vorr %t | count 4
-; RUN: grep vmvn %t | count 7
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
-; This tests vfcmp operations that do not map directly to NEON instructions.
+; This tests fcmp operations that do not map directly to NEON instructions.
 
 ; une is implemented with VCEQ/VMVN
 define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunef32:
+;CHECK: vceq.f32
+;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp une <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp une <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 ; olt is implemented with VCGT
 define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcoltf32:
+;CHECK: vcgt.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp olt <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 ; ole is implemented with VCGE
 define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcolef32:
+;CHECK: vcge.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp ole <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 ; uge is implemented with VCGT/VMVN
 define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp uge <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 ; ule is implemented with VCGT/VMVN
 define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vculef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp ule <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 ; ugt is implemented with VCGE/VMVN
 define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugtf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp ugt <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 ; ult is implemented with VCGE/VMVN
 define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcultf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp ult <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 ; ueq is implemented with VCGT/VCGT/VORR/VMVN
 define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcueqf32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp ueq <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 ; one is implemented with VCGT/VCGT/VORR
 define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vconef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp one <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp one <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 ; uno is implemented with VCGT/VCGE/VORR/VMVN
 define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunof32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp uno <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 ; ord is implemented with VCGT/VCGE/VORR
 define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcordf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = vfcmp ord <2 x float> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index f58da44093565..50000e31e1129 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -1,19 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fabs | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fmscs | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fcvt | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fuito | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fto.i | count 4
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep bmi | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep bgt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fcmpezs | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
 
 define void @test(float* %P, double* %D) {
 	%A = load float* %P		; <float> [#uses=1]
@@ -28,16 +13,20 @@ declare float @fabsf(float)
 declare double @fabs(double)
 
 define void @test_abs(float* %P, double* %D) {
+;CHECK: test_abs:
 	%a = load float* %P		; <float> [#uses=1]
+;CHECK: fabss
 	%b = call float @fabsf( float %a )		; <float> [#uses=1]
 	store float %b, float* %P
 	%A = load double* %D		; <double> [#uses=1]
+;CHECK: fabsd
 	%B = call double @fabs( double %A )		; <double> [#uses=1]
 	store double %B, double* %D
 	ret void
 }
 
 define void @test_add(float* %P, double* %D) {
+;CHECK: test_add:
 	%a = load float* %P		; <float> [#uses=2]
 	%b = fadd float %a, %a		; <float> [#uses=1]
 	store float %b, float* %P
@@ -48,9 +37,12 @@ define void @test_add(float* %P, double* %D) {
 }
 
 define void @test_ext_round(float* %P, double* %D) {
+;CHECK: test_ext_round:
 	%a = load float* %P		; <float> [#uses=1]
+;CHECK: fcvtds
 	%b = fpext float %a to double		; <double> [#uses=1]
 	%A = load double* %D		; <double> [#uses=1]
+;CHECK: fcvtsd
 	%B = fptrunc double %A to float		; <float> [#uses=1]
 	store double %b, double* %D
 	store float %B, float* %P
@@ -58,9 +50,11 @@ define void @test_ext_round(float* %P, double* %D) {
 }
 
 define void @test_fma(float* %P1, float* %P2, float* %P3) {
+;CHECK: test_fma:
 	%a1 = load float* %P1		; <float> [#uses=1]
 	%a2 = load float* %P2		; <float> [#uses=1]
 	%a3 = load float* %P3		; <float> [#uses=1]
+;CHECK: fmscs
 	%X = fmul float %a1, %a2		; <float> [#uses=1]
 	%Y = fsub float %X, %a3		; <float> [#uses=1]
 	store float %Y, float* %P1
@@ -68,42 +62,55 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
 }
 
 define i32 @test_ftoi(float* %P1) {
+;CHECK: test_ftoi:
 	%a1 = load float* %P1		; <float> [#uses=1]
+;CHECK: ftosizs
 	%b1 = fptosi float %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
 
 define i32 @test_ftou(float* %P1) {
+;CHECK: test_ftou:
 	%a1 = load float* %P1		; <float> [#uses=1]
+;CHECK: ftouizs
 	%b1 = fptoui float %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
 
 define i32 @test_dtoi(double* %P1) {
+;CHECK: test_dtoi:
 	%a1 = load double* %P1		; <double> [#uses=1]
+;CHECK: ftosizd
 	%b1 = fptosi double %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
 
 define i32 @test_dtou(double* %P1) {
+;CHECK: test_dtou:
 	%a1 = load double* %P1		; <double> [#uses=1]
+;CHECK: ftouizd
 	%b1 = fptoui double %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
 }
 
 define void @test_utod(double* %P1, i32 %X) {
+;CHECK: test_utod:
+;CHECK: fuitod
 	%b1 = uitofp i32 %X to double		; <double> [#uses=1]
 	store double %b1, double* %P1
 	ret void
 }
 
 define void @test_utod2(double* %P1, i8 %X) {
+;CHECK: test_utod2:
+;CHECK: fuitod
 	%b1 = uitofp i8 %X to double		; <double> [#uses=1]
 	store double %b1, double* %P1
 	ret void
 }
 
 define void @test_cmp(float* %glob, i32 %X) {
+;CHECK: test_cmp:
 entry:
 	%tmp = load float* %glob		; <float> [#uses=2]
 	%tmp3 = getelementptr float* %glob, i32 2		; <float*> [#uses=1]
@@ -111,6 +118,8 @@ entry:
 	%tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4		; <i1> [#uses=1]
 	%tmp5 = fcmp uno float %tmp, %tmp4		; <i1> [#uses=1]
 	%tmp6 = or i1 %tmp.upgrd.1, %tmp5		; <i1> [#uses=1]
+;CHECK: bmi
+;CHECK-NEXT: bgt
 	br i1 %tmp6, label %cond_true, label %cond_false
 
 cond_true:		; preds = %entry
@@ -129,8 +138,10 @@ declare i32 @bar(...)
 declare i32 @baz(...)
 
 define void @test_cmpfp0(float* %glob, i32 %X) {
+;CHECK: test_cmpfp0:
 entry:
 	%tmp = load float* %glob		; <float> [#uses=1]
+;CHECK: fcmpezs
 	%tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00		; <i1> [#uses=1]
 	br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
 
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index a361ba2ba97f5..f0df7982ef425 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -1,11 +1,10 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmov\\.s8} %t | count 2
-; RUN: grep {vmov\\.s16} %t | count 2
-; RUN: grep {vmov\\.u8} %t | count 2
-; RUN: grep {vmov\\.u16} %t | count 2
-; RUN: grep {vmov\\.32} %t | count 2
+; RUN: llc < %s -mattr=+neon | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
 
 define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
+;CHECK: vget_lanes8:
+;CHECK: vmov.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = extractelement <8 x i8> %tmp1, i32 1
 	%tmp3 = sext i8 %tmp2 to i32
@@ -13,6 +12,8 @@ define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
 }
 
 define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
+;CHECK: vget_lanes16:
+;CHECK: vmov.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = extractelement <4 x i16> %tmp1, i32 1
 	%tmp3 = sext i16 %tmp2 to i32
@@ -20,6 +21,8 @@ define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
 }
 
 define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
+;CHECK: vget_laneu8:
+;CHECK: vmov.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = extractelement <8 x i8> %tmp1, i32 1
 	%tmp3 = zext i8 %tmp2 to i32
@@ -27,6 +30,8 @@ define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
 }
 
 define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
+;CHECK: vget_laneu16:
+;CHECK: vmov.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = extractelement <4 x i16> %tmp1, i32 1
 	%tmp3 = zext i16 %tmp2 to i32
@@ -35,6 +40,8 @@ define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
 
 ; Do a vector add to keep the extraction from being done directly from memory.
 define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
+;CHECK: vget_lanei32:
+;CHECK: vmov.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = add <2 x i32> %tmp1, %tmp1
 	%tmp3 = extractelement <2 x i32> %tmp2, i32 1
@@ -42,6 +49,8 @@ define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
 }
 
 define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_lanes8:
+;CHECK: vmov.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = extractelement <16 x i8> %tmp1, i32 1
 	%tmp3 = sext i8 %tmp2 to i32
@@ -49,6 +58,8 @@ define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
 }
 
 define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_lanes16:
+;CHECK: vmov.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = extractelement <8 x i16> %tmp1, i32 1
 	%tmp3 = sext i16 %tmp2 to i32
@@ -56,6 +67,8 @@ define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
 }
 
 define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_laneu8:
+;CHECK: vmov.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = extractelement <16 x i8> %tmp1, i32 1
 	%tmp3 = zext i8 %tmp2 to i32
@@ -63,6 +76,8 @@ define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
 }
 
 define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_laneu16:
+;CHECK: vmov.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = extractelement <8 x i16> %tmp1, i32 1
 	%tmp3 = zext i16 %tmp2 to i32
@@ -71,8 +86,127 @@ define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
 
 ; Do a vector add to keep the extraction from being done directly from memory.
 define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
+;CHECK: vgetQ_lanei32:
+;CHECK: vmov.32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = add <4 x i32> %tmp1, %tmp1
 	%tmp3 = extractelement <4 x i32> %tmp2, i32 1
 	ret i32 %tmp3
 }
+
+define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+  %arg0_uint16x4_t = alloca <4 x i16>             ; <<4 x i16>*> [#uses=1]
+  %out_uint16_t = alloca i16                      ; <i16*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load <4 x i16>* %arg0_uint16x4_t, align 8  ; <<4 x i16>> [#uses=1]
+  %1 = extractelement <4 x i16> %0, i32 1         ; <i16> [#uses=1]
+  store i16 %1, i16* %out_uint16_t, align 2
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+  %arg0_uint8x8_t = alloca <8 x i8>               ; <<8 x i8>*> [#uses=1]
+  %out_uint8_t = alloca i8                        ; <i8*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load <8 x i8>* %arg0_uint8x8_t, align 8    ; <<8 x i8>> [#uses=1]
+  %1 = extractelement <8 x i8> %0, i32 1          ; <i8> [#uses=1]
+  store i8 %1, i8* %out_uint8_t, align 1
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+  %arg0_uint16x8_t = alloca <8 x i16>             ; <<8 x i16>*> [#uses=1]
+  %out_uint16_t = alloca i16                      ; <i16*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
+  %1 = extractelement <8 x i16> %0, i32 1         ; <i16> [#uses=1]
+  store i16 %1, i16* %out_uint16_t, align 2
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+  %arg0_uint8x16_t = alloca <16 x i8>             ; <<16 x i8>*> [#uses=1]
+  %out_uint8_t = alloca i8                        ; <i8*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
+  %1 = extractelement <16 x i8> %0, i32 1         ; <i8> [#uses=1]
+  store i8 %1, i8* %out_uint8_t, align 1
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
+;CHECK: vset_lane8:
+;CHECK: vmov.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
+;CHECK: vset_lane16:
+;CHECK: vmov.16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
+;CHECK: vset_lane32:
+;CHECK: vmov.32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
+	ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
+;CHECK: vsetQ_lane8:
+;CHECK: vmov.8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
+	ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
+;CHECK: vsetQ_lane16:
+;CHECK: vmov.16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
+;CHECK: vsetQ_lane32:
+;CHECK: vmov.32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
+	ret <4 x i32> %tmp2
+}
+
+define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
+;CHECK: test_vset_lanef32:
+;CHECK: fcpys
+;CHECK: fcpys
+entry:
+  %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
+  ret <2 x float> %0
+}
diff --git a/test/CodeGen/ARM/vhadd.ll b/test/CodeGen/ARM/vhadd.ll
index 5e7503dc71cf3..379e062838f61 100644
--- a/test/CodeGen/ARM/vhadd.ll
+++ b/test/CodeGen/ARM/vhadd.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vhadd\\.s8} %t | count 2
-; RUN: grep {vhadd\\.s16} %t | count 2
-; RUN: grep {vhadd\\.s32} %t | count 2
-; RUN: grep {vhadd\\.u8} %t | count 2
-; RUN: grep {vhadd\\.u16} %t | count 2
-; RUN: grep {vhadd\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhadds8:
+;CHECK: vhadd.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhadds16:
+;CHECK: vhadd.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhadds32:
+;CHECK: vhadd.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhaddu8:
+;CHECK: vhadd.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhaddu16:
+;CHECK: vhadd.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhaddu32:
+;CHECK: vhadd.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQs8:
+;CHECK: vhadd.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQs16:
+;CHECK: vhadd.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQs32:
+;CHECK: vhadd.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQu8:
+;CHECK: vhadd.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQu16:
+;CHECK: vhadd.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQu32:
+;CHECK: vhadd.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -105,3 +123,127 @@ declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>) nounwind rea
 declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhadds8:
+;CHECK: vrhadd.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhadds16:
+;CHECK: vrhadd.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhadds32:
+;CHECK: vrhadd.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhaddu8:
+;CHECK: vrhadd.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhaddu16:
+;CHECK: vrhadd.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhaddu32:
+;CHECK: vrhadd.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQs8:
+;CHECK: vrhadd.s8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQs16:
+;CHECK: vrhadd.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQs32:
+;CHECK: vrhadd.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQu8:
+;CHECK: vrhadd.u8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQu16:
+;CHECK: vrhadd.u16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQu32:
+;CHECK: vrhadd.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+declare <8 x i8>  @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vhsub.ll b/test/CodeGen/ARM/vhsub.ll
index 32a66e5479454..0f0d0279a5217 100644
--- a/test/CodeGen/ARM/vhsub.ll
+++ b/test/CodeGen/ARM/vhsub.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vhsub\\.s8} %t | count 2
-; RUN: grep {vhsub\\.s16} %t | count 2
-; RUN: grep {vhsub\\.s32} %t | count 2
-; RUN: grep {vhsub\\.u8} %t | count 2
-; RUN: grep {vhsub\\.u16} %t | count 2
-; RUN: grep {vhsub\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubs8:
+;CHECK: vhsub.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubs16:
+;CHECK: vhsub.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubs32:
+;CHECK: vhsub.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubu8:
+;CHECK: vhsub.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubu16:
+;CHECK: vhsub.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubu32:
+;CHECK: vhsub.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQs8:
+;CHECK: vhsub.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQs16:
+;CHECK: vhsub.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQs32:
+;CHECK: vhsub.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQu8:
+;CHECK: vhsub.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQu16:
+;CHECK: vhsub.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQu32:
+;CHECK: vhsub.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
diff --git a/test/CodeGen/ARM/vicmp.ll b/test/CodeGen/ARM/vicmp.ll
index 86858f9293487..2d8cb893bd867 100644
--- a/test/CodeGen/ARM/vicmp.ll
+++ b/test/CodeGen/ARM/vicmp.ll
@@ -1,85 +1,113 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.i8} %t | count 2
-; RUN: grep {vceq\\.i16} %t | count 2
-; RUN: grep {vceq\\.i32} %t | count 2
-; RUN: grep vmvn %t | count 6
-; RUN: grep {vcgt\\.s8} %t | count 1
-; RUN: grep {vcge\\.s16} %t | count 1
-; RUN: grep {vcgt\\.u16} %t | count 1
-; RUN: grep {vcge\\.u32} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
-; This tests vicmp operations that do not map directly to NEON instructions.
+; This tests icmp operations that do not map directly to NEON instructions.
 ; Not-equal (ne) operations are implemented by VCEQ/VMVN.  Less-than (lt/ult)
 ; and less-than-or-equal (le/ule) are implemented by swapping the arguments
 ; to VCGT and VCGE.  Test all the operand types for not-equal but only sample
 ; the other operations.
 
 define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcnei8:
+;CHECK: vceq.i8
+;CHECK-NEXT: vmvn
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = vicmp ne <8 x i8> %tmp1, %tmp2
-	ret <8 x i8> %tmp3
+	%tmp3 = icmp ne <8 x i8> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
 }
 
 define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcnei16:
+;CHECK: vceq.i16
+;CHECK-NEXT: vmvn
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = vicmp ne <4 x i16> %tmp1, %tmp2
-	ret <4 x i16> %tmp3
+	%tmp3 = icmp ne <4 x i16> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
 }
 
 define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcnei32:
+;CHECK: vceq.i32
+;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = vicmp ne <2 x i32> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = icmp ne <2 x i32> %tmp1, %tmp2
+        %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }
 
 define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcneQi8:
+;CHECK: vceq.i8
+;CHECK-NEXT: vmvn
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = vicmp ne <16 x i8> %tmp1, %tmp2
-	ret <16 x i8> %tmp3
+	%tmp3 = icmp ne <16 x i8> %tmp1, %tmp2
+        %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcneQi16:
+;CHECK: vceq.i16
+;CHECK-NEXT: vmvn
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = vicmp ne <8 x i16> %tmp1, %tmp2
-	ret <8 x i16> %tmp3
+	%tmp3 = icmp ne <8 x i16> %tmp1, %tmp2
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+	ret <8 x i16> %tmp4
 }
 
 define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcneQi32:
+;CHECK: vceq.i32
+;CHECK-NEXT: vmvn
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = vicmp ne <4 x i32> %tmp1, %tmp2
-	ret <4 x i32> %tmp3
+	%tmp3 = icmp ne <4 x i32> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
 }
 
 define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcltQs8:
+;CHECK: vcgt.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = vicmp slt <16 x i8> %tmp1, %tmp2
-	ret <16 x i8> %tmp3
+	%tmp3 = icmp slt <16 x i8> %tmp1, %tmp2
+        %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
 }
 
 define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcles16:
+;CHECK: vcge.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = vicmp sle <4 x i16> %tmp1, %tmp2
-	ret <4 x i16> %tmp3
+	%tmp3 = icmp sle <4 x i16> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
 }
 
 define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcltu16:
+;CHECK: vcgt.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = vicmp ult <4 x i16> %tmp1, %tmp2
-	ret <4 x i16> %tmp3
+	%tmp3 = icmp ult <4 x i16> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
 }
 
 define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcleQu32:
+;CHECK: vcge.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = vicmp ule <4 x i32> %tmp1, %tmp2
-	ret <4 x i32> %tmp3
+	%tmp3 = icmp ule <4 x i32> %tmp1, %tmp2
+        %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
 }
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
new file mode 100644
index 0000000000000..f5383aafb2bb0
--- /dev/null
+++ b/test/CodeGen/ARM/vld1.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vld1i8(i8* %A) nounwind {
+;CHECK: vld1i8:
+;CHECK: vld1.8
+	%tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A)
+	ret <8 x i8> %tmp1
+}
+
+define <4 x i16> @vld1i16(i16* %A) nounwind {
+;CHECK: vld1i16:
+;CHECK: vld1.16
+	%tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i16* %A)
+	ret <4 x i16> %tmp1
+}
+
+define <2 x i32> @vld1i32(i32* %A) nounwind {
+;CHECK: vld1i32:
+;CHECK: vld1.32
+	%tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i32* %A)
+	ret <2 x i32> %tmp1
+}
+
+define <2 x float> @vld1f(float* %A) nounwind {
+;CHECK: vld1f:
+;CHECK: vld1.32
+	%tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(float* %A)
+	ret <2 x float> %tmp1
+}
+
+define <1 x i64> @vld1i64(i64* %A) nounwind {
+;CHECK: vld1i64:
+;CHECK: vld1.64
+	%tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i64* %A)
+	ret <1 x i64> %tmp1
+}
+
+define <16 x i8> @vld1Qi8(i8* %A) nounwind {
+;CHECK: vld1Qi8:
+;CHECK: vld1.8
+	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A)
+	ret <16 x i8> %tmp1
+}
+
+define <8 x i16> @vld1Qi16(i16* %A) nounwind {
+;CHECK: vld1Qi16:
+;CHECK: vld1.16
+	%tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i16* %A)
+	ret <8 x i16> %tmp1
+}
+
+define <4 x i32> @vld1Qi32(i32* %A) nounwind {
+;CHECK: vld1Qi32:
+;CHECK: vld1.32
+	%tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i32* %A)
+	ret <4 x i32> %tmp1
+}
+
+define <4 x float> @vld1Qf(float* %A) nounwind {
+;CHECK: vld1Qf:
+;CHECK: vld1.32
+	%tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(float* %A)
+	ret <4 x float> %tmp1
+}
+
+define <2 x i64> @vld1Qi64(i64* %A) nounwind {
+;CHECK: vld1Qi64:
+;CHECK: vld1.64
+	%tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i64* %A)
+	ret <2 x i64> %tmp1
+}
+
+declare <8 x i8>  @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly
+
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
new file mode 100644
index 0000000000000..23f7d2ca0cd3e
--- /dev/null
+++ b/test/CodeGen/ARM/vld2.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x2_t = type { <16 x i8>,  <16 x i8> }
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+define <8 x i8> @vld2i8(i8* %A) nounwind {
+;CHECK: vld2i8:
+;CHECK: vld2.8
+	%tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A)
+        %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
+        %tmp4 = add <8 x i8> %tmp2, %tmp3
+	ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld2i16(i16* %A) nounwind {
+;CHECK: vld2i16:
+;CHECK: vld2.16
+	%tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i16* %A)
+        %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
+        %tmp4 = add <4 x i16> %tmp2, %tmp3
+	ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld2i32(i32* %A) nounwind {
+;CHECK: vld2i32:
+;CHECK: vld2.32
+	%tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i32* %A)
+        %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
+        %tmp4 = add <2 x i32> %tmp2, %tmp3
+	ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld2f(float* %A) nounwind {
+;CHECK: vld2f:
+;CHECK: vld2.32
+	%tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(float* %A)
+        %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
+        %tmp4 = add <2 x float> %tmp2, %tmp3
+	ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld2i64(i64* %A) nounwind {
+;CHECK: vld2i64:
+;CHECK: vld1.64
+	%tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i64* %A)
+        %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1
+        %tmp4 = add <1 x i64> %tmp2, %tmp3
+	ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld2Qi8(i8* %A) nounwind {
+;CHECK: vld2Qi8:
+;CHECK: vld2.8
+	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A)
+        %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
+        %tmp4 = add <16 x i8> %tmp2, %tmp3
+	ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld2Qi16(i16* %A) nounwind {
+;CHECK: vld2Qi16:
+;CHECK: vld2.16
+	%tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i16* %A)
+        %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
+        %tmp4 = add <8 x i16> %tmp2, %tmp3
+	ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld2Qi32(i32* %A) nounwind {
+;CHECK: vld2Qi32:
+;CHECK: vld2.32
+	%tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i32* %A)
+        %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
+        %tmp4 = add <4 x i32> %tmp2, %tmp3
+	ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld2Qf(float* %A) nounwind {
+;CHECK: vld2Qf:
+;CHECK: vld2.32
+	%tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(float* %A)
+        %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1
+        %tmp4 = add <4 x float> %tmp2, %tmp3
+	ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
new file mode 100644
index 0000000000000..207dc6a22e459
--- /dev/null
+++ b/test/CodeGen/ARM/vld3.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x3_t = type { <16 x i8>,  <16 x i8>,  <16 x i8> }
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld3i8(i8* %A) nounwind {
+;CHECK: vld3i8:
+;CHECK: vld3.8
+	%tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A)
+        %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
+        %tmp4 = add <8 x i8> %tmp2, %tmp3
+	ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld3i16(i16* %A) nounwind {
+;CHECK: vld3i16:
+;CHECK: vld3.16
+	%tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i16* %A)
+        %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
+        %tmp4 = add <4 x i16> %tmp2, %tmp3
+	ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld3i32(i32* %A) nounwind {
+;CHECK: vld3i32:
+;CHECK: vld3.32
+	%tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i32* %A)
+        %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2
+        %tmp4 = add <2 x i32> %tmp2, %tmp3
+	ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld3f(float* %A) nounwind {
+;CHECK: vld3f:
+;CHECK: vld3.32
+	%tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(float* %A)
+        %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2
+        %tmp4 = add <2 x float> %tmp2, %tmp3
+	ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld3i64(i64* %A) nounwind {
+;CHECK: vld3i64:
+;CHECK: vld1.64
+	%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i64* %A)
+        %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
+        %tmp4 = add <1 x i64> %tmp2, %tmp3
+	ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld3Qi8(i8* %A) nounwind {
+;CHECK: vld3Qi8:
+;CHECK: vld3.8
+;CHECK: vld3.8
+	%tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A)
+        %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
+        %tmp4 = add <16 x i8> %tmp2, %tmp3
+	ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld3Qi16(i16* %A) nounwind {
+;CHECK: vld3Qi16:
+;CHECK: vld3.16
+;CHECK: vld3.16
+	%tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i16* %A)
+        %tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2
+        %tmp4 = add <8 x i16> %tmp2, %tmp3
+	ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld3Qi32(i32* %A) nounwind {
+;CHECK: vld3Qi32:
+;CHECK: vld3.32
+;CHECK: vld3.32
+	%tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i32* %A)
+        %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
+        %tmp4 = add <4 x i32> %tmp2, %tmp3
+	ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld3Qf(float* %A) nounwind {
+;CHECK: vld3Qf:
+;CHECK: vld3.32
+;CHECK: vld3.32
+	%tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(float* %A)
+        %tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2
+        %tmp4 = add <4 x float> %tmp2, %tmp3
+	ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
new file mode 100644
index 0000000000000..0624f2977ea46
--- /dev/null
+++ b/test/CodeGen/ARM/vld4.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x4_t = type { <16 x i8>,  <16 x i8>,  <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld4i8(i8* %A) nounwind {
+;CHECK: vld4i8:
+;CHECK: vld4.8
+	%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A)
+        %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
+        %tmp4 = add <8 x i8> %tmp2, %tmp3
+	ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld4i16(i16* %A) nounwind {
+;CHECK: vld4i16:
+;CHECK: vld4.16
+	%tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i16* %A)
+        %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
+        %tmp4 = add <4 x i16> %tmp2, %tmp3
+	ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld4i32(i32* %A) nounwind {
+;CHECK: vld4i32:
+;CHECK: vld4.32
+	%tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i32* %A)
+        %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
+        %tmp4 = add <2 x i32> %tmp2, %tmp3
+	ret <2 x i32> %tmp4
+}
+
+define <2 x float> @vld4f(float* %A) nounwind {
+;CHECK: vld4f:
+;CHECK: vld4.32
+	%tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(float* %A)
+        %tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2
+        %tmp4 = add <2 x float> %tmp2, %tmp3
+	ret <2 x float> %tmp4
+}
+
+define <1 x i64> @vld4i64(i64* %A) nounwind {
+;CHECK: vld4i64:
+;CHECK: vld1.64
+	%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i64* %A)
+        %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
+        %tmp4 = add <1 x i64> %tmp2, %tmp3
+	ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld4Qi8(i8* %A) nounwind {
+;CHECK: vld4Qi8:
+;CHECK: vld4.8
+;CHECK: vld4.8
+	%tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A)
+        %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
+        %tmp4 = add <16 x i8> %tmp2, %tmp3
+	ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld4Qi16(i16* %A) nounwind {
+;CHECK: vld4Qi16:
+;CHECK: vld4.16
+;CHECK: vld4.16
+	%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i16* %A)
+        %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
+        %tmp4 = add <8 x i16> %tmp2, %tmp3
+	ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld4Qi32(i32* %A) nounwind {
+;CHECK: vld4Qi32:
+;CHECK: vld4.32
+;CHECK: vld4.32
+	%tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i32* %A)
+        %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2
+        %tmp4 = add <4 x i32> %tmp2, %tmp3
+	ret <4 x i32> %tmp4
+}
+
+define <4 x float> @vld4Qf(float* %A) nounwind {
+;CHECK: vld4Qf:
+;CHECK: vld4.32
+;CHECK: vld4.32
+	%tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(float* %A)
+        %tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2
+        %tmp4 = add <4 x float> %tmp2, %tmp3
+	ret <4 x float> %tmp4
+}
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly
+declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*) nounwind readonly
+
+declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
new file mode 100644
index 0000000000000..53881a3f924e2
--- /dev/null
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -0,0 +1,328 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld2lanei8:
+;CHECK: vld2.8
+	%tmp1 = load <8 x i8>* %B
+	%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+        %tmp5 = add <8 x i8> %tmp3, %tmp4
+	ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld2lanei16:
+;CHECK: vld2.16
+	%tmp1 = load <4 x i16>* %B
+	%tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
+        %tmp5 = add <4 x i16> %tmp3, %tmp4
+	ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld2lanei32:
+;CHECK: vld2.32
+	%tmp1 = load <2 x i32>* %B
+	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
+        %tmp5 = add <2 x i32> %tmp3, %tmp4
+	ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld2lanef:
+;CHECK: vld2.32
+	%tmp1 = load <2 x float>* %B
+	%tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
+        %tmp5 = add <2 x float> %tmp3, %tmp4
+	ret <2 x float> %tmp5
+}
+
+define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld2laneQi16:
+;CHECK: vld2.16
+	%tmp1 = load <8 x i16>* %B
+	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
+        %tmp5 = add <8 x i16> %tmp3, %tmp4
+	ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld2laneQi32:
+;CHECK: vld2.32
+	%tmp1 = load <4 x i32>* %B
+	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+        %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
+        %tmp5 = add <4 x i32> %tmp3, %tmp4
+	ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld2laneQf:
+;CHECK: vld2.32
+	%tmp1 = load <4 x float>* %B
+	%tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
+        %tmp5 = add <4 x float> %tmp3, %tmp4
+	ret <4 x float> %tmp5
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld3lanei8:
+;CHECK: vld3.8
+	%tmp1 = load <8 x i8>* %B
+	%tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+        %tmp6 = add <8 x i8> %tmp3, %tmp4
+        %tmp7 = add <8 x i8> %tmp5, %tmp6
+	ret <8 x i8> %tmp7
+}
+
+define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld3lanei16:
+;CHECK: vld3.16
+	%tmp1 = load <4 x i16>* %B
+	%tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
+        %tmp6 = add <4 x i16> %tmp3, %tmp4
+        %tmp7 = add <4 x i16> %tmp5, %tmp6
+	ret <4 x i16> %tmp7
+}
+
+define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld3lanei32:
+;CHECK: vld3.32
+	%tmp1 = load <2 x i32>* %B
+	%tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
+        %tmp6 = add <2 x i32> %tmp3, %tmp4
+        %tmp7 = add <2 x i32> %tmp5, %tmp6
+	ret <2 x i32> %tmp7
+}
+
+define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld3lanef:
+;CHECK: vld3.32
+	%tmp1 = load <2 x float>* %B
+	%tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
+        %tmp6 = add <2 x float> %tmp3, %tmp4
+        %tmp7 = add <2 x float> %tmp5, %tmp6
+	ret <2 x float> %tmp7
+}
+
+define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld3laneQi16:
+;CHECK: vld3.16
+	%tmp1 = load <8 x i16>* %B
+	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
+        %tmp6 = add <8 x i16> %tmp3, %tmp4
+        %tmp7 = add <8 x i16> %tmp5, %tmp6
+	ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld3laneQi32:
+;CHECK: vld3.32
+	%tmp1 = load <4 x i32>* %B
+	%tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3)
+        %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
+        %tmp6 = add <4 x i32> %tmp3, %tmp4
+        %tmp7 = add <4 x i32> %tmp5, %tmp6
+	ret <4 x i32> %tmp7
+}
+
+define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld3laneQf:
+;CHECK: vld3.32
+	%tmp1 = load <4 x float>* %B
+	%tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
+        %tmp6 = add <4 x float> %tmp3, %tmp4
+        %tmp7 = add <4 x float> %tmp5, %tmp6
+	ret <4 x float> %tmp7
+}
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>,  <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld4lanei8:
+;CHECK: vld4.8
+	%tmp1 = load <8 x i8>* %B
+	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+        %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+        %tmp7 = add <8 x i8> %tmp3, %tmp4
+        %tmp8 = add <8 x i8> %tmp5, %tmp6
+        %tmp9 = add <8 x i8> %tmp7, %tmp8
+	ret <8 x i8> %tmp9
+}
+
+define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld4lanei16:
+;CHECK: vld4.16
+	%tmp1 = load <4 x i16>* %B
+	%tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
+        %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
+        %tmp7 = add <4 x i16> %tmp3, %tmp4
+        %tmp8 = add <4 x i16> %tmp5, %tmp6
+        %tmp9 = add <4 x i16> %tmp7, %tmp8
+	ret <4 x i16> %tmp9
+}
+
+define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld4lanei32:
+;CHECK: vld4.32
+	%tmp1 = load <2 x i32>* %B
+	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
+        %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
+        %tmp7 = add <2 x i32> %tmp3, %tmp4
+        %tmp8 = add <2 x i32> %tmp5, %tmp6
+        %tmp9 = add <2 x i32> %tmp7, %tmp8
+	ret <2 x i32> %tmp9
+}
+
+define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld4lanef:
+;CHECK: vld4.32
+	%tmp1 = load <2 x float>* %B
+	%tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
+        %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
+        %tmp7 = add <2 x float> %tmp3, %tmp4
+        %tmp8 = add <2 x float> %tmp5, %tmp6
+        %tmp9 = add <2 x float> %tmp7, %tmp8
+	ret <2 x float> %tmp9
+}
+
+define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld4laneQi16:
+;CHECK: vld4.16
+	%tmp1 = load <8 x i16>* %B
+	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
+        %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
+        %tmp7 = add <8 x i16> %tmp3, %tmp4
+        %tmp8 = add <8 x i16> %tmp5, %tmp6
+        %tmp9 = add <8 x i16> %tmp7, %tmp8
+	ret <8 x i16> %tmp9
+}
+
+define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld4laneQi32:
+;CHECK: vld4.32
+	%tmp1 = load <4 x i32>* %B
+	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
+        %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
+        %tmp7 = add <4 x i32> %tmp3, %tmp4
+        %tmp8 = add <4 x i32> %tmp5, %tmp6
+        %tmp9 = add <4 x i32> %tmp7, %tmp8
+	ret <4 x i32> %tmp9
+}
+
+define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld4laneQf:
+;CHECK: vld4.32
+	%tmp1 = load <4 x float>* %B
+	%tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+        %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
+        %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
+        %tmp7 = add <4 x float> %tmp3, %tmp4
+        %tmp8 = add <4 x float> %tmp5, %tmp6
+        %tmp9 = add <4 x float> %tmp7, %tmp8
+	ret <4 x float> %tmp9
+}
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vminmax.ll b/test/CodeGen/ARM/vminmax.ll
new file mode 100644
index 0000000000000..e3527c1a4d9ba
--- /dev/null
+++ b/test/CodeGen/ARM/vminmax.ll
@@ -0,0 +1,293 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmins8:
+;CHECK: vmin.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmins16:
+;CHECK: vmin.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmins32:
+;CHECK: vmin.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vminu8:
+;CHECK: vmin.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vminu16:
+;CHECK: vmin.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vminu32:
+;CHECK: vmin.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vminf32:
+;CHECK: vmin.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = load <2 x float>* %B
+	%tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vminQs8:
+;CHECK: vmin.s8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vminQs16:
+;CHECK: vmin.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vminQs32:
+;CHECK: vmin.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vminQu8:
+;CHECK: vmin.u8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vminQu16:
+;CHECK: vmin.u16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vminQu32:
+;CHECK: vmin.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vminQf32:
+;CHECK: vmin.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = load <4 x float>* %B
+	%tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	ret <4 x float> %tmp3
+}
+
+declare <8 x i8>  @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmaxs8:
+;CHECK: vmax.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmaxs16:
+;CHECK: vmax.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmaxs32:
+;CHECK: vmax.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmaxu8:
+;CHECK: vmax.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmaxu16:
+;CHECK: vmax.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmaxu32:
+;CHECK: vmax.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vmaxf32:
+;CHECK: vmax.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = load <2 x float>* %B
+	%tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmaxQs8:
+;CHECK: vmax.s8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmaxQs16:
+;CHECK: vmax.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmaxQs32:
+;CHECK: vmax.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmaxQu8:
+;CHECK: vmax.u8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmaxQu16:
+;CHECK: vmax.u16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmaxQu32:
+;CHECK: vmax.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vmaxQf32:
+;CHECK: vmax.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = load <4 x float>* %B
+	%tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	ret <4 x float> %tmp3
+}
+
+declare <8 x i8>  @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll
index ed77e11a7c47e..840521827413a 100644
--- a/test/CodeGen/ARM/vmla.ll
+++ b/test/CodeGen/ARM/vmla.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmla\\.i8} %t | count 2
-; RUN: grep {vmla\\.i16} %t | count 2
-; RUN: grep {vmla\\.i32} %t | count 2
-; RUN: grep {vmla\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
+;CHECK: vmlai8:
+;CHECK: vmla.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = load <8 x i8>* %C
@@ -14,6 +12,8 @@ define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
 }
 
 define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlai16:
+;CHECK: vmla.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = load <4 x i16>* %C
@@ -23,6 +23,8 @@ define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlai32:
+;CHECK: vmla.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = load <2 x i32>* %C
@@ -32,6 +34,8 @@ define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK: vmlaf32:
+;CHECK: vmla.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
 	%tmp3 = load <2 x float>* %C
@@ -41,6 +45,8 @@ define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
 }
 
 define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
+;CHECK: vmlaQi8:
+;CHECK: vmla.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = load <16 x i8>* %C
@@ -50,6 +56,8 @@ define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
 }
 
 define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vmlaQi16:
+;CHECK: vmla.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = load <8 x i16>* %C
@@ -59,6 +67,8 @@ define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }
 
 define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vmlaQi32:
+;CHECK: vmla.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = load <4 x i32>* %C
@@ -68,6 +78,8 @@ define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
 }
 
 define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK: vmlaQf32:
+;CHECK: vmla.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
 	%tmp3 = load <4 x float>* %C
@@ -75,3 +87,107 @@ define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
 	%tmp5 = add <4 x float> %tmp1, %tmp4
 	ret <4 x float> %tmp5
 }
+
+define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlals8:
+;CHECK: vmlal.s8
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = load <8 x i8>* %C
+	%tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+	ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlals16:
+;CHECK: vmlal.s16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = load <4 x i16>* %C
+	%tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlals32:
+;CHECK: vmlal.s32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = load <2 x i32>* %C
+	%tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+	ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlalu8:
+;CHECK: vmlal.u8
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = load <8 x i8>* %C
+	%tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+	ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlalu16:
+;CHECK: vmlal.u16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = load <4 x i16>* %C
+	%tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlalu32:
+;CHECK: vmlal.u32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = load <2 x i32>* %C
+	%tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+	ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes16
+; CHECK: vmlal.s16 q0, d2, d3[1]
+  %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes32
+; CHECK: vmlal.s32 q0, d2, d3[1]
+  %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu16
+; CHECK: vmlal.u16 q0, d2, d3[1]
+  %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu32
+; CHECK: vmlal.u32 q0, d2, d3[1]
+  %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll
index d519b7e70e1e3..c89552e6f9eaa 100644
--- a/test/CodeGen/ARM/vmls.ll
+++ b/test/CodeGen/ARM/vmls.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmls\\.i8} %t | count 2
-; RUN: grep {vmls\\.i16} %t | count 2
-; RUN: grep {vmls\\.i32} %t | count 2
-; RUN: grep {vmls\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
+;CHECK: vmlsi8:
+;CHECK: vmls.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = load <8 x i8>* %C
@@ -14,6 +12,8 @@ define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
 }
 
 define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlsi16:
+;CHECK: vmls.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = load <4 x i16>* %C
@@ -23,6 +23,8 @@ define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlsi32:
+;CHECK: vmls.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = load <2 x i32>* %C
@@ -32,6 +34,8 @@ define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK: vmlsf32:
+;CHECK: vmls.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
 	%tmp3 = load <2 x float>* %C
@@ -41,6 +45,8 @@ define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
 }
 
 define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
+;CHECK: vmlsQi8:
+;CHECK: vmls.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = load <16 x i8>* %C
@@ -50,6 +56,8 @@ define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
 }
 
 define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vmlsQi16:
+;CHECK: vmls.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = load <8 x i16>* %C
@@ -59,6 +67,8 @@ define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }
 
 define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vmlsQi32:
+;CHECK: vmls.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = load <4 x i32>* %C
@@ -68,6 +78,8 @@ define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
 }
 
 define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK: vmlsQf32:
+;CHECK: vmls.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
 	%tmp3 = load <4 x float>* %C
@@ -75,3 +87,107 @@ define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
 	%tmp5 = sub <4 x float> %tmp1, %tmp4
 	ret <4 x float> %tmp5
 }
+
+define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlsls8:
+;CHECK: vmlsl.s8
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = load <8 x i8>* %C
+	%tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+	ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlsls16:
+;CHECK: vmlsl.s16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = load <4 x i16>* %C
+	%tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlsls32:
+;CHECK: vmlsl.s32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = load <2 x i32>* %C
+	%tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+	ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlslu8:
+;CHECK: vmlsl.u8
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = load <8 x i8>* %C
+	%tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+	ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlslu16:
+;CHECK: vmlsl.u16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = load <4 x i16>* %C
+	%tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlslu32:
+;CHECK: vmlsl.u32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = load <2 x i32>* %C
+	%tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+	ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes16
+; CHECK: vmlsl.s16 q0, d2, d3[1]
+  %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes32
+; CHECK: vmlsl.s32 q0, d2, d3[1]
+  %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu16
+; CHECK: vmlsl.u16 q0, d2, d3[1]
+  %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu32
+; CHECK: vmlsl.u32 q0, d2, d3[1]
+  %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index af9c8e25989c9..ed69f970c611c 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -1,101 +1,303 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vmov.i8 %t | count 2
-; RUN: grep vmov.i16 %t | count 4
-; RUN: grep vmov.i32 %t | count 12
-; RUN: grep vmov.i64 %t | count 2
-; Note: function names do not include "vmov" to allow simple grep for opcodes
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @v_movi8() nounwind {
+;CHECK: v_movi8:
+;CHECK: vmov.i8
 	ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 }
 
 define <4 x i16> @v_movi16a() nounwind {
+;CHECK: v_movi16a:
+;CHECK: vmov.i16
 	ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
 }
 
 ; 0x1000 = 4096
 define <4 x i16> @v_movi16b() nounwind {
+;CHECK: v_movi16b:
+;CHECK: vmov.i16
 	ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
 }
 
 define <2 x i32> @v_movi32a() nounwind {
+;CHECK: v_movi32a:
+;CHECK: vmov.i32
 	ret <2 x i32> < i32 32, i32 32 >
 }
 
 ; 0x2000 = 8192
 define <2 x i32> @v_movi32b() nounwind {
+;CHECK: v_movi32b:
+;CHECK: vmov.i32
 	ret <2 x i32> < i32 8192, i32 8192 >
 }
 
 ; 0x200000 = 2097152
 define <2 x i32> @v_movi32c() nounwind {
+;CHECK: v_movi32c:
+;CHECK: vmov.i32
 	ret <2 x i32> < i32 2097152, i32 2097152 >
 }
 
 ; 0x20000000 = 536870912
 define <2 x i32> @v_movi32d() nounwind {
+;CHECK: v_movi32d:
+;CHECK: vmov.i32
 	ret <2 x i32> < i32 536870912, i32 536870912 >
 }
 
 ; 0x20ff = 8447
 define <2 x i32> @v_movi32e() nounwind {
+;CHECK: v_movi32e:
+;CHECK: vmov.i32
 	ret <2 x i32> < i32 8447, i32 8447 >
 }
 
 ; 0x20ffff = 2162687
 define <2 x i32> @v_movi32f() nounwind {
+;CHECK: v_movi32f:
+;CHECK: vmov.i32
 	ret <2 x i32> < i32 2162687, i32 2162687 >
 }
 
 ; 0xff0000ff0000ffff = 18374687574888349695
 define <1 x i64> @v_movi64() nounwind {
+;CHECK: v_movi64:
+;CHECK: vmov.i64
 	ret <1 x i64> < i64 18374687574888349695 >
 }
 
 define <16 x i8> @v_movQi8() nounwind {
+;CHECK: v_movQi8:
+;CHECK: vmov.i8
 	ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 }
 
 define <8 x i16> @v_movQi16a() nounwind {
+;CHECK: v_movQi16a:
+;CHECK: vmov.i16
 	ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
 }
 
 ; 0x1000 = 4096
 define <8 x i16> @v_movQi16b() nounwind {
+;CHECK: v_movQi16b:
+;CHECK: vmov.i16
 	ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
 }
 
 define <4 x i32> @v_movQi32a() nounwind {
+;CHECK: v_movQi32a:
+;CHECK: vmov.i32
 	ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
 }
 
 ; 0x2000 = 8192
 define <4 x i32> @v_movQi32b() nounwind {
+;CHECK: v_movQi32b:
+;CHECK: vmov.i32
 	ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
 }
 
 ; 0x200000 = 2097152
 define <4 x i32> @v_movQi32c() nounwind {
+;CHECK: v_movQi32c:
+;CHECK: vmov.i32
 	ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
 }
 
 ; 0x20000000 = 536870912
 define <4 x i32> @v_movQi32d() nounwind {
+;CHECK: v_movQi32d:
+;CHECK: vmov.i32
 	ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
 }
 
 ; 0x20ff = 8447
 define <4 x i32> @v_movQi32e() nounwind {
+;CHECK: v_movQi32e:
+;CHECK: vmov.i32
 	ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
 }
 
 ; 0x20ffff = 2162687
 define <4 x i32> @v_movQi32f() nounwind {
+;CHECK: v_movQi32f:
+;CHECK: vmov.i32
 	ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
 }
 
 ; 0xff0000ff0000ffff = 18374687574888349695
 define <2 x i64> @v_movQi64() nounwind {
+;CHECK: v_movQi64:
+;CHECK: vmov.i64
 	ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
 
+define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
+;CHECK: vmovls8:
+;CHECK: vmovl.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
+;CHECK: vmovls16:
+;CHECK: vmovl.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
+;CHECK: vmovls32:
+;CHECK: vmovl.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1)
+	ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
+;CHECK: vmovlu8:
+;CHECK: vmovl.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
+;CHECK: vmovlu16:
+;CHECK: vmovl.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
+;CHECK: vmovlu32:
+;CHECK: vmovl.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1)
+	ret <2 x i64> %tmp2
+}
+
+declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone
+
+define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
+;CHECK: vmovni16:
+;CHECK: vmovn.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
+;CHECK: vmovni32:
+;CHECK: vmovn.i32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
+;CHECK: vmovni64:
+;CHECK: vmovn.i64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+declare <8 x i8>  @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
+
+define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovns16:
+;CHECK: vqmovn.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovns32:
+;CHECK: vqmovn.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovns64:
+;CHECK: vqmovn.s64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovnu16:
+;CHECK: vqmovn.u16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovnu32:
+;CHECK: vqmovn.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovnu64:
+;CHECK: vqmovn.u64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovuns16:
+;CHECK: vqmovun.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovuns32:
+;CHECK: vqmovun.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovuns64:
+;CHECK: vqmovun.s64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+declare <8 x i8>  @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index eb9ae7b95c2d8..325da5deabe51 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmul\\.i8} %t | count 2
-; RUN: grep {vmul\\.i16} %t | count 2
-; RUN: grep {vmul\\.i32} %t | count 2
-; RUN: grep {vmul\\.f32} %t | count 2
-; RUN: grep {vmul\\.p8} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmuli8:
+;CHECK: vmul.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = mul <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmuli16:
+;CHECK: vmul.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = mul <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmuli32:
+;CHECK: vmul.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = mul <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vmulf32:
+;CHECK: vmul.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
 	%tmp3 = mul <2 x float> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulp8:
+;CHECK: vmul.p8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -41,6 +46,8 @@ define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmulQi8:
+;CHECK: vmul.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = mul <16 x i8> %tmp1, %tmp2
@@ -48,6 +55,8 @@ define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmulQi16:
+;CHECK: vmul.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = mul <8 x i16> %tmp1, %tmp2
@@ -55,6 +64,8 @@ define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmulQi32:
+;CHECK: vmul.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = mul <4 x i32> %tmp1, %tmp2
@@ -62,6 +73,8 @@ define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vmulQf32:
+;CHECK: vmul.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
 	%tmp3 = mul <4 x float> %tmp1, %tmp2
@@ -69,6 +82,8 @@ define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmulQp8:
+;CHECK: vmul.p8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,3 +92,166 @@ define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 
 declare <8 x i8>  @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
 declare <16 x i8>  @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+
+define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanef32:
+; CHECK: vmul.f32 d0, d0, d1[0]
+  %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; <<2 x float>> [#uses=1]
+  %1 = fmul <2 x float> %0, %arg0_float32x2_t     ; <<2 x float>> [#uses=1]
+  ret <2 x float> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes16:
+; CHECK: vmul.i16 d0, d0, d1[1]
+  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses$
+  %1 = mul <4 x i16> %0, %arg0_int16x4_t          ; <<4 x i16>> [#uses=1]
+  ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes32:
+; CHECK: vmul.i32 d0, d0, d1[1]
+  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = mul <2 x i32> %0, %arg0_int32x2_t          ; <<2 x i32>> [#uses=1]
+  ret <2 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanef32:
+; CHECK: vmul.f32 q0, q0, d2[1]
+  %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>$
+  %1 = fmul <4 x float> %0, %arg0_float32x4_t     ; <<4 x float>> [#uses=1]
+  ret <4 x float> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes16:
+; CHECK: vmul.i16 q0, q0, d2[1]
+  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %1 = mul <8 x i16> %0, %arg0_int16x8_t          ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes32:
+; CHECK: vmul.i32 q0, q0, d2[1]
+  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses$
+  %1 = mul <4 x i32> %0, %arg0_int32x4_t          ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulls8:
+;CHECK: vmull.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmulls16:
+;CHECK: vmull.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmulls32:
+;CHECK: vmull.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullu8:
+;CHECK: vmull.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmullu16:
+;CHECK: vmull.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmullu32:
+;CHECK: vmull.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullp8:
+;CHECK: vmull.p8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes16
+; CHECK: vmull.s16 q0, d0, d1[1]
+  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes32
+; CHECK: vmull.s32 q0, d0, d1[1]
+  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu16
+; CHECK: vmull.u16 q0, d0, d1[1]
+  %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu32
+; CHECK: vmull.u32 q0, d0, d1[1]
+  %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16>  @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM/vneg.ll b/test/CodeGen/ARM/vneg.ll
index 9fa527f52fcc6..7764e87c6ac6b 100644
--- a/test/CodeGen/ARM/vneg.ll
+++ b/test/CodeGen/ARM/vneg.ll
@@ -1,53 +1,121 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vneg\\.s8} %t | count 2
-; RUN: grep {vneg\\.s16} %t | count 2
-; RUN: grep {vneg\\.s32} %t | count 2
-; RUN: grep {vneg\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind {
+;CHECK: vnegs8:
+;CHECK: vneg.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = sub <8 x i8> zeroinitializer, %tmp1
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind {
+;CHECK: vnegs16:
+;CHECK: vneg.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = sub <4 x i16> zeroinitializer, %tmp1
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind {
+;CHECK: vnegs32:
+;CHECK: vneg.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = sub <2 x i32> zeroinitializer, %tmp1
 	ret <2 x i32> %tmp2
 }
 
 define <2 x float> @vnegf32(<2 x float>* %A) nounwind {
+;CHECK: vnegf32:
+;CHECK: vneg.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = sub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1
 	ret <2 x float> %tmp2
 }
 
 define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind {
+;CHECK: vnegQs8:
+;CHECK: vneg.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = sub <16 x i8> zeroinitializer, %tmp1
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind {
+;CHECK: vnegQs16:
+;CHECK: vneg.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = sub <8 x i16> zeroinitializer, %tmp1
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind {
+;CHECK: vnegQs32:
+;CHECK: vneg.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = sub <4 x i32> zeroinitializer, %tmp1
 	ret <4 x i32> %tmp2
 }
 
 define <4 x float> @vnegQf32(<4 x float>* %A) nounwind {
+;CHECK: vnegQf32:
+;CHECK: vneg.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1
 	ret <4 x float> %tmp2
 }
+
+define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
+;CHECK: vqnegs8:
+;CHECK: vqneg.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
+;CHECK: vqnegs16:
+;CHECK: vqneg.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
+;CHECK: vqnegs32:
+;CHECK: vqneg.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqnegQs8:
+;CHECK: vqneg.s8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1)
+	ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqnegQs16:
+;CHECK: vqneg.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqnegQs32:
+;CHECK: vqneg.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+declare <8 x i8>  @llvm.arm.neon.vqneg.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpadal.ll b/test/CodeGen/ARM/vpadal.ll
index c41c532988e8f..7296e936cd73e 100644
--- a/test/CodeGen/ARM/vpadal.ll
+++ b/test/CodeGen/ARM/vpadal.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vpadal\\.s8} %t | count 2
-; RUN: grep {vpadal\\.s16} %t | count 2
-; RUN: grep {vpadal\\.s32} %t | count 2
-; RUN: grep {vpadal\\.u8} %t | count 2
-; RUN: grep {vpadal\\.u16} %t | count 2
-; RUN: grep {vpadal\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpadals8:
+;CHECK: vpadal.s8
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpadals16:
+;CHECK: vpadal.s16
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpadals32:
+;CHECK: vpadal.s32
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpadalu8:
+;CHECK: vpadal.u8
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpadalu16:
+;CHECK: vpadal.u16
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpadalu32:
+;CHECK: vpadal.u32
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vpadalQs8:
+;CHECK: vpadal.s8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vpadalQs16:
+;CHECK: vpadal.s16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vpadalQs32:
+;CHECK: vpadal.s32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vpadalQu8:
+;CHECK: vpadal.u8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vpadalQu16:
+;CHECK: vpadal.u16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vpadalQu32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vpadalQu32:
+;CHECK: vpadal.u32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll
index baff49227e64e..212557394518a 100644
--- a/test/CodeGen/ARM/vpadd.ll
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -1,39 +1,155 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vpadd\\.i8} %t | count 1
-; RUN: grep {vpadd\\.i16} %t | count 1
-; RUN: grep {vpadd\\.i32} %t | count 1
-; RUN: grep {vpadd\\.f32} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpaddi8:
+;CHECK: vpadd.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = call <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
 
 define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpaddi16:
+;CHECK: vpadd.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = call <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
 
 define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpaddi32:
+;CHECK: vpadd.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = call <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
 
 define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpaddf32:
+;CHECK: vpadd.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
 
-declare <8 x i8>  @llvm.arm.neon.vpaddi.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <8 x i8>  @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 
-declare <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddls8:
+;CHECK: vpaddl.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddls16:
+;CHECK: vpaddl.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddls32:
+;CHECK: vpaddl.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1)
+	ret <1 x i64> %tmp2
+}
+
+define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddlu8:
+;CHECK: vpaddl.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddlu16:
+;CHECK: vpaddl.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddlu32:
+;CHECK: vpaddl.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1)
+	ret <1 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQs8:
+;CHECK: vpaddl.s8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQs16:
+;CHECK: vpaddl.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQs32:
+;CHECK: vpaddl.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1)
+	ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQu8:
+;CHECK: vpaddl.u8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQu16:
+;CHECK: vpaddl.u16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQu32:
+;CHECK: vpaddl.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1)
+	ret <2 x i64> %tmp2
+}
+
+declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpminmax.ll b/test/CodeGen/ARM/vpminmax.ll
new file mode 100644
index 0000000000000..b75bcc99f6b67
--- /dev/null
+++ b/test/CodeGen/ARM/vpminmax.ll
@@ -0,0 +1,147 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmins8:
+;CHECK: vpmin.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmins16:
+;CHECK: vpmin.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmins32:
+;CHECK: vpmin.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpminu8:
+;CHECK: vpmin.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpminu16:
+;CHECK: vpmin.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpminu32:
+;CHECK: vpmin.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpminf32:
+;CHECK: vpmin.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = load <2 x float>* %B
+	%tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	ret <2 x float> %tmp3
+}
+
+declare <8 x i8>  @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxs8:
+;CHECK: vpmax.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxs16:
+;CHECK: vpmax.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxs32:
+;CHECK: vpmax.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxu8:
+;CHECK: vpmax.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxu16:
+;CHECK: vpmax.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxu32:
+;CHECK: vpmax.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpmaxf32:
+;CHECK: vpmax.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = load <2 x float>* %B
+	%tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	ret <2 x float> %tmp3
+}
+
+declare <8 x i8>  @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqadd.ll b/test/CodeGen/ARM/vqadd.ll
index c9e235995360a..a1669b60ab564 100644
--- a/test/CodeGen/ARM/vqadd.ll
+++ b/test/CodeGen/ARM/vqadd.ll
@@ -1,14 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqadd\\.s8} %t | count 2
-; RUN: grep {vqadd\\.s16} %t | count 2
-; RUN: grep {vqadd\\.s32} %t | count 2
-; RUN: grep {vqadd\\.s64} %t | count 2
-; RUN: grep {vqadd\\.u8} %t | count 2
-; RUN: grep {vqadd\\.u16} %t | count 2
-; RUN: grep {vqadd\\.u32} %t | count 2
-; RUN: grep {vqadd\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqadds8:
+;CHECK: vqadd.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -16,6 +10,8 @@ define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqadds16:
+;CHECK: vqadd.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -23,6 +19,8 @@ define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqadds32:
+;CHECK: vqadd.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -30,6 +28,8 @@ define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqadds64:
+;CHECK: vqadd.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -37,6 +37,8 @@ define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqaddu8:
+;CHECK: vqadd.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -44,6 +46,8 @@ define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqaddu16:
+;CHECK: vqadd.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -51,6 +55,8 @@ define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqaddu32:
+;CHECK: vqadd.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -58,6 +64,8 @@ define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqaddu64:
+;CHECK: vqadd.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -65,6 +73,8 @@ define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqaddQs8:
+;CHECK: vqadd.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -72,6 +82,8 @@ define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqaddQs16:
+;CHECK: vqadd.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -79,6 +91,8 @@ define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqaddQs32:
+;CHECK: vqadd.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -86,6 +100,8 @@ define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqaddQs64:
+;CHECK: vqadd.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -93,6 +109,8 @@ define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqaddQu8:
+;CHECK: vqadd.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -100,6 +118,8 @@ define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqaddQu16:
+;CHECK: vqadd.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -107,6 +127,8 @@ define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqaddQu32:
+;CHECK: vqadd.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -114,6 +136,8 @@ define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqaddQu64:
+;CHECK: vqadd.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
diff --git a/test/CodeGen/ARM/vqdmul.ll b/test/CodeGen/ARM/vqdmul.ll
new file mode 100644
index 0000000000000..8dcc7f73633c3
--- /dev/null
+++ b/test/CodeGen/ARM/vqdmul.ll
@@ -0,0 +1,281 @@
+; RUN: llc -mattr=+neon < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulhs16:
+;CHECK: vqdmulh.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulhs32:
+;CHECK: vqdmulh.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqdmulhQs16:
+;CHECK: vqdmulh.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqdmulhQs32:
+;CHECK: vqdmulh.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes16
+; CHECK: vqdmulh.s16 q0, q0, d2[1]
+  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+  %1 = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes32
+; CHECK: vqdmulh.s32 q0, q0, d2[1]
+  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes16
+; CHECK: vqdmulh.s16 d0, d0, d1[1]
+  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+  ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes32
+; CHECK: vqdmulh.s32 d0, d0, d1[1]
+  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+  ret <2 x i32> %1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrdmulhs16:
+;CHECK: vqrdmulh.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrdmulhs32:
+;CHECK: vqrdmulh.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrdmulhQs16:
+;CHECK: vqrdmulh.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrdmulhQs32:
+;CHECK: vqrdmulh.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqRdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes16
+; CHECK: vqrdmulh.s16 q0, q0, d2[1]
+  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+  %1 = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqRdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes32
+; CHECK: vqrdmulh.s32 q0, q0, d2[1]
+  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqRdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes16
+; CHECK: vqrdmulh.s16 d0, d0, d1[1]
+  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+  ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqRdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes32
+; CHECK: vqrdmulh.s32 d0, d0, d1[1]
+  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+  ret <2 x i32> %1
+}
+
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulls16:
+;CHECK: vqdmull.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulls32:
+;CHECK: vqdmull.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes16
+; CHECK: vqdmull.s16 q0, d0, d1[1]
+  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes32
+; CHECK: vqdmull.s32 q0, d0, d1[1]
+  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %1
+}
+
+declare <4 x i32>  @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64>  @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlals16:
+;CHECK: vqdmlal.s16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = load <4 x i16>* %C
+	%tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlals32:
+;CHECK: vqdmlal.s32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = load <2 x i32>* %C
+	%tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+	ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes16
+; CHECK: vqdmlal.s16 q0, d2, d3[1]
+  %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes32
+; CHECK: vqdmlal.s32 q0, d2, d3[1]
+  %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %1
+}
+
+declare <4 x i32>  @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64>  @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlsls16:
+;CHECK: vqdmlsl.s16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = load <4 x i16>* %C
+	%tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlsls32:
+;CHECK: vqdmlsl.s32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = load <2 x i32>* %C
+	%tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+	ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes16
+; CHECK: vqdmlsl.s16 q0, d2, d3[1]
+  %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes32
+; CHECK: vqdmlsl.s32 q0, d2, d3[1]
+  %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %1
+}
+
+declare <4 x i32>  @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64>  @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqshl.ll b/test/CodeGen/ARM/vqshl.ll
index 60b04bd5830ef..e4d29a337cf06 100644
--- a/test/CodeGen/ARM/vqshl.ll
+++ b/test/CodeGen/ARM/vqshl.ll
@@ -1,26 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqshl\\.s8} %t | count 4
-; RUN: grep {vqshl\\.s16} %t | count 4
-; RUN: grep {vqshl\\.s32} %t | count 4
-; RUN: grep {vqshl\\.s64} %t | count 4
-; RUN: grep {vqshl\\.u8} %t | count 4
-; RUN: grep {vqshl\\.u16} %t | count 4
-; RUN: grep {vqshl\\.u32} %t | count 4
-; RUN: grep {vqshl\\.u64} %t | count 4
-; RUN: grep {vqshl\\.s8.*#7} %t | count 2
-; RUN: grep {vqshl\\.s16.*#15} %t | count 2
-; RUN: grep {vqshl\\.s32.*#31} %t | count 2
-; RUN: grep {vqshl\\.s64.*#63} %t | count 2
-; RUN: grep {vqshl\\.u8.*#7} %t | count 2
-; RUN: grep {vqshl\\.u16.*#15} %t | count 2
-; RUN: grep {vqshl\\.u32.*#31} %t | count 2
-; RUN: grep {vqshl\\.u64.*#63} %t | count 2
-; RUN: grep {vqshlu\\.s8} %t | count 2
-; RUN: grep {vqshlu\\.s16} %t | count 2
-; RUN: grep {vqshlu\\.s32} %t | count 2
-; RUN: grep {vqshlu\\.s64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqshls8:
+;CHECK: vqshl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -28,6 +10,8 @@ define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqshls16:
+;CHECK: vqshl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -35,6 +19,8 @@ define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqshls32:
+;CHECK: vqshl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -42,6 +28,8 @@ define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqshls64:
+;CHECK: vqshl.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -49,6 +37,8 @@ define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqshlu8:
+;CHECK: vqshl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -56,6 +46,8 @@ define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqshlu16:
+;CHECK: vqshl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -63,6 +55,8 @@ define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqshlu32:
+;CHECK: vqshl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -70,6 +64,8 @@ define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqshlu64:
+;CHECK: vqshl.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -77,6 +73,8 @@ define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqshlQs8:
+;CHECK: vqshl.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -84,6 +82,8 @@ define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqshlQs16:
+;CHECK: vqshl.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -91,6 +91,8 @@ define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqshlQs32:
+;CHECK: vqshl.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -98,6 +100,8 @@ define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqshlQs64:
+;CHECK: vqshl.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -105,6 +109,8 @@ define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqshlQu8:
+;CHECK: vqshl.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -112,6 +118,8 @@ define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqshlQu16:
+;CHECK: vqshl.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -119,6 +127,8 @@ define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqshlQu32:
+;CHECK: vqshl.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -126,6 +136,8 @@ define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqshlQu64:
+;CHECK: vqshl.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -133,144 +145,192 @@ define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshls_n8:
+;CHECK: vqshl.s8{{.*#7}}
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshls_n16:
+;CHECK: vqshl.s16{{.*#15}}
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshls_n32:
+;CHECK: vqshl.s32{{.*#31}}
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshls_n64:
+;CHECK: vqshl.s64{{.*#63}}
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
 	ret <1 x i64> %tmp2
 }
 
 define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshlu_n8:
+;CHECK: vqshl.u8{{.*#7}}
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshlu_n16:
+;CHECK: vqshl.u16{{.*#15}}
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshlu_n32:
+;CHECK: vqshl.u32{{.*#31}}
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshlu_n64:
+;CHECK: vqshl.u64{{.*#63}}
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
 	ret <1 x i64> %tmp2
 }
 
 define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
+;CHECK: vqshlsu_n8:
+;CHECK: vqshlu.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
+;CHECK: vqshlsu_n16:
+;CHECK: vqshlu.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
+;CHECK: vqshlsu_n32:
+;CHECK: vqshlu.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
+;CHECK: vqshlsu_n64:
+;CHECK: vqshlu.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
 	ret <1 x i64> %tmp2
 }
 
 define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQs_n8:
+;CHECK: vqshl.s8{{.*#7}}
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQs_n16:
+;CHECK: vqshl.s16{{.*#15}}
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQs_n32:
+;CHECK: vqshl.s32{{.*#31}}
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQs_n64:
+;CHECK: vqshl.s64{{.*#63}}
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
 	ret <2 x i64> %tmp2
 }
 
 define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQu_n8:
+;CHECK: vqshl.u8{{.*#7}}
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQu_n16:
+;CHECK: vqshl.u16{{.*#15}}
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQu_n32:
+;CHECK: vqshl.u32{{.*#31}}
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQu_n64:
+;CHECK: vqshl.u64{{.*#63}}
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
 	ret <2 x i64> %tmp2
 }
 
 define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
+;CHECK: vqshlQsu_n8:
+;CHECK: vqshlu.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
+;CHECK: vqshlQsu_n16:
+;CHECK: vqshlu.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
+;CHECK: vqshlQsu_n32:
+;CHECK: vqshlu.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
+;CHECK: vqshlQsu_n64:
+;CHECK: vqshlu.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
 	ret <2 x i64> %tmp2
@@ -305,3 +365,167 @@ declare <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8>, <16 x i8>) nounwind
 declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqrshls8:
+;CHECK: vqrshl.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrshls16:
+;CHECK: vqrshl.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrshls32:
+;CHECK: vqrshl.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqrshls64:
+;CHECK: vqrshl.s64
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = load <1 x i64>* %B
+	%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+	ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqrshlu8:
+;CHECK: vqrshl.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrshlu16:
+;CHECK: vqrshl.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrshlu32:
+;CHECK: vqrshl.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqrshlu64:
+;CHECK: vqrshl.u64
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = load <1 x i64>* %B
+	%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+	ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqrshlQs8:
+;CHECK: vqrshl.s8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrshlQs16:
+;CHECK: vqrshl.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrshlQs32:
+;CHECK: vqrshl.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqrshlQs64:
+;CHECK: vqrshl.s64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqrshlQu8:
+;CHECK: vqrshl.u8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrshlQu16:
+;CHECK: vqrshl.u16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrshlQu32:
+;CHECK: vqrshl.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqrshlQu64:
+;CHECK: vqrshl.u64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+declare <8 x i8>  @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqshrn.ll b/test/CodeGen/ARM/vqshrn.ll
index 6bd607abb4d07..5da79432bb42e 100644
--- a/test/CodeGen/ARM/vqshrn.ll
+++ b/test/CodeGen/ARM/vqshrn.ll
@@ -1,63 +1,72 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqshrn\\.s16} %t | count 1
-; RUN: grep {vqshrn\\.s32} %t | count 1
-; RUN: grep {vqshrn\\.s64} %t | count 1
-; RUN: grep {vqshrn\\.u16} %t | count 1
-; RUN: grep {vqshrn\\.u32} %t | count 1
-; RUN: grep {vqshrn\\.u64} %t | count 1
-; RUN: grep {vqshrun\\.s16} %t | count 1
-; RUN: grep {vqshrun\\.s32} %t | count 1
-; RUN: grep {vqshrun\\.s64} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vqshrns8:
+;CHECK: vqshrn.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vqshrns16:
+;CHECK: vqshrn.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vqshrns32:
+;CHECK: vqshrn.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
 }
 
 define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
+;CHECK: vqshrnu8:
+;CHECK: vqshrn.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
+;CHECK: vqshrnu16:
+;CHECK: vqshrn.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
+;CHECK: vqshrnu32:
+;CHECK: vqshrn.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
 }
 
 define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
+;CHECK: vqshruns8:
+;CHECK: vqshrun.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
+;CHECK: vqshruns16:
+;CHECK: vqshrun.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind {
+;CHECK: vqshruns32:
+;CHECK: vqshrun.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
@@ -74,3 +83,87 @@ declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind
 declare <8 x i8>  @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshrns8:
+;CHECK: vqrshrn.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshrns16:
+;CHECK: vqrshrn.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshrns32:
+;CHECK: vqrshrn.s64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+	ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshrnu8:
+;CHECK: vqrshrn.u16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshrnu16:
+;CHECK: vqrshrn.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshrnu32:
+;CHECK: vqrshrn.u64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+	ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshruns8:
+;CHECK: vqrshrun.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshruns16:
+;CHECK: vqrshrun.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshruns32:
+;CHECK: vqrshrun.s64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+	ret <2 x i32> %tmp2
+}
+
+declare <8 x i8>  @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqsub.ll b/test/CodeGen/ARM/vqsub.ll
index 07052f78dbea2..4231fca37e370 100644
--- a/test/CodeGen/ARM/vqsub.ll
+++ b/test/CodeGen/ARM/vqsub.ll
@@ -1,14 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqsub\\.s8} %t | count 2
-; RUN: grep {vqsub\\.s16} %t | count 2
-; RUN: grep {vqsub\\.s32} %t | count 2
-; RUN: grep {vqsub\\.s64} %t | count 2
-; RUN: grep {vqsub\\.u8} %t | count 2
-; RUN: grep {vqsub\\.u16} %t | count 2
-; RUN: grep {vqsub\\.u32} %t | count 2
-; RUN: grep {vqsub\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqsubs8:
+;CHECK: vqsub.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -16,6 +10,8 @@ define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqsubs16:
+;CHECK: vqsub.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -23,6 +19,8 @@ define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqsubs32:
+;CHECK: vqsub.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -30,6 +28,8 @@ define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqsubs64:
+;CHECK: vqsub.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -37,6 +37,8 @@ define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqsubu8:
+;CHECK: vqsub.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -44,6 +46,8 @@ define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqsubu16:
+;CHECK: vqsub.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -51,6 +55,8 @@ define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqsubu32:
+;CHECK: vqsub.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -58,6 +64,8 @@ define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqsubu64:
+;CHECK: vqsub.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -65,6 +73,8 @@ define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqsubQs8:
+;CHECK: vqsub.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -72,6 +82,8 @@ define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqsubQs16:
+;CHECK: vqsub.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -79,6 +91,8 @@ define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqsubQs32:
+;CHECK: vqsub.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -86,6 +100,8 @@ define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqsubQs64:
+;CHECK: vqsub.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -93,6 +109,8 @@ define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqsubQu8:
+;CHECK: vqsub.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -100,6 +118,8 @@ define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqsubQu16:
+;CHECK: vqsub.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -107,6 +127,8 @@ define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqsubQu32:
+;CHECK: vqsub.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -114,6 +136,8 @@ define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqsubQu64:
+;CHECK: vqsub.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
diff --git a/test/CodeGen/ARM/vrec.ll b/test/CodeGen/ARM/vrec.ll
new file mode 100644
index 0000000000000..99989e9d61448
--- /dev/null
+++ b/test/CodeGen/ARM/vrec.ll
@@ -0,0 +1,119 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
+;CHECK: vrecpei32:
+;CHECK: vrecpe.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrecpeQi32:
+;CHECK: vrecpe.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
+;CHECK: vrecpef32:
+;CHECK: vrecpe.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
+	ret <2 x float> %tmp2
+}
+
+define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
+;CHECK: vrecpeQf32:
+;CHECK: vrecpe.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
+	ret <4 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrecpsf32:
+;CHECK: vrecps.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = load <2 x float>* %B
+	%tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	ret <2 x float> %tmp3
+}
+
+define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrecpsQf32:
+;CHECK: vrecps.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = load <4 x float>* %B
+	%tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	ret <4 x float> %tmp3
+}
+
+declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
+;CHECK: vrsqrtei32:
+;CHECK: vrsqrte.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrsqrteQi32:
+;CHECK: vrsqrte.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
+;CHECK: vrsqrtef32:
+;CHECK: vrsqrte.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
+	ret <2 x float> %tmp2
+}
+
+define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
+;CHECK: vrsqrteQf32:
+;CHECK: vrsqrte.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
+	ret <4 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
+
+define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrsqrtsf32:
+;CHECK: vrsqrts.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = load <2 x float>* %B
+	%tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	ret <2 x float> %tmp3
+}
+
+define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrsqrtsQf32:
+;CHECK: vrsqrts.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = load <4 x float>* %B
+	%tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	ret <4 x float> %tmp3
+}
+
+declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
new file mode 100644
index 0000000000000..f0a04a4416452
--- /dev/null
+++ b/test/CodeGen/ARM/vrev.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define arm_apcscc <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8:
+;CHECK: vrev64.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+	ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev64D16:
+;CHECK: vrev64.16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+	ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
+;CHECK: test_vrev64D32:
+;CHECK: vrev64.32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+	ret <2 x i32> %tmp2
+}
+
+define arm_apcscc <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
+;CHECK: test_vrev64Df:
+;CHECK: vrev64.32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+	ret <2 x float> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev64Q8:
+;CHECK: vrev64.8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+	ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev64Q16:
+;CHECK: vrev64.16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+	ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
+;CHECK: test_vrev64Q32:
+;CHECK: vrev64.32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+	ret <4 x i32> %tmp2
+}
+
+define arm_apcscc <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
+;CHECK: test_vrev64Qf:
+;CHECK: vrev64.32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+	ret <4 x float> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev32D8:
+;CHECK: vrev32.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+	ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev32D16:
+;CHECK: vrev32.16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+	ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev32Q8:
+;CHECK: vrev32.8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+	ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16:
+;CHECK: vrev32.16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+	ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev16D8:
+;CHECK: vrev16.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+	ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev16Q8:
+;CHECK: vrev16.8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+	ret <16 x i8> %tmp2
+}
diff --git a/test/CodeGen/ARM/vshift.ll b/test/CodeGen/ARM/vshift.ll
index 8c5c4aad18d83..f3cbec7457d06 100644
--- a/test/CodeGen/ARM/vshift.ll
+++ b/test/CodeGen/ARM/vshift.ll
@@ -1,30 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshl\\.s8} %t | count 2
-; RUN: grep {vshl\\.s16} %t | count 2
-; RUN: grep {vshl\\.s32} %t | count 2
-; RUN: grep {vshl\\.s64} %t | count 2
-; RUN: grep {vshl\\.u8} %t | count 4
-; RUN: grep {vshl\\.u16} %t | count 4
-; RUN: grep {vshl\\.u32} %t | count 4
-; RUN: grep {vshl\\.u64} %t | count 4
-; RUN: grep {vshl\\.i8} %t | count 2
-; RUN: grep {vshl\\.i16} %t | count 2
-; RUN: grep {vshl\\.i32} %t | count 2
-; RUN: grep {vshl\\.i64} %t | count 2
-; RUN: grep {vshr\\.u8} %t | count 2
-; RUN: grep {vshr\\.u16} %t | count 2
-; RUN: grep {vshr\\.u32} %t | count 2
-; RUN: grep {vshr\\.u64} %t | count 2
-; RUN: grep {vshr\\.s8} %t | count 2
-; RUN: grep {vshr\\.s16} %t | count 2
-; RUN: grep {vshr\\.s32} %t | count 2
-; RUN: grep {vshr\\.s64} %t | count 2
-; RUN: grep {vneg\\.s8} %t | count 4
-; RUN: grep {vneg\\.s16} %t | count 4
-; RUN: grep {vneg\\.s32} %t | count 4
-; RUN: grep {vsub\\.i64} %t | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshls8:
+;CHECK: vshl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = shl <8 x i8> %tmp1, %tmp2
@@ -32,6 +10,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshls16:
+;CHECK: vshl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = shl <4 x i16> %tmp1, %tmp2
@@ -39,6 +19,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshls32:
+;CHECK: vshl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = shl <2 x i32> %tmp1, %tmp2
@@ -46,6 +28,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshls64:
+;CHECK: vshl.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = shl <1 x i64> %tmp1, %tmp2
@@ -53,30 +37,40 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
+;CHECK: vshli8:
+;CHECK: vshl.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
+;CHECK: vshli16:
+;CHECK: vshl.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
+;CHECK: vshli32:
+;CHECK: vshl.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
+;CHECK: vshli64:
+;CHECK: vshl.i64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = shl <1 x i64> %tmp1, < i64 63 >
 	ret <1 x i64> %tmp2
 }
 
 define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQs8:
+;CHECK: vshl.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = shl <16 x i8> %tmp1, %tmp2
@@ -84,6 +78,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQs16:
+;CHECK: vshl.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = shl <8 x i16> %tmp1, %tmp2
@@ -91,6 +87,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQs32:
+;CHECK: vshl.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = shl <4 x i32> %tmp1, %tmp2
@@ -98,6 +96,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQs64:
+;CHECK: vshl.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = shl <2 x i64> %tmp1, %tmp2
@@ -105,30 +105,41 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
+;CHECK: vshlQi8:
+;CHECK: vshl.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
+;CHECK: vshlQi16:
+;CHECK: vshl.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
+;CHECK: vshlQi32:
+;CHECK: vshl.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
+;CHECK: vshlQi64:
+;CHECK: vshl.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
 	ret <2 x i64> %tmp2
 }
 
 define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vlshru8:
+;CHECK: vneg.s8
+;CHECK: vshl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = lshr <8 x i8> %tmp1, %tmp2
@@ -136,6 +147,9 @@ define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vlshru16:
+;CHECK: vneg.s16
+;CHECK: vshl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = lshr <4 x i16> %tmp1, %tmp2
@@ -143,6 +157,9 @@ define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vlshru32:
+;CHECK: vneg.s32
+;CHECK: vshl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = lshr <2 x i32> %tmp1, %tmp2
@@ -150,6 +167,9 @@ define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vlshru64:
+;CHECK: vsub.i64
+;CHECK: vshl.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = lshr <1 x i64> %tmp1, %tmp2
@@ -157,30 +177,41 @@ define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
+;CHECK: vlshri8:
+;CHECK: vshr.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = lshr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
+;CHECK: vlshri16:
+;CHECK: vshr.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
+;CHECK: vlshri32:
+;CHECK: vshr.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 >
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
+;CHECK: vlshri64:
+;CHECK: vshr.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = lshr <1 x i64> %tmp1, < i64 64 >
 	ret <1 x i64> %tmp2
 }
 
 define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vlshrQu8:
+;CHECK: vneg.s8
+;CHECK: vshl.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = lshr <16 x i8> %tmp1, %tmp2
@@ -188,6 +219,9 @@ define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vlshrQu16:
+;CHECK: vneg.s16
+;CHECK: vshl.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = lshr <8 x i16> %tmp1, %tmp2
@@ -195,6 +229,9 @@ define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vlshrQu32:
+;CHECK: vneg.s32
+;CHECK: vshl.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = lshr <4 x i32> %tmp1, %tmp2
@@ -202,6 +239,9 @@ define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vlshrQu64:
+;CHECK: vsub.i64
+;CHECK: vshl.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = lshr <2 x i64> %tmp1, %tmp2
@@ -209,30 +249,48 @@ define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
+;CHECK: vlshrQi8:
+;CHECK: vshr.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
+;CHECK: vlshrQi16:
+;CHECK: vshr.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
+;CHECK: vlshrQi32:
+;CHECK: vshr.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
+;CHECK: vlshrQi64:
+;CHECK: vshr.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 >
 	ret <2 x i64> %tmp2
 }
 
+; Example that requires splitting and expanding a vector shift.
+define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
+entry:
+	%shr = lshr <2 x i64> %val, < i64 2, i64 2 >		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %shr
+}
+
 define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vashrs8:
+;CHECK: vneg.s8
+;CHECK: vshl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = ashr <8 x i8> %tmp1, %tmp2
@@ -240,6 +298,9 @@ define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vashrs16:
+;CHECK: vneg.s16
+;CHECK: vshl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = ashr <4 x i16> %tmp1, %tmp2
@@ -247,6 +308,9 @@ define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vashrs32:
+;CHECK: vneg.s32
+;CHECK: vshl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = ashr <2 x i32> %tmp1, %tmp2
@@ -254,6 +318,9 @@ define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vashrs64:
+;CHECK: vsub.i64
+;CHECK: vshl.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = ashr <1 x i64> %tmp1, %tmp2
@@ -261,30 +328,41 @@ define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
+;CHECK: vashri8:
+;CHECK: vshr.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
+;CHECK: vashri16:
+;CHECK: vshr.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
+;CHECK: vashri32:
+;CHECK: vshr.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 >
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
+;CHECK: vashri64:
+;CHECK: vshr.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = ashr <1 x i64> %tmp1, < i64 64 >
 	ret <1 x i64> %tmp2
 }
 
 define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vashrQs8:
+;CHECK: vneg.s8
+;CHECK: vshl.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = ashr <16 x i8> %tmp1, %tmp2
@@ -292,6 +370,9 @@ define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vashrQs16:
+;CHECK: vneg.s16
+;CHECK: vshl.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = ashr <8 x i16> %tmp1, %tmp2
@@ -299,6 +380,9 @@ define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vashrQs32:
+;CHECK: vneg.s32
+;CHECK: vshl.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = ashr <4 x i32> %tmp1, %tmp2
@@ -306,6 +390,9 @@ define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vashrQs64:
+;CHECK: vsub.i64
+;CHECK: vshl.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = ashr <2 x i64> %tmp1, %tmp2
@@ -313,24 +400,32 @@ define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
+;CHECK: vashrQi8:
+;CHECK: vshr.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
+;CHECK: vashrQi16:
+;CHECK: vshr.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
+;CHECK: vashrQi32:
+;CHECK: vshr.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
+;CHECK: vashrQi64:
+;CHECK: vshr.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 >
 	ret <2 x i64> %tmp2
diff --git a/test/CodeGen/ARM/vshiftins.ll b/test/CodeGen/ARM/vshiftins.ll
index cb7cbb89ecdb3..3a4f8574e3977 100644
--- a/test/CodeGen/ARM/vshiftins.ll
+++ b/test/CodeGen/ARM/vshiftins.ll
@@ -1,14 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vsli\\.8} %t | count 2
-; RUN: grep {vsli\\.16} %t | count 2
-; RUN: grep {vsli\\.32} %t | count 2
-; RUN: grep {vsli\\.64} %t | count 2
-; RUN: grep {vsri\\.8} %t | count 2
-; RUN: grep {vsri\\.16} %t | count 2
-; RUN: grep {vsri\\.32} %t | count 2
-; RUN: grep {vsri\\.64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsli8:
+;CHECK: vsli.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -16,6 +10,8 @@ define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsli16:
+;CHECK: vsli.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
@@ -23,6 +19,8 @@ define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsli32:
+;CHECK: vsli.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 31, i32 31 >)
@@ -30,6 +28,8 @@ define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsli64:
+;CHECK: vsli.64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 63 >)
@@ -37,6 +37,8 @@ define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsliQ8:
+;CHECK: vsli.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -44,6 +46,8 @@ define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsliQ16:
+;CHECK: vsli.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
@@ -51,6 +55,8 @@ define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsliQ32:
+;CHECK: vsli.32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
@@ -58,6 +64,8 @@ define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsliQ64:
+;CHECK: vsli.64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 63, i64 63 >)
@@ -65,6 +73,8 @@ define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsri8:
+;CHECK: vsri.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -72,6 +82,8 @@ define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsri16:
+;CHECK: vsri.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -79,6 +91,8 @@ define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsri32:
+;CHECK: vsri.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
@@ -86,6 +100,8 @@ define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsri64:
+;CHECK: vsri.64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 -64 >)
@@ -93,6 +109,8 @@ define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsriQ8:
+;CHECK: vsri.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -100,6 +118,8 @@ define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsriQ16:
+;CHECK: vsri.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -107,6 +127,8 @@ define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsriQ32:
+;CHECK: vsri.32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -114,6 +136,8 @@ define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vsriQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsriQ64:
+;CHECK: vsri.64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
diff --git a/test/CodeGen/ARM/vshl.ll b/test/CodeGen/ARM/vshl.ll
index 993126ea578c0..818e71b8ff89b 100644
--- a/test/CodeGen/ARM/vshl.ll
+++ b/test/CodeGen/ARM/vshl.ll
@@ -1,26 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshl\\.s8} %t | count 2
-; RUN: grep {vshl\\.s16} %t | count 2
-; RUN: grep {vshl\\.s32} %t | count 2
-; RUN: grep {vshl\\.s64} %t | count 2
-; RUN: grep {vshl\\.u8} %t | count 2
-; RUN: grep {vshl\\.u16} %t | count 2
-; RUN: grep {vshl\\.u32} %t | count 2
-; RUN: grep {vshl\\.u64} %t | count 2
-; RUN: grep {vshl\\.i8} %t | count 2
-; RUN: grep {vshl\\.i16} %t | count 2
-; RUN: grep {vshl\\.i32} %t | count 2
-; RUN: grep {vshl\\.i64} %t | count 2
-; RUN: grep {vshr\\.s8} %t | count 2
-; RUN: grep {vshr\\.s16} %t | count 2
-; RUN: grep {vshr\\.s32} %t | count 2
-; RUN: grep {vshr\\.s64} %t | count 2
-; RUN: grep {vshr\\.u8} %t | count 2
-; RUN: grep {vshr\\.u16} %t | count 2
-; RUN: grep {vshr\\.u32} %t | count 2
-; RUN: grep {vshr\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshls8:
+;CHECK: vshl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -28,6 +10,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshls16:
+;CHECK: vshl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -35,6 +19,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshls32:
+;CHECK: vshl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -42,6 +28,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshls64:
+;CHECK: vshl.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -49,6 +37,8 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshlu8:
+;CHECK: vshl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -56,6 +46,8 @@ define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshlu16:
+;CHECK: vshl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -63,6 +55,8 @@ define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshlu32:
+;CHECK: vshl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -70,6 +64,8 @@ define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshlu64:
+;CHECK: vshl.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -77,6 +73,8 @@ define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQs8:
+;CHECK: vshl.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -84,6 +82,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQs16:
+;CHECK: vshl.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -91,6 +91,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQs32:
+;CHECK: vshl.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -98,6 +100,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQs64:
+;CHECK: vshl.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -105,6 +109,8 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vshlQu8:
+;CHECK: vshl.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -112,6 +118,8 @@ define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vshlQu16:
+;CHECK: vshl.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -119,6 +127,8 @@ define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vshlQu32:
+;CHECK: vshl.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -126,6 +136,8 @@ define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vshlQu64:
+;CHECK: vshl.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -136,48 +148,64 @@ define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ; Test a mix of both signed and unsigned intrinsics.
 
 define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
+;CHECK: vshli8:
+;CHECK: vshl.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
+;CHECK: vshli16:
+;CHECK: vshl.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
+;CHECK: vshli32:
+;CHECK: vshl.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
+;CHECK: vshli64:
+;CHECK: vshl.i64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
 	ret <1 x i64> %tmp2
 }
 
 define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
+;CHECK: vshlQi8:
+;CHECK: vshl.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
+;CHECK: vshlQi16:
+;CHECK: vshl.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
+;CHECK: vshlQi32:
+;CHECK: vshl.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
+;CHECK: vshlQi64:
+;CHECK: vshl.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
 	ret <2 x i64> %tmp2
@@ -186,96 +214,128 @@ define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
 ; Right shift by immediate:
 
 define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind {
+;CHECK: vshrs8:
+;CHECK: vshr.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind {
+;CHECK: vshrs16:
+;CHECK: vshr.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind {
+;CHECK: vshrs32:
+;CHECK: vshr.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind {
+;CHECK: vshrs64:
+;CHECK: vshr.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
 	ret <1 x i64> %tmp2
 }
 
 define <8 x i8> @vshru8(<8 x i8>* %A) nounwind {
+;CHECK: vshru8:
+;CHECK: vshr.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vshru16(<4 x i16>* %A) nounwind {
+;CHECK: vshru16:
+;CHECK: vshr.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vshru32(<2 x i32>* %A) nounwind {
+;CHECK: vshru32:
+;CHECK: vshr.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @vshru64(<1 x i64>* %A) nounwind {
+;CHECK: vshru64:
+;CHECK: vshr.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
 	ret <1 x i64> %tmp2
 }
 
 define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind {
+;CHECK: vshrQs8:
+;CHECK: vshr.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind {
+;CHECK: vshrQs16:
+;CHECK: vshr.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind {
+;CHECK: vshrQs32:
+;CHECK: vshr.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind {
+;CHECK: vshrQs64:
+;CHECK: vshr.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
 	ret <2 x i64> %tmp2
 }
 
 define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind {
+;CHECK: vshrQu8:
+;CHECK: vshr.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind {
+;CHECK: vshrQu16:
+;CHECK: vshr.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind {
+;CHECK: vshrQu32:
+;CHECK: vshr.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vshrQu64(<2 x i64>* %A) nounwind {
+;CHECK: vshrQu64:
+;CHECK: vshr.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
 	ret <2 x i64> %tmp2
@@ -300,3 +360,295 @@ declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind re
 declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshls8:
+;CHECK: vrshl.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshls16:
+;CHECK: vrshl.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshls32:
+;CHECK: vrshl.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshls64:
+;CHECK: vrshl.s64
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = load <1 x i64>* %B
+	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+	ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshlu8:
+;CHECK: vrshl.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshlu16:
+;CHECK: vrshl.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshlu32:
+;CHECK: vrshl.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshlu64:
+;CHECK: vrshl.u64
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = load <1 x i64>* %B
+	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+	ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQs8:
+;CHECK: vrshl.s8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQs16:
+;CHECK: vrshl.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQs32:
+;CHECK: vrshl.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQs64:
+;CHECK: vrshl.s64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQu8:
+;CHECK: vrshl.u8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQu16:
+;CHECK: vrshl.u16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQu32:
+;CHECK: vrshl.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQu64:
+;CHECK: vrshl.u64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
+;CHECK: vrshrs8:
+;CHECK: vrshr.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
+;CHECK: vrshrs16:
+;CHECK: vrshr.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
+;CHECK: vrshrs32:
+;CHECK: vrshr.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+	ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
+;CHECK: vrshrs64:
+;CHECK: vrshr.s64
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+	ret <1 x i64> %tmp2
+}
+
+define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
+;CHECK: vrshru8:
+;CHECK: vrshr.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
+;CHECK: vrshru16:
+;CHECK: vrshr.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
+;CHECK: vrshru32:
+;CHECK: vrshr.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+	ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
+;CHECK: vrshru64:
+;CHECK: vrshr.u64
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+	ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQs8:
+;CHECK: vrshr.s8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+	ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQs16:
+;CHECK: vrshr.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQs32:
+;CHECK: vrshr.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+	ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQs64:
+;CHECK: vrshr.s64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+	ret <2 x i64> %tmp2
+}
+
+define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQu8:
+;CHECK: vrshr.u8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+	ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQu16:
+;CHECK: vrshr.u16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQu32:
+;CHECK: vrshr.u32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+	ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQu64:
+;CHECK: vrshr.u64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+	ret <2 x i64> %tmp2
+}
+
+declare <8 x i8>  @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vshll.ll b/test/CodeGen/ARM/vshll.ll
index f81c09a7b9d31..8e85b98f49b10 100644
--- a/test/CodeGen/ARM/vshll.ll
+++ b/test/CodeGen/ARM/vshll.ll
@@ -1,45 +1,48 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshll\\.s8} %t | count 1
-; RUN: grep {vshll\\.s16} %t | count 1
-; RUN: grep {vshll\\.s32} %t | count 1
-; RUN: grep {vshll\\.u8} %t | count 1
-; RUN: grep {vshll\\.u16} %t | count 1
-; RUN: grep {vshll\\.u32} %t | count 1
-; RUN: grep {vshll\\.i8} %t | count 1
-; RUN: grep {vshll\\.i16} %t | count 1
-; RUN: grep {vshll\\.i32} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind {
+;CHECK: vshlls8:
+;CHECK: vshll.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind {
+;CHECK: vshlls16:
+;CHECK: vshll.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind {
+;CHECK: vshlls32:
+;CHECK: vshll.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
 	ret <2 x i64> %tmp2
 }
 
 define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind {
+;CHECK: vshllu8:
+;CHECK: vshll.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind {
+;CHECK: vshllu16:
+;CHECK: vshll.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
+;CHECK: vshllu32:
+;CHECK: vshll.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
 	ret <2 x i64> %tmp2
@@ -48,18 +51,24 @@ define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
 ; The following tests use the maximum shift count, so the signedness is
 ; irrelevant.  Test both signed and unsigned versions.
 define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind {
+;CHECK: vshlli8:
+;CHECK: vshll.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind {
+;CHECK: vshlli16:
+;CHECK: vshll.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 16, i16 16, i16 16, i16 16 >)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind {
+;CHECK: vshlli32:
+;CHECK: vshll.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 32, i32 32 >)
 	ret <2 x i64> %tmp2
diff --git a/test/CodeGen/ARM/vshrn.ll b/test/CodeGen/ARM/vshrn.ll
index bc640cbbca61c..e2544f424a2c3 100644
--- a/test/CodeGen/ARM/vshrn.ll
+++ b/test/CodeGen/ARM/vshrn.ll
@@ -1,21 +1,24 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshrn\\.i16} %t | count 1
-; RUN: grep {vshrn\\.i32} %t | count 1
-; RUN: grep {vshrn\\.i64} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vshrns8:
+;CHECK: vshrn.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vshrns16:
+;CHECK: vshrn.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vshrns32:
+;CHECK: vshrn.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
@@ -24,3 +27,31 @@ define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
 declare <8 x i8>  @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vrshrns8:
+;CHECK: vrshrn.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vrshrns16:
+;CHECK: vrshrn.i32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vrshrns32:
+;CHECK: vrshrn.i64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+	ret <2 x i32> %tmp2
+}
+
+declare <8 x i8>  @llvm.arm.neon.vrshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vsra.ll b/test/CodeGen/ARM/vsra.ll
index e2829dcddae61..acb672d00fa26 100644
--- a/test/CodeGen/ARM/vsra.ll
+++ b/test/CodeGen/ARM/vsra.ll
@@ -1,22 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vsra\\.s8} %t | count 2
-; RUN: grep {vsra\\.s16} %t | count 2
-; RUN: grep {vsra\\.s32} %t | count 2
-; RUN: grep {vsra\\.s64} %t | count 2
-; RUN: grep {vsra\\.u8} %t | count 2
-; RUN: grep {vsra\\.u16} %t | count 2
-; RUN: grep {vsra\\.u32} %t | count 2
-; RUN: grep {vsra\\.u64} %t | count 2
-; RUN: grep {vrsra\\.s8} %t | count 2
-; RUN: grep {vrsra\\.s16} %t | count 2
-; RUN: grep {vrsra\\.s32} %t | count 2
-; RUN: grep {vrsra\\.s64} %t | count 2
-; RUN: grep {vrsra\\.u8} %t | count 2
-; RUN: grep {vrsra\\.u16} %t | count 2
-; RUN: grep {vrsra\\.u32} %t | count 2
-; RUN: grep {vrsra\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsras8:
+;CHECK: vsra.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = ashr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -25,6 +11,8 @@ define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsras16:
+;CHECK: vsra.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = ashr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 >
@@ -33,6 +21,8 @@ define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsras32:
+;CHECK: vsra.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = ashr <2 x i32> %tmp2, < i32 32, i32 32 >
@@ -41,6 +31,8 @@ define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsras64:
+;CHECK: vsra.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = ashr <1 x i64> %tmp2, < i64 64 >
@@ -49,6 +41,8 @@ define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsraQs8:
+;CHECK: vsra.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = ashr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -57,6 +51,8 @@ define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsraQs16:
+;CHECK: vsra.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = ashr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
@@ -65,6 +61,8 @@ define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsraQs32:
+;CHECK: vsra.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = ashr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 >
@@ -73,6 +71,8 @@ define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsraQs64:
+;CHECK: vsra.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = ashr <2 x i64> %tmp2, < i64 64, i64 64 >
@@ -81,6 +81,8 @@ define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsrau8:
+;CHECK: vsra.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = lshr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -89,6 +91,8 @@ define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsrau16:
+;CHECK: vsra.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = lshr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 >
@@ -97,6 +101,8 @@ define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsrau32:
+;CHECK: vsra.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = lshr <2 x i32> %tmp2, < i32 32, i32 32 >
@@ -105,6 +111,8 @@ define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsrau64:
+;CHECK: vsra.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = lshr <1 x i64> %tmp2, < i64 64 >
@@ -113,6 +121,8 @@ define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsraQu8:
+;CHECK: vsra.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = lshr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -121,6 +131,8 @@ define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsraQu16:
+;CHECK: vsra.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = lshr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
@@ -129,6 +141,8 @@ define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsraQu32:
+;CHECK: vsra.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = lshr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 >
@@ -137,6 +151,8 @@ define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsraQu64:
+;CHECK: vsra.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = lshr <2 x i64> %tmp2, < i64 64, i64 64 >
@@ -145,6 +161,8 @@ define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrsras8:
+;CHECK: vrsra.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -153,6 +171,8 @@ define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrsras16:
+;CHECK: vrsra.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -161,6 +181,8 @@ define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrsras32:
+;CHECK: vrsra.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
@@ -169,6 +191,8 @@ define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrsras64:
+;CHECK: vrsra.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
@@ -177,6 +201,8 @@ define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrsrau8:
+;CHECK: vrsra.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -185,6 +211,8 @@ define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrsrau16:
+;CHECK: vrsra.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -193,6 +221,8 @@ define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrsrau32:
+;CHECK: vrsra.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
@@ -201,6 +231,8 @@ define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrsrau64:
+;CHECK: vrsra.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
@@ -209,6 +241,8 @@ define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrsraQs8:
+;CHECK: vrsra.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -217,6 +251,8 @@ define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsraQs16:
+;CHECK: vrsra.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -225,6 +261,8 @@ define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsraQs32:
+;CHECK: vrsra.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -233,6 +271,8 @@ define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsraQs64:
+;CHECK: vrsra.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
@@ -241,6 +281,8 @@ define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrsraQu8:
+;CHECK: vrsra.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -249,6 +291,8 @@ define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsraQu16:
+;CHECK: vrsra.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -257,6 +301,8 @@ define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsraQu32:
+;CHECK: vrsra.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -265,6 +311,8 @@ define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsraQu64:
+;CHECK: vrsra.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
new file mode 100644
index 0000000000000..602b124ffad9a
--- /dev/null
+++ b/test/CodeGen/ARM/vst1.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst1i8:
+;CHECK: vst1.8
+	%tmp1 = load <8 x i8>* %B
+	call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1)
+	ret void
+}
+
+define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst1i16:
+;CHECK: vst1.16
+	%tmp1 = load <4 x i16>* %B
+	call void @llvm.arm.neon.vst1.v4i16(i16* %A, <4 x i16> %tmp1)
+	ret void
+}
+
+define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst1i32:
+;CHECK: vst1.32
+	%tmp1 = load <2 x i32>* %B
+	call void @llvm.arm.neon.vst1.v2i32(i32* %A, <2 x i32> %tmp1)
+	ret void
+}
+
+define void @vst1f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst1f:
+;CHECK: vst1.32
+	%tmp1 = load <2 x float>* %B
+	call void @llvm.arm.neon.vst1.v2f32(float* %A, <2 x float> %tmp1)
+	ret void
+}
+
+define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst1i64:
+;CHECK: vst1.64
+	%tmp1 = load <1 x i64>* %B
+	call void @llvm.arm.neon.vst1.v1i64(i64* %A, <1 x i64> %tmp1)
+	ret void
+}
+
+define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst1Qi8:
+;CHECK: vst1.8
+	%tmp1 = load <16 x i8>* %B
+	call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1)
+	ret void
+}
+
+define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst1Qi16:
+;CHECK: vst1.16
+	%tmp1 = load <8 x i16>* %B
+	call void @llvm.arm.neon.vst1.v8i16(i16* %A, <8 x i16> %tmp1)
+	ret void
+}
+
+define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst1Qi32:
+;CHECK: vst1.32
+	%tmp1 = load <4 x i32>* %B
+	call void @llvm.arm.neon.vst1.v4i32(i32* %A, <4 x i32> %tmp1)
+	ret void
+}
+
+define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst1Qf:
+;CHECK: vst1.32
+	%tmp1 = load <4 x float>* %B
+	call void @llvm.arm.neon.vst1.v4f32(float* %A, <4 x float> %tmp1)
+	ret void
+}
+
+define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
+;CHECK: vst1Qi64:
+;CHECK: vst1.64
+	%tmp1 = load <2 x i64>* %B
+	call void @llvm.arm.neon.vst1.v2i64(i64* %A, <2 x i64> %tmp1)
+	ret void
+}
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>) nounwind
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
new file mode 100644
index 0000000000000..17d6bee0f56c4
--- /dev/null
+++ b/test/CodeGen/ARM/vst2.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst2i8:
+;CHECK: vst2.8
+	%tmp1 = load <8 x i8>* %B
+	call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1)
+	ret void
+}
+
+define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst2i16:
+;CHECK: vst2.16
+	%tmp1 = load <4 x i16>* %B
+	call void @llvm.arm.neon.vst2.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1)
+	ret void
+}
+
+define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst2i32:
+;CHECK: vst2.32
+	%tmp1 = load <2 x i32>* %B
+	call void @llvm.arm.neon.vst2.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1)
+	ret void
+}
+
+define void @vst2f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst2f:
+;CHECK: vst2.32
+	%tmp1 = load <2 x float>* %B
+	call void @llvm.arm.neon.vst2.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1)
+	ret void
+}
+
+define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst2i64:
+;CHECK: vst1.64
+	%tmp1 = load <1 x i64>* %B
+	call void @llvm.arm.neon.vst2.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1)
+	ret void
+}
+
+define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst2Qi8:
+;CHECK: vst2.8
+	%tmp1 = load <16 x i8>* %B
+	call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1)
+	ret void
+}
+
+define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst2Qi16:
+;CHECK: vst2.16
+	%tmp1 = load <8 x i16>* %B
+	call void @llvm.arm.neon.vst2.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1)
+	ret void
+}
+
+define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst2Qi32:
+;CHECK: vst2.32
+	%tmp1 = load <4 x i32>* %B
+	call void @llvm.arm.neon.vst2.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1)
+	ret void
+}
+
+define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst2Qf:
+;CHECK: vst2.32
+	%tmp1 = load <4 x float>* %B
+	call void @llvm.arm.neon.vst2.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1)
+	ret void
+}
+
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
new file mode 100644
index 0000000000000..a831a0c08ce9a
--- /dev/null
+++ b/test/CodeGen/ARM/vst3.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst3i8:
+;CHECK: vst3.8
+	%tmp1 = load <8 x i8>* %B
+	call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+	ret void
+}
+
+define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst3i16:
+;CHECK: vst3.16
+	%tmp1 = load <4 x i16>* %B
+	call void @llvm.arm.neon.vst3.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+	ret void
+}
+
+define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst3i32:
+;CHECK: vst3.32
+	%tmp1 = load <2 x i32>* %B
+	call void @llvm.arm.neon.vst3.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+	ret void
+}
+
+define void @vst3f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst3f:
+;CHECK: vst3.32
+	%tmp1 = load <2 x float>* %B
+	call void @llvm.arm.neon.vst3.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+	ret void
+}
+
+define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst3i64:
+;CHECK: vst1.64
+	%tmp1 = load <1 x i64>* %B
+	call void @llvm.arm.neon.vst3.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+	ret void
+}
+
+define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst3Qi8:
+;CHECK: vst3.8
+;CHECK: vst3.8
+	%tmp1 = load <16 x i8>* %B
+	call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+	ret void
+}
+
+define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst3Qi16:
+;CHECK: vst3.16
+;CHECK: vst3.16
+	%tmp1 = load <8 x i16>* %B
+	call void @llvm.arm.neon.vst3.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+	ret void
+}
+
+define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst3Qi32:
+;CHECK: vst3.32
+;CHECK: vst3.32
+	%tmp1 = load <4 x i32>* %B
+	call void @llvm.arm.neon.vst3.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+	ret void
+}
+
+define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst3Qf:
+;CHECK: vst3.32
+;CHECK: vst3.32
+	%tmp1 = load <4 x float>* %B
+	call void @llvm.arm.neon.vst3.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+	ret void
+}
+
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
new file mode 100644
index 0000000000000..d92c017c30b29
--- /dev/null
+++ b/test/CodeGen/ARM/vst4.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst4i8:
+;CHECK: vst4.8
+	%tmp1 = load <8 x i8>* %B
+	call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+	ret void
+}
+
+define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst4i16:
+;CHECK: vst4.16
+	%tmp1 = load <4 x i16>* %B
+	call void @llvm.arm.neon.vst4.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+	ret void
+}
+
+define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst4i32:
+;CHECK: vst4.32
+	%tmp1 = load <2 x i32>* %B
+	call void @llvm.arm.neon.vst4.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+	ret void
+}
+
+define void @vst4f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst4f:
+;CHECK: vst4.32
+	%tmp1 = load <2 x float>* %B
+	call void @llvm.arm.neon.vst4.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+	ret void
+}
+
+define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst4i64:
+;CHECK: vst1.64
+	%tmp1 = load <1 x i64>* %B
+	call void @llvm.arm.neon.vst4.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+	ret void
+}
+
+define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst4Qi8:
+;CHECK: vst4.8
+;CHECK: vst4.8
+	%tmp1 = load <16 x i8>* %B
+	call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+	ret void
+}
+
+define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst4Qi16:
+;CHECK: vst4.16
+;CHECK: vst4.16
+	%tmp1 = load <8 x i16>* %B
+	call void @llvm.arm.neon.vst4.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+	ret void
+}
+
+define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst4Qi32:
+;CHECK: vst4.32
+;CHECK: vst4.32
+	%tmp1 = load <4 x i32>* %B
+	call void @llvm.arm.neon.vst4.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+	ret void
+}
+
+define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst4Qf:
+;CHECK: vst4.32
+;CHECK: vst4.32
+	%tmp1 = load <4 x float>* %B
+	call void @llvm.arm.neon.vst4.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+	ret void
+}
+
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
new file mode 100644
index 0000000000000..3bfb14f17b775
--- /dev/null
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -0,0 +1,197 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst2lanei8:
+;CHECK: vst2.8
+	%tmp1 = load <8 x i8>* %B
+	call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst2lanei16:
+;CHECK: vst2.16
+	%tmp1 = load <4 x i16>* %B
+	call void @llvm.arm.neon.vst2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst2lanei32:
+;CHECK: vst2.32
+	%tmp1 = load <2 x i32>* %B
+	call void @llvm.arm.neon.vst2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst2lanef:
+;CHECK: vst2.32
+	%tmp1 = load <2 x float>* %B
+	call void @llvm.arm.neon.vst2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst2laneQi16:
+;CHECK: vst2.16
+	%tmp1 = load <8 x i16>* %B
+	call void @llvm.arm.neon.vst2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst2laneQi32:
+;CHECK: vst2.32
+	%tmp1 = load <4 x i32>* %B
+	call void @llvm.arm.neon.vst2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+	ret void
+}
+
+define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst2laneQf:
+;CHECK: vst2.32
+	%tmp1 = load <4 x float>* %B
+	call void @llvm.arm.neon.vst2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 3)
+	ret void
+}
+
+declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+
+define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst3lanei8:
+;CHECK: vst3.8
+	%tmp1 = load <8 x i8>* %B
+	call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst3lanei16:
+;CHECK: vst3.16
+	%tmp1 = load <4 x i16>* %B
+	call void @llvm.arm.neon.vst3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst3lanei32:
+;CHECK: vst3.32
+	%tmp1 = load <2 x i32>* %B
+	call void @llvm.arm.neon.vst3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst3lanef:
+;CHECK: vst3.32
+	%tmp1 = load <2 x float>* %B
+	call void @llvm.arm.neon.vst3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst3laneQi16:
+;CHECK: vst3.16
+	%tmp1 = load <8 x i16>* %B
+	call void @llvm.arm.neon.vst3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6)
+	ret void
+}
+
+define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst3laneQi32:
+;CHECK: vst3.32
+	%tmp1 = load <4 x i32>* %B
+	call void @llvm.arm.neon.vst3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0)
+	ret void
+}
+
+define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst3laneQf:
+;CHECK: vst3.32
+	%tmp1 = load <4 x float>* %B
+	call void @llvm.arm.neon.vst3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+	ret void
+}
+
+declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
+
+
+define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst4lanei8:
+;CHECK: vst4.8
+	%tmp1 = load <8 x i8>* %B
+	call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst4lanei16:
+;CHECK: vst4.16
+	%tmp1 = load <4 x i16>* %B
+	call void @llvm.arm.neon.vst4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst4lanei32:
+;CHECK: vst4.32
+	%tmp1 = load <2 x i32>* %B
+	call void @llvm.arm.neon.vst4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst4lanef:
+;CHECK: vst4.32
+	%tmp1 = load <2 x float>* %B
+	call void @llvm.arm.neon.vst4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+	ret void
+}
+
+define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst4laneQi16:
+;CHECK: vst4.16
+	%tmp1 = load <8 x i16>* %B
+	call void @llvm.arm.neon.vst4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7)
+	ret void
+}
+
+define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst4laneQi32:
+;CHECK: vst4.32
+	%tmp1 = load <4 x i32>* %B
+	call void @llvm.arm.neon.vst4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+	ret void
+}
+
+define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst4laneQf:
+;CHECK: vst4.32
+	%tmp1 = load <4 x float>* %B
+	call void @llvm.arm.neon.vst4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+	ret void
+}
+
+declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll
index 85dea41835f85..8f0055fd41037 100644
--- a/test/CodeGen/ARM/vsub.ll
+++ b/test/CodeGen/ARM/vsub.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vsub\\.i8} %t | count 2
-; RUN: grep {vsub\\.i16} %t | count 2
-; RUN: grep {vsub\\.i32} %t | count 2
-; RUN: grep {vsub\\.i64} %t | count 2
-; RUN: grep {vsub\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubi8:
+;CHECK: vsub.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = sub <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubi16:
+;CHECK: vsub.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = sub <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubi32:
+;CHECK: vsub.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = sub <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsubi64:
+;CHECK: vsub.i64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = sub <1 x i64> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vsubf32:
+;CHECK: vsub.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
 	%tmp3 = sub <2 x float> %tmp1, %tmp2
@@ -41,6 +46,8 @@ define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vsubQi8:
+;CHECK: vsub.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = sub <16 x i8> %tmp1, %tmp2
@@ -48,6 +55,8 @@ define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsubQi16:
+;CHECK: vsub.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = sub <8 x i16> %tmp1, %tmp2
@@ -55,6 +64,8 @@ define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsubQi32:
+;CHECK: vsub.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = sub <4 x i32> %tmp1, %tmp2
@@ -62,6 +73,8 @@ define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsubQi64:
+;CHECK: vsub.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = sub <2 x i64> %tmp1, %tmp2
@@ -69,8 +82,196 @@ define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vsubQf32:
+;CHECK: vsub.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
 	%tmp3 = sub <4 x float> %tmp1, %tmp2
 	ret <4 x float> %tmp3
 }
+
+define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsubhni16:
+;CHECK: vsubhn.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsubhni32:
+;CHECK: vsubhn.i32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsubhni64:
+;CHECK: vsubhn.i64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+declare <8 x i8>  @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsubhni16:
+;CHECK: vrsubhn.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsubhni32:
+;CHECK: vrsubhn.i32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsubhni64:
+;CHECK: vrsubhn.i64
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+	ret <2 x i32> %tmp3
+}
+
+declare <8 x i8>  @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubls8:
+;CHECK: vsubl.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubls16:
+;CHECK: vsubl.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubls32:
+;CHECK: vsubl.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsublu8:
+;CHECK: vsubl.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsublu16:
+;CHECK: vsubl.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsublu32:
+;CHECK: vsubl.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubws8:
+;CHECK: vsubw.s8
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubws16:
+;CHECK: vsubw.s16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubws32:
+;CHECK: vsubw.s32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubwu8:
+;CHECK: vsubw.u8
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubwu16:
+;CHECK: vsubw.u16
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubwu32:
+;CHECK: vsubw.u32
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll
new file mode 100644
index 0000000000000..926498739e8ab
--- /dev/null
+++ b/test/CodeGen/ARM/vtbl.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>, <8 x i8> }
+%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>, <8 x i8> }
+
+define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtbl1:
+;CHECK: vtbl.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i8> %tmp3
+}
+
+define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind {
+;CHECK: vtbl2:
+;CHECK: vtbl.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x2_t* %B
+        %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+	%tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4)
+	ret <8 x i8> %tmp5
+}
+
+define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind {
+;CHECK: vtbl3:
+;CHECK: vtbl.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x3_t* %B
+        %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+	%tmp6 = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
+	ret <8 x i8> %tmp6
+}
+
+define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind {
+;CHECK: vtbl4:
+;CHECK: vtbl.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x4_t* %B
+        %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+        %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+	%tmp7 = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
+	ret <8 x i8> %tmp7
+}
+
+define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx1:
+;CHECK: vtbx.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = load <8 x i8>* %C
+	%tmp4 = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+	ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx2:
+;CHECK: vtbx.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x2_t* %B
+        %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+	%tmp5 = load <8 x i8>* %C
+	%tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
+	ret <8 x i8> %tmp6
+}
+
+define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx3:
+;CHECK: vtbx.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x3_t* %B
+        %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+	%tmp6 = load <8 x i8>* %C
+	%tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
+	ret <8 x i8> %tmp7
+}
+
+define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx4:
+;CHECK: vtbx.8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x4_t* %B
+        %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+        %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+	%tmp7 = load <8 x i8>* %C
+	%tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
+	ret <8 x i8> %tmp8
+}
+
+declare <8 x i8>  @llvm.arm.neon.vtbl1(<8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8>  @llvm.arm.neon.vtbl2(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8>  @llvm.arm.neon.vtbl3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8>  @llvm.arm.neon.vtbl4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+
+declare <8 x i8>  @llvm.arm.neon.vtbx1(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8>  @llvm.arm.neon.vtbx2(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8>  @llvm.arm.neon.vtbx3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8>  @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
new file mode 100644
index 0000000000000..5122b0981e961
--- /dev/null
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -0,0 +1,97 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtrni8:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+        %tmp5 = add <8 x i8> %tmp3, %tmp4
+	ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vtrni16:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+        %tmp5 = add <4 x i16> %tmp3, %tmp4
+	ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vtrni32:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.i32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
+	%tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
+        %tmp5 = add <2 x i32> %tmp3, %tmp4
+	ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vtrnf:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.f32
+	%tmp1 = load <2 x float>* %A
+	%tmp2 = load <2 x float>* %B
+	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
+	%tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
+        %tmp5 = add <2 x float> %tmp3, %tmp4
+	ret <2 x float> %tmp5
+}
+
+define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vtrnQi8:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+        %tmp5 = add <16 x i8> %tmp3, %tmp4
+	ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtrnQi16:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+        %tmp5 = add <8 x i16> %tmp3, %tmp4
+	ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vtrnQi32:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.i32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+        %tmp5 = add <4 x i32> %tmp3, %tmp4
+	ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vtrnQf:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = load <4 x float>* %B
+	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+        %tmp5 = add <4 x float> %tmp3, %tmp4
+	ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
new file mode 100644
index 0000000000000..e531718d94aa1
--- /dev/null
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vuzpi8:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+        %tmp5 = add <8 x i8> %tmp3, %tmp4
+	ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vuzpi16:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+        %tmp5 = add <4 x i16> %tmp3, %tmp4
+	ret <4 x i16> %tmp5
+}
+
+; VUZP.32 is equivalent to VTRN.32 for 64-bit vectors.
+
+define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vuzpQi8:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+        %tmp5 = add <16 x i8> %tmp3, %tmp4
+	ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vuzpQi16:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+        %tmp5 = add <8 x i16> %tmp3, %tmp4
+	ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vuzpQi32:
+;CHECK: vuzp.32
+;CHECK-NEXT: vadd.i32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+        %tmp5 = add <4 x i32> %tmp3, %tmp4
+	ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vuzpQf:
+;CHECK: vuzp.32
+;CHECK-NEXT: vadd.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = load <4 x float>* %B
+	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+        %tmp5 = add <4 x float> %tmp3, %tmp4
+	ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
new file mode 100644
index 0000000000000..32f7e0d02c446
--- /dev/null
+++ b/test/CodeGen/ARM/vzip.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vzipi8:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+        %tmp5 = add <8 x i8> %tmp3, %tmp4
+	ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vzipi16:
+;CHECK: vzip.16
+;CHECK-NEXT: vadd.i16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+        %tmp5 = add <4 x i16> %tmp3, %tmp4
+	ret <4 x i16> %tmp5
+}
+
+; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors.
+
+define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vzipQi8:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+        %tmp5 = add <16 x i8> %tmp3, %tmp4
+	ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vzipQi16:
+;CHECK: vzip.16
+;CHECK-NEXT: vadd.i16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+        %tmp5 = add <8 x i16> %tmp3, %tmp4
+	ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vzipQi32:
+;CHECK: vzip.32
+;CHECK-NEXT: vadd.i32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+        %tmp5 = add <4 x i32> %tmp3, %tmp4
+	ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vzipQf:
+;CHECK: vzip.32
+;CHECK-NEXT: vadd.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = load <4 x float>* %B
+	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+        %tmp5 = add <4 x float> %tmp3, %tmp4
+	ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/weak.ll b/test/CodeGen/ARM/weak.ll
index dadd1b9767985..5ac4b8c061d8a 100644
--- a/test/CodeGen/ARM/weak.ll
+++ b/test/CodeGen/ARM/weak.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep .weak.*f
-; RUN: llvm-as < %s | llc -march=arm | grep .weak.*h
+; RUN: llc < %s -march=arm | grep .weak.*f
+; RUN: llc < %s -march=arm | grep .weak.*h
 
 define weak i32 @f() {
 entry:
diff --git a/test/CodeGen/ARM/weak2.ll b/test/CodeGen/ARM/weak2.ll
index a57a76707ce69..cf327bbf5c876 100644
--- a/test/CodeGen/ARM/weak2.ll
+++ b/test/CodeGen/ARM/weak2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep .weak
+; RUN: llc < %s -march=arm | grep .weak
 
 define i32 @f(i32 %a) {
 entry:
diff --git a/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll b/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
index c96b14ac97e59..87d992836bc3c 100644
--- a/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
+++ b/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
@@ -1,5 +1,5 @@
 ; There should be exactly two calls here (memset and malloc), no more.
-; RUN: llvm-as < %s | llc -march=alpha | grep jsr | count 2
+; RUN: llc < %s -march=alpha | grep jsr | count 2
 
 %typedef.bc_struct = type opaque
 declare void @llvm.memset.i64(i8*, i8, i64, i32)
diff --git a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll b/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
index b45c2a44388e1..4b3d022c1d8d4 100644
--- a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
+++ b/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
@@ -1,5 +1,5 @@
 ; This shouldn't crash
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
 
 @.str_4 = external global [44 x i8]             ; <[44 x i8]*> [#uses=0]
 
diff --git a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll b/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
index f89997e0bf6b5..65d2a8d02ac8d 100644
--- a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
+++ b/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
@@ -1,5 +1,5 @@
 ; The global symbol should be legalized
-; RUN: llvm-as < %s | llc -march=alpha 
+; RUN: llc < %s -march=alpha 
 
 target datalayout = "e-p:64:64"
         %struct.LIST_HELP = type { %struct.LIST_HELP*, i8* }
diff --git a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll b/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
index 05ebe1eb888b2..45587f08fd6cf 100644
--- a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
+++ b/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
@@ -1,5 +1,5 @@
 ; This shouldn't crash
-; RUN: llvm-as < %s | llc -march=alpha 
+; RUN: llc < %s -march=alpha 
 
 target datalayout = "e-p:64:64"
 target triple = "alphaev6-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2006-04-04-zextload.ll b/test/CodeGen/Alpha/2006-04-04-zextload.ll
index f3ff5b1750fe3..2b28903c50148 100644
--- a/test/CodeGen/Alpha/2006-04-04-zextload.ll
+++ b/test/CodeGen/Alpha/2006-04-04-zextload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
 
 target datalayout = "e-p:64:64"
 target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll b/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
index 6b55047579133..5d31bc3798dc1 100644
--- a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
+++ b/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
 
 target datalayout = "e-p:64:64"
 target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2006-11-01-vastart.ll b/test/CodeGen/Alpha/2006-11-01-vastart.ll
index 3f42eda4beb5b..14e0bccc8482c 100644
--- a/test/CodeGen/Alpha/2006-11-01-vastart.ll
+++ b/test/CodeGen/Alpha/2006-11-01-vastart.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
 
 target datalayout = "e-p:64:64"
 target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll b/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
index 3eac13d2b7ac1..b537e250ad869 100644
--- a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
+++ b/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
 
 ;FIXME: this should produce no mul inst.  But not crashing will have to do for now
 
diff --git a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll b/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
index 9d814da982d46..1a4b40e2da2c8 100644
--- a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
+++ b/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
 target triple = "alphaev6-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2008-11-12-Add128.ll b/test/CodeGen/Alpha/2008-11-12-Add128.ll
index e6e57464cb216..8b9b603fe6feb 100644
--- a/test/CodeGen/Alpha/2008-11-12-Add128.ll
+++ b/test/CodeGen/Alpha/2008-11-12-Add128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR3044
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
 target triple = "alphaev6-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll b/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
new file mode 100644
index 0000000000000..cfbf7fcdfd900
--- /dev/null
+++ b/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=alpha
+
+define i1 @a(float %x) {
+  %r = fcmp ult float %x, 1.0
+  ret i1 %r
+}
diff --git a/test/CodeGen/Alpha/add.ll b/test/CodeGen/Alpha/add.ll
index 260584b79048d..24a74188f8c09 100644
--- a/test/CodeGen/Alpha/add.ll
+++ b/test/CodeGen/Alpha/add.ll
@@ -1,6 +1,6 @@
 ;test all the shifted and signextending adds and subs with and without consts
 ;
-; RUN: llvm-as < %s | llc -march=alpha -o %t.s -f
+; RUN: llc < %s -march=alpha -o %t.s
 ; RUN: grep {	addl} %t.s | count 2
 ; RUN: grep {	addq} %t.s | count 2
 ; RUN: grep {	subl} %t.s | count 2
diff --git a/test/CodeGen/Alpha/add128.ll b/test/CodeGen/Alpha/add128.ll
index 61d020890e89b..fa3b949fc7b8c 100644
--- a/test/CodeGen/Alpha/add128.ll
+++ b/test/CodeGen/Alpha/add128.ll
@@ -1,6 +1,6 @@
 ;test for ADDC and ADDE expansion
 ;
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
 
 define i128 @add128(i128 %x, i128 %y) {
 entry:
diff --git a/test/CodeGen/Alpha/bic.ll b/test/CodeGen/Alpha/bic.ll
index 6e635119e569e..9f0035097b0ef 100644
--- a/test/CodeGen/Alpha/bic.ll
+++ b/test/CodeGen/Alpha/bic.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase codegens to the bic instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep {bic}
+; RUN: llc < %s -march=alpha | grep {bic}
 
 define i64 @bar(i64 %x, i64 %y) {
 entry:
diff --git a/test/CodeGen/Alpha/bsr.ll b/test/CodeGen/Alpha/bsr.ll
index d4618577a0444..14f6b46c54907 100644
--- a/test/CodeGen/Alpha/bsr.ll
+++ b/test/CodeGen/Alpha/bsr.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase codegens the bsr instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep bsr
+; RUN: llc < %s -march=alpha | grep bsr
 
 define internal i64 @abc(i32 %x) {
         %tmp.2 = add i32 %x, -1         ; <i32> [#uses=1]
diff --git a/test/CodeGen/Alpha/call_adj.ll b/test/CodeGen/Alpha/call_adj.ll
index ee8cda840e0ac..24e97a92b86b1 100644
--- a/test/CodeGen/Alpha/call_adj.ll
+++ b/test/CodeGen/Alpha/call_adj.ll
@@ -1,5 +1,5 @@
 ;All this should do is not crash
-;RUN: llvm-as < %s | llc -march=alpha
+;RUN: llc < %s -march=alpha
 
 target datalayout = "e-p:64:64"
 target triple = "alphaev67-unknown-linux-gnu"
diff --git a/test/CodeGen/Alpha/cmov.ll b/test/CodeGen/Alpha/cmov.ll
index 08e1dad2c0e77..9b655f03efdcf 100644
--- a/test/CodeGen/Alpha/cmov.ll
+++ b/test/CodeGen/Alpha/cmov.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=alpha | not grep cmovlt
-; RUN: llvm-as < %s | llc -march=alpha | grep cmoveq
+; RUN: llc < %s -march=alpha | not grep cmovlt
+; RUN: llc < %s -march=alpha | grep cmoveq
 
 define i64 @cmov_lt(i64 %a, i64 %c) {
 entry:
diff --git a/test/CodeGen/Alpha/cmpbge.ll b/test/CodeGen/Alpha/cmpbge.ll
index 9b83215181c96..e88d2eec75e10 100644
--- a/test/CodeGen/Alpha/cmpbge.ll
+++ b/test/CodeGen/Alpha/cmpbge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep cmpbge | count 2
+; RUN: llc < %s -march=alpha | grep cmpbge | count 2
 
 define i1 @test1(i64 %A, i64 %B) {
         %C = and i64 %A, 255            ; <i64> [#uses=1]
diff --git a/test/CodeGen/Alpha/ctlz.ll b/test/CodeGen/Alpha/ctlz.ll
index 83d97b5833c46..aa1588aa39e8f 100644
--- a/test/CodeGen/Alpha/ctlz.ll
+++ b/test/CodeGen/Alpha/ctlz.ll
@@ -1,8 +1,8 @@
 ; Make sure this testcase codegens to the ctlz instruction
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev67 | grep -i ctlz
-; RUN: llvm-as < %s | llc -march=alpha -mattr=+CIX | grep -i ctlz
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev6 | not grep -i ctlz
-; RUN: llvm-as < %s | llc -march=alpha -mattr=-CIX | not grep -i ctlz
+; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctlz
+; RUN: llc < %s -march=alpha -mattr=+CIX | grep -i ctlz
+; RUN: llc < %s -march=alpha -mcpu=ev6 | not grep -i ctlz
+; RUN: llc < %s -march=alpha -mattr=-CIX | not grep -i ctlz
 
 declare i8 @llvm.ctlz.i8(i8)
 
diff --git a/test/CodeGen/Alpha/ctlz_e.ll b/test/CodeGen/Alpha/ctlz_e.ll
index 56027dd3ea7b9..230e096b08d2b 100644
--- a/test/CodeGen/Alpha/ctlz_e.ll
+++ b/test/CodeGen/Alpha/ctlz_e.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase does not use ctpop
-; RUN: llvm-as < %s | llc -march=alpha | not grep -i ctpop 
+; RUN: llc < %s -march=alpha | not grep -i ctpop 
 
 declare i64 @llvm.ctlz.i64(i64)
 
diff --git a/test/CodeGen/Alpha/ctpop.ll b/test/CodeGen/Alpha/ctpop.ll
index a528d728be066..f887882cec2fb 100644
--- a/test/CodeGen/Alpha/ctpop.ll
+++ b/test/CodeGen/Alpha/ctpop.ll
@@ -1,10 +1,10 @@
 ; Make sure this testcase codegens to the ctpop instruction
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev67 | grep -i ctpop
-; RUN: llvm-as < %s | llc -march=alpha -mattr=+CIX | \
+; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctpop
+; RUN: llc < %s -march=alpha -mattr=+CIX | \
 ; RUN:   grep -i ctpop
-; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev6 | \
+; RUN: llc < %s -march=alpha -mcpu=ev6 | \
 ; RUN:   not grep -i ctpop
-; RUN: llvm-as < %s | llc -march=alpha -mattr=-CIX | \
+; RUN: llc < %s -march=alpha -mattr=-CIX | \
 ; RUN:   not grep -i ctpop
 
 declare i64 @llvm.ctpop.i64(i64)
diff --git a/test/CodeGen/Alpha/eqv.ll b/test/CodeGen/Alpha/eqv.ll
index 2539d72474487..b3413d6b5dce5 100644
--- a/test/CodeGen/Alpha/eqv.ll
+++ b/test/CodeGen/Alpha/eqv.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase codegens to the eqv instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep eqv
+; RUN: llc < %s -march=alpha | grep eqv
 
 define i64 @bar(i64 %x, i64 %y) {
 entry:
diff --git a/test/CodeGen/Alpha/i32_sub_1.ll b/test/CodeGen/Alpha/i32_sub_1.ll
index 7af813454072d..ffeafbd75938d 100644
--- a/test/CodeGen/Alpha/i32_sub_1.ll
+++ b/test/CodeGen/Alpha/i32_sub_1.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase codegens to the ctpop instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep -i {subl \$16,1,\$0}
+; RUN: llc < %s -march=alpha | grep -i {subl \$16,1,\$0}
 
 
 define i32 @foo(i32 signext %x) signext {
diff --git a/test/CodeGen/Alpha/illegal-element-type.ll b/test/CodeGen/Alpha/illegal-element-type.ll
index c95d57153db2c..4cf80dee57b78 100644
--- a/test/CodeGen/Alpha/illegal-element-type.ll
+++ b/test/CodeGen/Alpha/illegal-element-type.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=alphaev6-unknown-linux-gnu
+; RUN: llc < %s -mtriple=alphaev6-unknown-linux-gnu
 
 define void @foo() {
 entry:
diff --git a/test/CodeGen/Alpha/jmp_table.ll b/test/CodeGen/Alpha/jmp_table.ll
index d7b61163e7b80..917c9327dc16f 100644
--- a/test/CodeGen/Alpha/jmp_table.ll
+++ b/test/CodeGen/Alpha/jmp_table.ll
@@ -1,9 +1,9 @@
 ; try to check that we have the most important instructions, which shouldn't 
 ; appear otherwise
-; RUN: llvm-as < %s | llc -march=alpha | grep jmp
-; RUN: llvm-as < %s | llc -march=alpha | grep gprel32
-; RUN: llvm-as < %s | llc -march=alpha | grep ldl
-; RUN: llvm-as < %s | llc -march=alpha | grep rodata
+; RUN: llc < %s -march=alpha | grep jmp
+; RUN: llc < %s -march=alpha | grep gprel32
+; RUN: llc < %s -march=alpha | grep ldl
+; RUN: llc < %s -march=alpha | grep rodata
 ; END.
 
 target datalayout = "e-p:64:64"
diff --git a/test/CodeGen/Alpha/mb.ll b/test/CodeGen/Alpha/mb.ll
index 50c245ff3d9e0..93e8b1b04465a 100644
--- a/test/CodeGen/Alpha/mb.ll
+++ b/test/CodeGen/Alpha/mb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep mb
+; RUN: llc < %s -march=alpha | grep mb
 
 declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
 
diff --git a/test/CodeGen/Alpha/mul128.ll b/test/CodeGen/Alpha/mul128.ll
index b069fea4a5ca9..daf8409409dd5 100644
--- a/test/CodeGen/Alpha/mul128.ll
+++ b/test/CodeGen/Alpha/mul128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
 
 define i128 @__mulvdi3(i128 %a, i128 %b) nounwind {
 entry:
diff --git a/test/CodeGen/Alpha/mul5.ll b/test/CodeGen/Alpha/mul5.ll
index 5af73a1cc774c..4075dd6289ebc 100644
--- a/test/CodeGen/Alpha/mul5.ll
+++ b/test/CodeGen/Alpha/mul5.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase does not use mulq
-; RUN: llvm-as < %s | llc -march=alpha | not grep -i mul
+; RUN: llc < %s -march=alpha | not grep -i mul
 
 define i64 @foo1(i64 %x) {
 entry:
diff --git a/test/CodeGen/Alpha/neg1.ll b/test/CodeGen/Alpha/neg1.ll
index ddaed4a0c6e24..0db767f68e517 100644
--- a/test/CodeGen/Alpha/neg1.ll
+++ b/test/CodeGen/Alpha/neg1.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase codegens to the lda -1 instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep {\\-1}
+; RUN: llc < %s -march=alpha | grep {\\-1}
 
 define i64 @bar() {
 entry:
diff --git a/test/CodeGen/Alpha/not.ll b/test/CodeGen/Alpha/not.ll
index cea9f6bc95f58..4f0a5c2946ef3 100644
--- a/test/CodeGen/Alpha/not.ll
+++ b/test/CodeGen/Alpha/not.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase codegens to the ornot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep eqv
+; RUN: llc < %s -march=alpha | grep eqv
 
 define i64 @bar(i64 %x) {
 entry:
diff --git a/test/CodeGen/Alpha/ornot.ll b/test/CodeGen/Alpha/ornot.ll
index b8d350dc100e1..f930e345ce426 100644
--- a/test/CodeGen/Alpha/ornot.ll
+++ b/test/CodeGen/Alpha/ornot.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase codegens to the ornot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep ornot
+; RUN: llc < %s -march=alpha | grep ornot
 
 define i64 @bar(i64 %x, i64 %y) {
 entry:
diff --git a/test/CodeGen/Alpha/private.ll b/test/CodeGen/Alpha/private.ll
index 2d9ed1e413db5..96ab4eb400ea1 100644
--- a/test/CodeGen/Alpha/private.ll
+++ b/test/CodeGen/Alpha/private.ll
@@ -1,6 +1,6 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llvm-as < %s | llc -march=alpha > %t
+; RUN: llc < %s -march=alpha > %t
 ; RUN: grep \\\$foo: %t
 ; RUN: grep bsr.*\\\$\\\$foo %t
 ; RUN: grep \\\$baz: %t
diff --git a/test/CodeGen/Alpha/rpcc.ll b/test/CodeGen/Alpha/rpcc.ll
index 193a47f7ce3f1..d6665b5d8d6f8 100644
--- a/test/CodeGen/Alpha/rpcc.ll
+++ b/test/CodeGen/Alpha/rpcc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep rpcc
+; RUN: llc < %s -march=alpha | grep rpcc
 
 declare i64 @llvm.readcyclecounter()
 
diff --git a/test/CodeGen/Alpha/srl_and.ll b/test/CodeGen/Alpha/srl_and.ll
index 2344833dc5b36..3042ef3d0237e 100644
--- a/test/CodeGen/Alpha/srl_and.ll
+++ b/test/CodeGen/Alpha/srl_and.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase codegens to the zapnot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
 
 define i64 @foo(i64 %y) {
 entry:
diff --git a/test/CodeGen/Alpha/sub128.ll b/test/CodeGen/Alpha/sub128.ll
index cb18559e532c0..d26404bfe024b 100644
--- a/test/CodeGen/Alpha/sub128.ll
+++ b/test/CodeGen/Alpha/sub128.ll
@@ -1,6 +1,6 @@
 ;test for SUBC and SUBE expansion
 ;
-; RUN: llvm-as < %s | llc -march=alpha
+; RUN: llc < %s -march=alpha
 
 define i128 @sub128(i128 %x, i128 %y) {
 entry:
diff --git a/test/CodeGen/Alpha/weak.ll b/test/CodeGen/Alpha/weak.ll
index e00e6d7bfe260..ff04de9ef4679 100644
--- a/test/CodeGen/Alpha/weak.ll
+++ b/test/CodeGen/Alpha/weak.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep .weak.*f
-; RUN: llvm-as < %s | llc -march=alpha | grep .weak.*h
+; RUN: llc < %s -march=alpha | grep .weak.*f
+; RUN: llc < %s -march=alpha | grep .weak.*h
 
 define weak i32 @f() {
 entry:
diff --git a/test/CodeGen/Alpha/wmb.ll b/test/CodeGen/Alpha/wmb.ll
index f745cd52ba3d8..a3e2ccf57256c 100644
--- a/test/CodeGen/Alpha/wmb.ll
+++ b/test/CodeGen/Alpha/wmb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep wmb
+; RUN: llc < %s -march=alpha | grep wmb
 
 declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
 
diff --git a/test/CodeGen/Alpha/zapnot.ll b/test/CodeGen/Alpha/zapnot.ll
index 7fec19bdf3f57..d00984acf7f32 100644
--- a/test/CodeGen/Alpha/zapnot.ll
+++ b/test/CodeGen/Alpha/zapnot.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase codegens to the bic instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
 
 
 define i16 @foo(i64 %y) zeroext {
diff --git a/test/CodeGen/Alpha/zapnot2.ll b/test/CodeGen/Alpha/zapnot2.ll
index 6a33ca2ab21f2..cd3caae41d5aa 100644
--- a/test/CodeGen/Alpha/zapnot2.ll
+++ b/test/CodeGen/Alpha/zapnot2.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase codegens to the zapnot instruction
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
 
 define i64 @bar(i64 %x) {
 entry:
diff --git a/test/CodeGen/Alpha/zapnot3.ll b/test/CodeGen/Alpha/zapnot3.ll
index 26aab37d7bb9f..f02961f1eaec8 100644
--- a/test/CodeGen/Alpha/zapnot3.ll
+++ b/test/CodeGen/Alpha/zapnot3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
 
 ;demanded bits mess up this mask in a hard to fix way
 ;define i64 @foo(i64 %y) {
diff --git a/test/CodeGen/Alpha/zapnot4.ll b/test/CodeGen/Alpha/zapnot4.ll
index 1be3ca2e3c72c..89beeef2d8100 100644
--- a/test/CodeGen/Alpha/zapnot4.ll
+++ b/test/CodeGen/Alpha/zapnot4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=alpha | grep zapnot
+; RUN: llc < %s -march=alpha | grep zapnot
 
 define i64 @foo(i64 %y) {
         %tmp = shl i64 %y, 3            ; <i64> [#uses=1]
diff --git a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
new file mode 100644
index 0000000000000..3ee5e8df9972f
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs
+
+; Provoke an error in LowerSubregsPass::LowerExtract where the live range of a
+; super-register is illegally extended.
+
+define i16 @f(i16 %x1, i16 %x2, i16 %x3, i16 %x4) {
+  %y1 = add i16 %x1, 1
+  %y2 = add i16 %x2, 2
+  %y3 = add i16 %x3, 3
+  %y4 = add i16 %x4, 4
+  %z12 = add i16 %y1, %y2
+  %z34 = add i16 %y3, %y4
+  %p = add i16 %z12, %z34
+  ret i16 %p
+}
diff --git a/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll b/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
new file mode 100644
index 0000000000000..e5d1637a50cb5
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+declare i64 @llvm.cttz.i64(i64) nounwind readnone
+
+declare i16 @llvm.cttz.i16(i16) nounwind readnone
+
+declare i8 @llvm.cttz.i8(i8) nounwind readnone
+
+define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
+	%a = call i8 @llvm.cttz.i8(i8 %A)		; <i8> [#uses=1]
+	%b = call i16 @llvm.cttz.i16(i16 %B)		; <i16> [#uses=1]
+	%d = call i64 @llvm.cttz.i64(i64 %D)		; <i64> [#uses=1]
+	store i8 %a, i8* %AP
+	store i16 %b, i16* %BP
+	store i64 %d, i64* %DP
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll b/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
new file mode 100644
index 0000000000000..0b731dccd19f9
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; When joining live intervals of sub-registers, an MBB live-in list is not
+; updated properly. The register scavenger asserts on an undefined register.
+
+define i32 @foo(i8 %bar) {
+entry:
+  switch i8 %bar, label %bb1203 [
+    i8 117, label %bb1204
+    i8 85, label %bb1204
+    i8 106, label %bb1204
+  ]
+
+bb1203:                                           ; preds = %entry
+  ret i32 1
+
+bb1204:                                           ; preds = %entry, %entry, %entry
+  ret i32 2
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll b/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
new file mode 100644
index 0000000000000..dcc3ea0dec883
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; LocalRewriter can forget to transfer a <def,dead> flag when setting up call
+; argument registers. This then causes register scavenger asserts.
+
+declare i32 @printf(i8*, i32, float)
+
+define i32 @testissue(i32 %i, float %x, float %y) {
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %0
+  %x2 = fmul float %x, 5.000000e-01               ; <float> [#uses=1]
+  %y2 = fmul float %y, 0x3FECCCCCC0000000         ; <float> [#uses=1]
+  %z2 = fadd float %x2, %y2                       ; <float> [#uses=1]
+  %z3 = fadd float undef, %z2                     ; <float> [#uses=1]
+  %i1 = shl i32 %i, 3                             ; <i32> [#uses=1]
+  %j1 = add i32 %i, 7                             ; <i32> [#uses=1]
+  %m1 = add i32 %i1, %j1                          ; <i32> [#uses=2]
+  %b = icmp sle i32 %m1, 6                        ; <i1> [#uses=1]
+  br i1 %b, label %bb1, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %1 = call i32 @printf(i8* undef, i32 %m1, float %z3); <i32> [#uses=0]
+  ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
new file mode 100644
index 0000000000000..f21da52315faf
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+; XFAIL: *
+
+; An undef argument causes a setugt node to escape instruction selection.
+
+define void @bugt() {
+cond_next305:
+  %tmp306307 = trunc i32 undef to i8              ; <i8> [#uses=1]
+  %tmp308 = icmp ugt i8 %tmp306307, 6             ; <i1> [#uses=1]
+  br i1 %tmp308, label %bb311, label %bb314
+
+bb311:                                            ; preds = %cond_next305
+  unreachable
+
+bb314:                                            ; preds = %cond_next305
+  ret void
+}
diff --git a/test/CodeGen/Blackfin/add-overflow.ll b/test/CodeGen/Blackfin/add-overflow.ll
new file mode 100644
index 0000000000000..e982e437d6871
--- /dev/null
+++ b/test/CodeGen/Blackfin/add-overflow.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+	type { i24, i1 }		; type %0
+
+define i1 @func2(i24 zeroext %v1, i24 zeroext %v2) nounwind {
+entry:
+	%t = call %0 @llvm.uadd.with.overflow.i24(i24 %v1, i24 %v2)		; <%0> [#uses=1]
+	%obit = extractvalue %0 %t, 1		; <i1> [#uses=1]
+	br i1 %obit, label %carry, label %normal
+
+normal:		; preds = %entry
+	ret i1 true
+
+carry:		; preds = %entry
+	ret i1 false
+}
+
+declare %0 @llvm.uadd.with.overflow.i24(i24, i24) nounwind
diff --git a/test/CodeGen/Blackfin/add.ll b/test/CodeGen/Blackfin/add.ll
new file mode 100644
index 0000000000000..3311c03199ee9
--- /dev/null
+++ b/test/CodeGen/Blackfin/add.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+define i32 @add(i32 %A, i32 %B) {
+	%R = add i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
diff --git a/test/CodeGen/Blackfin/addsub-i128.ll b/test/CodeGen/Blackfin/addsub-i128.ll
new file mode 100644
index 0000000000000..dd5610120b4d9
--- /dev/null
+++ b/test/CodeGen/Blackfin/addsub-i128.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; These functions have just the right size to annoy the register scavenger: They
+; use all the scratch registers, but not all the callee-saved registers.
+
+define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+	%tmp1 = zext i64 %AL to i128		; <i128> [#uses=1]
+	%tmp23 = zext i64 %AH to i128		; <i128> [#uses=1]
+	%tmp4 = shl i128 %tmp23, 64		; <i128> [#uses=1]
+	%tmp5 = or i128 %tmp4, %tmp1		; <i128> [#uses=1]
+	%tmp67 = zext i64 %BL to i128		; <i128> [#uses=1]
+	%tmp89 = zext i64 %BH to i128		; <i128> [#uses=1]
+	%tmp11 = shl i128 %tmp89, 64		; <i128> [#uses=1]
+	%tmp12 = or i128 %tmp11, %tmp67		; <i128> [#uses=1]
+	%tmp15 = add i128 %tmp12, %tmp5		; <i128> [#uses=2]
+	%tmp1617 = trunc i128 %tmp15 to i64		; <i64> [#uses=1]
+	store i64 %tmp1617, i64* %RL
+	%tmp21 = lshr i128 %tmp15, 64		; <i128> [#uses=1]
+	%tmp2122 = trunc i128 %tmp21 to i64		; <i64> [#uses=1]
+	store i64 %tmp2122, i64* %RH
+	ret void
+}
+
+define void @test_sub(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+	%tmp1 = zext i64 %AL to i128		; <i128> [#uses=1]
+	%tmp23 = zext i64 %AH to i128		; <i128> [#uses=1]
+	%tmp4 = shl i128 %tmp23, 64		; <i128> [#uses=1]
+	%tmp5 = or i128 %tmp4, %tmp1		; <i128> [#uses=1]
+	%tmp67 = zext i64 %BL to i128		; <i128> [#uses=1]
+	%tmp89 = zext i64 %BH to i128		; <i128> [#uses=1]
+	%tmp11 = shl i128 %tmp89, 64		; <i128> [#uses=1]
+	%tmp12 = or i128 %tmp11, %tmp67		; <i128> [#uses=1]
+	%tmp15 = sub i128 %tmp5, %tmp12		; <i128> [#uses=2]
+	%tmp1617 = trunc i128 %tmp15 to i64		; <i64> [#uses=1]
+	store i64 %tmp1617, i64* %RL
+	%tmp21 = lshr i128 %tmp15, 64		; <i128> [#uses=1]
+	%tmp2122 = trunc i128 %tmp21 to i64		; <i64> [#uses=1]
+	store i64 %tmp2122, i64* %RH
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/basic-i1.ll b/test/CodeGen/Blackfin/basic-i1.ll
new file mode 100644
index 0000000000000..c63adaba06cfa
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i1.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin > %t
+
+define i1 @add(i1 %A, i1 %B) {
+	%R = add i1 %A, %B		; <i1> [#uses=1]
+	ret i1 %R
+}
+
+define i1 @sub(i1 %A, i1 %B) {
+	%R = sub i1 %A, %B		; <i1> [#uses=1]
+	ret i1 %R
+}
+
+define i1 @mul(i1 %A, i1 %B) {
+	%R = mul i1 %A, %B		; <i1> [#uses=1]
+	ret i1 %R
+}
+
+define i1 @sdiv(i1 %A, i1 %B) {
+	%R = sdiv i1 %A, %B		; <i1> [#uses=1]
+	ret i1 %R
+}
+
+define i1 @udiv(i1 %A, i1 %B) {
+	%R = udiv i1 %A, %B		; <i1> [#uses=1]
+	ret i1 %R
+}
+
+define i1 @srem(i1 %A, i1 %B) {
+	%R = srem i1 %A, %B		; <i1> [#uses=1]
+	ret i1 %R
+}
+
+define i1 @urem(i1 %A, i1 %B) {
+	%R = urem i1 %A, %B		; <i1> [#uses=1]
+	ret i1 %R
+}
+
+define i1 @and(i1 %A, i1 %B) {
+	%R = and i1 %A, %B		; <i1> [#uses=1]
+	ret i1 %R
+}
+
+define i1 @or(i1 %A, i1 %B) {
+	%R = or i1 %A, %B		; <i1> [#uses=1]
+	ret i1 %R
+}
+
+define i1 @xor(i1 %A, i1 %B) {
+	%R = xor i1 %A, %B		; <i1> [#uses=1]
+	ret i1 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i16.ll b/test/CodeGen/Blackfin/basic-i16.ll
new file mode 100644
index 0000000000000..541e9a8dc948c
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i16.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=bfin
+
+define i16 @add(i16 %A, i16 %B) {
+	%R = add i16 %A, %B		; <i16> [#uses=1]
+	ret i16 %R
+}
+
+define i16 @sub(i16 %A, i16 %B) {
+	%R = sub i16 %A, %B		; <i16> [#uses=1]
+	ret i16 %R
+}
+
+define i16 @mul(i16 %A, i16 %B) {
+	%R = mul i16 %A, %B		; <i16> [#uses=1]
+	ret i16 %R
+}
+
+define i16 @sdiv(i16 %A, i16 %B) {
+	%R = sdiv i16 %A, %B		; <i16> [#uses=1]
+	ret i16 %R
+}
+
+define i16 @udiv(i16 %A, i16 %B) {
+	%R = udiv i16 %A, %B		; <i16> [#uses=1]
+	ret i16 %R
+}
+
+define i16 @srem(i16 %A, i16 %B) {
+	%R = srem i16 %A, %B		; <i16> [#uses=1]
+	ret i16 %R
+}
+
+define i16 @urem(i16 %A, i16 %B) {
+	%R = urem i16 %A, %B		; <i16> [#uses=1]
+	ret i16 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i32.ll b/test/CodeGen/Blackfin/basic-i32.ll
new file mode 100644
index 0000000000000..4b5dbfcb957ec
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i32.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define i32 @add(i32 %A, i32 %B) {
+	%R = add i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
+
+define i32 @sub(i32 %A, i32 %B) {
+	%R = sub i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
+
+define i32 @mul(i32 %A, i32 %B) {
+	%R = mul i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
+
+define i32 @sdiv(i32 %A, i32 %B) {
+	%R = sdiv i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
+
+define i32 @udiv(i32 %A, i32 %B) {
+	%R = udiv i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
+
+define i32 @srem(i32 %A, i32 %B) {
+	%R = srem i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
+
+define i32 @urem(i32 %A, i32 %B) {
+	%R = urem i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
+
+define i32 @and(i32 %A, i32 %B) {
+	%R = and i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
+
+define i32 @or(i32 %A, i32 %B) {
+	%R = or i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
+
+define i32 @xor(i32 %A, i32 %B) {
+	%R = xor i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i64.ll b/test/CodeGen/Blackfin/basic-i64.ll
new file mode 100644
index 0000000000000..d4dd8e2703bf8
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i64.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define i64 @add(i64 %A, i64 %B) {
+	%R = add i64 %A, %B		; <i64> [#uses=1]
+	ret i64 %R
+}
+
+define i64 @sub(i64 %A, i64 %B) {
+	%R = sub i64 %A, %B		; <i64> [#uses=1]
+	ret i64 %R
+}
+
+define i64 @mul(i64 %A, i64 %B) {
+	%R = mul i64 %A, %B		; <i64> [#uses=1]
+	ret i64 %R
+}
+
+define i64 @sdiv(i64 %A, i64 %B) {
+	%R = sdiv i64 %A, %B		; <i64> [#uses=1]
+	ret i64 %R
+}
+
+define i64 @udiv(i64 %A, i64 %B) {
+	%R = udiv i64 %A, %B		; <i64> [#uses=1]
+	ret i64 %R
+}
+
+define i64 @srem(i64 %A, i64 %B) {
+	%R = srem i64 %A, %B		; <i64> [#uses=1]
+	ret i64 %R
+}
+
+define i64 @urem(i64 %A, i64 %B) {
+	%R = urem i64 %A, %B		; <i64> [#uses=1]
+	ret i64 %R
+}
+
+define i64 @and(i64 %A, i64 %B) {
+	%R = and i64 %A, %B		; <i64> [#uses=1]
+	ret i64 %R
+}
+
+define i64 @or(i64 %A, i64 %B) {
+	%R = or i64 %A, %B		; <i64> [#uses=1]
+	ret i64 %R
+}
+
+define i64 @xor(i64 %A, i64 %B) {
+	%R = xor i64 %A, %B		; <i64> [#uses=1]
+	ret i64 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i8.ll b/test/CodeGen/Blackfin/basic-i8.ll
new file mode 100644
index 0000000000000..2c7ce9d1015ae
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i8.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin
+
+define i8 @add(i8 %A, i8 %B) {
+	%R = add i8 %A, %B		; <i8> [#uses=1]
+	ret i8 %R
+}
+
+define i8 @sub(i8 %A, i8 %B) {
+	%R = sub i8 %A, %B		; <i8> [#uses=1]
+	ret i8 %R
+}
+
+define i8 @mul(i8 %A, i8 %B) {
+	%R = mul i8 %A, %B		; <i8> [#uses=1]
+	ret i8 %R
+}
+
+define i8 @sdiv(i8 %A, i8 %B) {
+	%R = sdiv i8 %A, %B		; <i8> [#uses=1]
+	ret i8 %R
+}
+
+define i8 @udiv(i8 %A, i8 %B) {
+	%R = udiv i8 %A, %B		; <i8> [#uses=1]
+	ret i8 %R
+}
+
+define i8 @srem(i8 %A, i8 %B) {
+	%R = srem i8 %A, %B		; <i8> [#uses=1]
+	ret i8 %R
+}
+
+define i8 @urem(i8 %A, i8 %B) {
+	%R = urem i8 %A, %B		; <i8> [#uses=1]
+	ret i8 %R
+}
+
+define i8 @and(i8 %A, i8 %B) {
+	%R = and i8 %A, %B		; <i8> [#uses=1]
+	ret i8 %R
+}
+
+define i8 @or(i8 %A, i8 %B) {
+	%R = or i8 %A, %B		; <i8> [#uses=1]
+	ret i8 %R
+}
+
+define i8 @xor(i8 %A, i8 %B) {
+	%R = xor i8 %A, %B		; <i8> [#uses=1]
+	ret i8 %R
+}
diff --git a/test/CodeGen/Blackfin/basictest.ll b/test/CodeGen/Blackfin/basictest.ll
new file mode 100644
index 0000000000000..85040df0fde5d
--- /dev/null
+++ b/test/CodeGen/Blackfin/basictest.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define void @void(i32, i32) {
+        add i32 0, 0            ; <i32>:3 [#uses=2]
+        sub i32 0, 4            ; <i32>:4 [#uses=2]
+        br label %5
+
+; <label>:5             ; preds = %5, %2
+        add i32 %0, %1          ; <i32>:6 [#uses=2]
+        sub i32 %6, %4          ; <i32>:7 [#uses=1]
+        icmp sle i32 %7, %3             ; <i1>:8 [#uses=1]
+        br i1 %8, label %9, label %5
+
+; <label>:9             ; preds = %5
+        add i32 %0, %1          ; <i32>:10 [#uses=0]
+        sub i32 %6, %4          ; <i32>:11 [#uses=1]
+        icmp sle i32 %11, %3            ; <i1>:12 [#uses=0]
+        ret void
+}
diff --git a/test/CodeGen/Blackfin/burg.ll b/test/CodeGen/Blackfin/burg.ll
new file mode 100644
index 0000000000000..8cc3713b7e73a
--- /dev/null
+++ b/test/CodeGen/Blackfin/burg.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+	%IntList = type %struct.intlist*
+	%ReadFn = type i32 ()*
+	%YYSTYPE = type { %IntList }
+	%struct.intlist = type { i32, %IntList }
+@yyval = external global %YYSTYPE		; <%YYSTYPE*> [#uses=1]
+
+define i32 @yyparse() {
+bb0:
+	%reg254 = load i16* null		; <i16> [#uses=1]
+	%reg254-idxcast = sext i16 %reg254 to i64		; <i64> [#uses=1]
+	%reg254-idxcast-scale = mul i64 %reg254-idxcast, -1		; <i64> [#uses=1]
+	%reg254-idxcast-scale-offset = add i64 %reg254-idxcast-scale, 1		; <i64> [#uses=1]
+	%reg261.idx1 = getelementptr %YYSTYPE* null, i64 %reg254-idxcast-scale-offset, i32 0		; <%IntList*> [#uses=1]
+	%reg261 = load %IntList* %reg261.idx1		; <%IntList> [#uses=1]
+	store %IntList %reg261, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
+	unreachable
+}
diff --git a/test/CodeGen/Blackfin/cmp-small-imm.ll b/test/CodeGen/Blackfin/cmp-small-imm.ll
new file mode 100644
index 0000000000000..e1732a8f806b8
--- /dev/null
+++ b/test/CodeGen/Blackfin/cmp-small-imm.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=bfin > %t
+
+define i1 @cmp3(i32 %A) {
+	%R = icmp uge i32 %A, 2
+	ret i1 %R
+}
diff --git a/test/CodeGen/Blackfin/cmp64.ll b/test/CodeGen/Blackfin/cmp64.ll
new file mode 100644
index 0000000000000..ef5bf45861ddc
--- /dev/null
+++ b/test/CodeGen/Blackfin/cmp64.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin
+
+; This test tries to use a JustCC register as a data operand for MOVEcc.  It
+; calls copyRegToReg(JustCC -> DP), failing because JustCC can only be copied to
+; D.  The proper solution would be to restrict the virtual register to D only.
+
+define i32 @main() {
+entry:
+	br label %loopentry
+
+loopentry:
+	%done = icmp sle i64 undef, 5
+	br i1 %done, label %loopentry, label %exit.1
+
+exit.1:
+	ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/ct32.ll b/test/CodeGen/Blackfin/ct32.ll
new file mode 100644
index 0000000000000..e9b66ebe5772b
--- /dev/null
+++ b/test/CodeGen/Blackfin/ct32.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=bfin
+
+declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.ctpop.i32(i32)
+
+define i32 @ctlztest(i32 %B) {
+	%b = call i32 @llvm.ctlz.i32( i32 %B )
+	ret i32 %b;
+}
+
+define i32 @cttztest(i32 %B) {
+	%b = call i32 @llvm.cttz.i32( i32 %B )
+	ret i32 %b;
+}
+
+define i32 @ctpoptest(i32 %B) {
+	%b = call i32 @llvm.ctpop.i32( i32 %B )
+	ret i32 %b;
+}
diff --git a/test/CodeGen/Blackfin/ct64.ll b/test/CodeGen/Blackfin/ct64.ll
new file mode 100644
index 0000000000000..ac4bdcffbe957
--- /dev/null
+++ b/test/CodeGen/Blackfin/ct64.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=bfin
+
+declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.cttz.i64(i64)
+declare i64 @llvm.ctpop.i64(i64)
+
+define i64 @ctlztest(i64 %B) {
+	%b = call i64 @llvm.ctlz.i64( i64 %B )
+	ret i64 %b;
+}
+
+define i64 @cttztest(i64 %B) {
+	%b = call i64 @llvm.cttz.i64( i64 %B )
+	ret i64 %b;
+}
+
+define i64 @ctpoptest(i64 %B) {
+	%b = call i64 @llvm.ctpop.i64( i64 %B )
+	ret i64 %b;
+}
diff --git a/test/CodeGen/Blackfin/ctlz16.ll b/test/CodeGen/Blackfin/ctlz16.ll
new file mode 100644
index 0000000000000..56a65c05853e0
--- /dev/null
+++ b/test/CodeGen/Blackfin/ctlz16.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin
+
+declare i16 @llvm.ctlz.i16(i16)
+
+define i16 @ctlztest(i16 %B) {
+	%b = call i16 @llvm.ctlz.i16( i16 %B )		; <i16> [#uses=1]
+	ret i16 %b;
+}
+define i16 @ctlztest_z(i16 zeroext %B) {
+	%b = call i16 @llvm.ctlz.i16( i16 %B )		; <i16> [#uses=1]
+	ret i16 %b;
+}
+
+define i16 @ctlztest_s(i16 signext %B) {
+	%b = call i16 @llvm.ctlz.i16( i16 %B )		; <i16> [#uses=1]
+	ret i16 %b;
+}
+
diff --git a/test/CodeGen/Blackfin/ctlz64.ll b/test/CodeGen/Blackfin/ctlz64.ll
new file mode 100644
index 0000000000000..3e22f88435532
--- /dev/null
+++ b/test/CodeGen/Blackfin/ctlz64.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+@.str = external constant [14 x i8]		; <[14 x i8]*> [#uses=1]
+
+define i32 @main(i64 %arg) nounwind {
+entry:
+	%tmp47 = tail call i64 @llvm.cttz.i64(i64 %arg)		; <i64> [#uses=1]
+	%tmp48 = trunc i64 %tmp47 to i32		; <i32> [#uses=1]
+	%tmp40 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 0, i32 %tmp48, i32 0) nounwind		; <i32> [#uses=0]
+	ret i32 0
+}
+
+declare i32 @printf(i8* noalias, ...) nounwind
+
+declare i64 @llvm.cttz.i64(i64) nounwind readnone
diff --git a/test/CodeGen/Blackfin/ctpop16.ll b/test/CodeGen/Blackfin/ctpop16.ll
new file mode 100644
index 0000000000000..cbbb3d9831a80
--- /dev/null
+++ b/test/CodeGen/Blackfin/ctpop16.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin
+
+declare i16 @llvm.ctpop.i16(i16)
+
+define i16 @ctpoptest(i16 %B) {
+	%b = call i16 @llvm.ctpop.i16( i16 %B )		; <i16> [#uses=1]
+	ret i16 %b;
+}
+define i16 @ctpoptest_z(i16 zeroext %B) {
+	%b = call i16 @llvm.ctpop.i16( i16 %B )		; <i16> [#uses=1]
+	ret i16 %b;
+}
+
+define i16 @ctpoptest_s(i16 signext %B) {
+	%b = call i16 @llvm.ctpop.i16( i16 %B )		; <i16> [#uses=1]
+	ret i16 %b;
+}
+
diff --git a/test/CodeGen/Blackfin/cttz16.ll b/test/CodeGen/Blackfin/cttz16.ll
new file mode 100644
index 0000000000000..05fe9bfd44698
--- /dev/null
+++ b/test/CodeGen/Blackfin/cttz16.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin
+
+declare i16 @llvm.cttz.i16(i16)
+
+define i16 @cttztest(i16 %B) {
+	%b = call i16 @llvm.cttz.i16( i16 %B )		; <i16> [#uses=1]
+	ret i16 %b;
+}
+define i16 @cttztest_z(i16 zeroext %B) {
+	%b = call i16 @llvm.cttz.i16( i16 %B )		; <i16> [#uses=1]
+	ret i16 %b;
+}
+
+define i16 @cttztest_s(i16 signext %B) {
+	%b = call i16 @llvm.cttz.i16( i16 %B )		; <i16> [#uses=1]
+	ret i16 %b;
+}
+
diff --git a/test/CodeGen/Blackfin/cycles.ll b/test/CodeGen/Blackfin/cycles.ll
new file mode 100644
index 0000000000000..6451c747bd709
--- /dev/null
+++ b/test/CodeGen/Blackfin/cycles.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin | FileCheck %s
+
+declare i64 @llvm.readcyclecounter()
+
+; CHECK: cycles
+; CHECK: cycles2
+define i64 @cyc64() {
+	%tmp.1 = call i64 @llvm.readcyclecounter()
+	ret i64 %tmp.1
+}
+
+; CHECK: cycles
+define i32@cyc32() {
+	%tmp.1 = call i64 @llvm.readcyclecounter()
+        %s = trunc i64 %tmp.1 to i32
+	ret i32 %s
+}
diff --git a/test/CodeGen/Blackfin/dg.exp b/test/CodeGen/Blackfin/dg.exp
new file mode 100644
index 0000000000000..5fdbe5feb0872
--- /dev/null
+++ b/test/CodeGen/Blackfin/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target Blackfin] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/Blackfin/double-cast.ll b/test/CodeGen/Blackfin/double-cast.ll
new file mode 100644
index 0000000000000..815ca797d752c
--- /dev/null
+++ b/test/CodeGen/Blackfin/double-cast.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=bfin
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+	%1 = call i32 (i8*, ...)* @printf(i8* undef, double undef)
+	ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/frameindex.ll b/test/CodeGen/Blackfin/frameindex.ll
new file mode 100644
index 0000000000000..7e677fbf18cf1
--- /dev/null
+++ b/test/CodeGen/Blackfin/frameindex.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+declare i32 @SIM(i8*, i8*, i32, i32, i32, [256 x i32]*, i32, i32, i32)
+
+define void @foo() {
+bb0:
+	%V = alloca [256 x i32], i32 256		; <[256 x i32]*> [#uses=1]
+	%0 = call i32 @SIM(i8* null, i8* null, i32 0, i32 0, i32 0, [256 x i32]* %V, i32 0, i32 0, i32 2)		; <i32> [#uses=0]
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/i17mem.ll b/test/CodeGen/Blackfin/i17mem.ll
new file mode 100644
index 0000000000000..bc5ade7416fa1
--- /dev/null
+++ b/test/CodeGen/Blackfin/i17mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i17_l = external global i17		; <i17*> [#uses=1]
+@i17_s = external global i17		; <i17*> [#uses=1]
+
+define void @i17_ls() nounwind  {
+	%tmp = load i17* @i17_l		; <i17> [#uses=1]
+	store i17 %tmp, i17* @i17_s
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/i1mem.ll b/test/CodeGen/Blackfin/i1mem.ll
new file mode 100644
index 0000000000000..cb03e3d7fcb0a
--- /dev/null
+++ b/test/CodeGen/Blackfin/i1mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i1_l = external global i1		; <i1*> [#uses=1]
+@i1_s = external global i1		; <i1*> [#uses=1]
+
+define void @i1_ls() nounwind  {
+	%tmp = load i1* @i1_l		; <i1> [#uses=1]
+	store i1 %tmp, i1* @i1_s
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/i1ops.ll b/test/CodeGen/Blackfin/i1ops.ll
new file mode 100644
index 0000000000000..6b5612cc4997a
--- /dev/null
+++ b/test/CodeGen/Blackfin/i1ops.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define i32 @adj(i32 %d.1, i32 %ct.1) {
+entry:
+	%tmp.22.not = trunc i32 %ct.1 to i1		; <i1> [#uses=1]
+	%tmp.221 = xor i1 %tmp.22.not, true		; <i1> [#uses=1]
+	%tmp.26 = or i1 false, %tmp.221		; <i1> [#uses=1]
+	%tmp.27 = zext i1 %tmp.26 to i32		; <i32> [#uses=1]
+	ret i32 %tmp.27
+}
diff --git a/test/CodeGen/Blackfin/i216mem.ll b/test/CodeGen/Blackfin/i216mem.ll
new file mode 100644
index 0000000000000..9f8cf48e87564
--- /dev/null
+++ b/test/CodeGen/Blackfin/i216mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i216_l = external global i216		; <i216*> [#uses=1]
+@i216_s = external global i216		; <i216*> [#uses=1]
+
+define void @i216_ls() nounwind  {
+	%tmp = load i216* @i216_l		; <i216> [#uses=1]
+	store i216 %tmp, i216* @i216_s
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/i248mem.ll b/test/CodeGen/Blackfin/i248mem.ll
new file mode 100644
index 0000000000000..db23f541adcbd
--- /dev/null
+++ b/test/CodeGen/Blackfin/i248mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin
+@i248_l = external global i248		; <i248*> [#uses=1]
+@i248_s = external global i248		; <i248*> [#uses=1]
+
+define void @i248_ls() nounwind  {
+	%tmp = load i248* @i248_l		; <i248> [#uses=1]
+	store i248 %tmp, i248* @i248_s
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/i256mem.ll b/test/CodeGen/Blackfin/i256mem.ll
new file mode 100644
index 0000000000000..bc5ade7416fa1
--- /dev/null
+++ b/test/CodeGen/Blackfin/i256mem.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i17_l = external global i17		; <i17*> [#uses=1]
+@i17_s = external global i17		; <i17*> [#uses=1]
+
+define void @i17_ls() nounwind  {
+	%tmp = load i17* @i17_l		; <i17> [#uses=1]
+	store i17 %tmp, i17* @i17_s
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/i256param.ll b/test/CodeGen/Blackfin/i256param.ll
new file mode 100644
index 0000000000000..df74c9a6e0e8f
--- /dev/null
+++ b/test/CodeGen/Blackfin/i256param.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i256_s = external global i256		; <i256*> [#uses=1]
+
+define void @i256_ls(i256 %x) nounwind  {
+	store i256 %x, i256* @i256_s
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/i56param.ll b/test/CodeGen/Blackfin/i56param.ll
new file mode 100644
index 0000000000000..ca0256391b1fc
--- /dev/null
+++ b/test/CodeGen/Blackfin/i56param.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@i56_l = external global i56		; <i56*> [#uses=1]
+@i56_s = external global i56		; <i56*> [#uses=1]
+
+define void @i56_ls(i56 %x) nounwind  {
+	store i56 %x, i56* @i56_s
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/i8mem.ll b/test/CodeGen/Blackfin/i8mem.ll
new file mode 100644
index 0000000000000..ea3a67e4994cf
--- /dev/null
+++ b/test/CodeGen/Blackfin/i8mem.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin
+
+@i8_l = external global i8		; <i8*> [#uses=1]
+@i8_s = external global i8		; <i8*> [#uses=1]
+
+define void @i8_ls() nounwind  {
+	%tmp = load i8* @i8_l		; <i8> [#uses=1]
+	store i8 %tmp, i8* @i8_s
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/inline-asm.ll b/test/CodeGen/Blackfin/inline-asm.ll
new file mode 100644
index 0000000000000..d623f6bd95aa3
--- /dev/null
+++ b/test/CodeGen/Blackfin/inline-asm.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=bfin | FileCheck %s
+
+; Standard "r"
+; CHECK: r0 = r0 + r1;
+define i32 @add_r(i32 %A, i32 %B) {
+	%R = call i32 asm "$0 = $1 + $2;", "=r,r,r"( i32 %A, i32 %B ) nounwind
+	ret i32 %R
+}
+
+; Target "d"
+; CHECK: r0 = r0 - r1;
+define i32 @add_d(i32 %A, i32 %B) {
+	%R = call i32 asm "$0 = $1 - $2;", "=d,d,d"( i32 %A, i32 %B ) nounwind
+	ret i32 %R
+}
+
+; Target "a" for P-regs
+; CHECK: p0 = (p0 + p1) << 1;
+define i32 @add_a(i32 %A, i32 %B) {
+	%R = call i32 asm "$0 = ($1 + $2) << 1;", "=a,a,a"( i32 %A, i32 %B ) nounwind
+	ret i32 %R
+}
+
+; Target "z" for P0, P1, P2. This is not a real regclass
+; CHECK: p0 = (p0 + p1) << 2;
+define i32 @add_Z(i32 %A, i32 %B) {
+	%R = call i32 asm "$0 = ($1 + $2) << 2;", "=z,z,z"( i32 %A, i32 %B ) nounwind
+	ret i32 %R
+}
+
+; Target "C" for CC. This is a single register
+; CHECK: cc = p0 < p1;
+; CHECK: r0 = cc;
+define i32 @add_C(i32 %A, i32 %B) {
+	%R = call i32 asm "$0 = $1 < $2;", "=C,z,z"( i32 %A, i32 %B ) nounwind
+	ret i32 %R
+}
+
diff --git a/test/CodeGen/Blackfin/int-setcc.ll b/test/CodeGen/Blackfin/int-setcc.ll
new file mode 100644
index 0000000000000..6bd9f86a999c5
--- /dev/null
+++ b/test/CodeGen/Blackfin/int-setcc.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+define fastcc void @Evaluate() {
+entry:
+	br i1 false, label %cond_false186, label %cond_true
+
+cond_true:		; preds = %entry
+	ret void
+
+cond_false186:		; preds = %entry
+	br i1 false, label %cond_true293, label %bb203
+
+bb203:		; preds = %cond_false186
+	ret void
+
+cond_true293:		; preds = %cond_false186
+	br i1 false, label %cond_true298, label %cond_next317
+
+cond_true298:		; preds = %cond_true293
+	br i1 false, label %cond_next518, label %cond_true397.preheader
+
+cond_next317:		; preds = %cond_true293
+	ret void
+
+cond_true397.preheader:		; preds = %cond_true298
+	ret void
+
+cond_next518:		; preds = %cond_true298
+	br i1 false, label %bb1069, label %cond_true522
+
+cond_true522:		; preds = %cond_next518
+	ret void
+
+bb1069:		; preds = %cond_next518
+	br i1 false, label %cond_next1131, label %bb1096
+
+bb1096:		; preds = %bb1069
+	ret void
+
+cond_next1131:		; preds = %bb1069
+	br i1 false, label %cond_next1207, label %cond_true1150
+
+cond_true1150:		; preds = %cond_next1131
+	ret void
+
+cond_next1207:		; preds = %cond_next1131
+	br i1 false, label %cond_next1219, label %cond_true1211
+
+cond_true1211:		; preds = %cond_next1207
+	ret void
+
+cond_next1219:		; preds = %cond_next1207
+	br i1 false, label %cond_true1223, label %cond_next1283
+
+cond_true1223:		; preds = %cond_next1219
+	br i1 false, label %cond_true1254, label %cond_true1264
+
+cond_true1254:		; preds = %cond_true1223
+	br i1 false, label %bb1567, label %cond_true1369.preheader
+
+cond_true1264:		; preds = %cond_true1223
+	ret void
+
+cond_next1283:		; preds = %cond_next1219
+	ret void
+
+cond_true1369.preheader:		; preds = %cond_true1254
+	ret void
+
+bb1567:		; preds = %cond_true1254
+	%tmp1605 = load i8* null		; <i8> [#uses=1]
+	%tmp1606 = icmp eq i8 %tmp1605, 0		; <i1> [#uses=1]
+	br i1 %tmp1606, label %cond_next1637, label %cond_true1607
+
+cond_true1607:		; preds = %bb1567
+	ret void
+
+cond_next1637:		; preds = %bb1567
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/invalid-apint.ll b/test/CodeGen/Blackfin/invalid-apint.ll
new file mode 100644
index 0000000000000..a8c01ba65f88a
--- /dev/null
+++ b/test/CodeGen/Blackfin/invalid-apint.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=bfin
+
+; Assertion failed: (width < BitWidth && "Invalid APInt Truncate request"),
+; function trunc, file APInt.cpp, line 956.
+
+@str2 = external global [29 x i8]
+
+define void @printArgsNoRet(i32 %a1, float %a2, i8 %a3, double %a4, i8* %a5, i32 %a6, float %a7, i8 %a8, double %a9, i8* %a10, i32 %a11, float %a12, i8 %a13, double %a14, i8* %a15) {
+entry:
+	%tmp17 = sext i8 %a13 to i32
+	%tmp23 = call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0)
+	ret void
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Blackfin/jumptable.ll b/test/CodeGen/Blackfin/jumptable.ll
new file mode 100644
index 0000000000000..5f49e9d193e4a
--- /dev/null
+++ b/test/CodeGen/Blackfin/jumptable.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
+
+; CHECK: .section .rodata
+; CHECK: JTI1_0:
+; CHECK: .long .BB1_1
+
+define i32 @oper(i32 %op, i32 %A, i32 %B) {
+entry:
+        switch i32 %op, label %bbx [
+               i32 1 , label %bb1
+               i32 2 , label %bb2
+               i32 3 , label %bb3
+               i32 4 , label %bb4
+               i32 5 , label %bb5
+               i32 6 , label %bb6
+               i32 7 , label %bb7
+               i32 8 , label %bb8
+               i32 9 , label %bb9
+               i32 10, label %bb10
+        ]
+bb1:
+	%R1 = add i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R1
+bb2:
+	%R2 = sub i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R2
+bb3:
+	%R3 = mul i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R3
+bb4:
+	%R4 = sdiv i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R4
+bb5:
+	%R5 = udiv i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R5
+bb6:
+	%R6 = srem i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R6
+bb7:
+	%R7 = urem i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R7
+bb8:
+	%R8 = and i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R8
+bb9:
+	%R9 = or i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R9
+bb10:
+	%R10 = xor i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R10
+bbx:
+        ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/large-switch.ll b/test/CodeGen/Blackfin/large-switch.ll
new file mode 100644
index 0000000000000..02d32ef85f12d
--- /dev/null
+++ b/test/CodeGen/Blackfin/large-switch.ll
@@ -0,0 +1,187 @@
+; RUN: llc < %s -march=bfin
+
+; The switch expansion uses a dynamic shl, and it produces a jumptable
+
+define void @athlon_fp_unit_ready_cost() {
+entry:
+	switch i32 0, label %UnifiedReturnBlock [
+		i32 -1, label %bb2063
+		i32 19, label %bb2035
+		i32 20, label %bb2035
+		i32 21, label %bb2035
+		i32 23, label %bb2035
+		i32 24, label %bb2035
+		i32 27, label %bb2035
+		i32 32, label %bb2035
+		i32 33, label %bb1994
+		i32 35, label %bb2035
+		i32 36, label %bb1994
+		i32 90, label %bb1948
+		i32 94, label %bb1948
+		i32 95, label %bb1948
+		i32 133, label %bb1419
+		i32 135, label %bb1238
+		i32 136, label %bb1238
+		i32 137, label %bb1238
+		i32 138, label %bb1238
+		i32 139, label %bb1201
+		i32 140, label %bb1201
+		i32 141, label %bb1154
+		i32 142, label %bb1126
+		i32 144, label %bb1201
+		i32 145, label %bb1126
+		i32 146, label %bb1201
+		i32 147, label %bb1126
+		i32 148, label %bb1201
+		i32 149, label %bb1126
+		i32 150, label %bb1201
+		i32 151, label %bb1126
+		i32 152, label %bb1096
+		i32 153, label %bb1096
+		i32 154, label %bb1096
+		i32 157, label %bb1096
+		i32 158, label %bb1096
+		i32 159, label %bb1096
+		i32 162, label %bb1096
+		i32 163, label %bb1096
+		i32 164, label %bb1096
+		i32 167, label %bb1201
+		i32 168, label %bb1201
+		i32 170, label %bb1201
+		i32 171, label %bb1201
+		i32 173, label %bb1201
+		i32 174, label %bb1201
+		i32 176, label %bb1201
+		i32 177, label %bb1201
+		i32 179, label %bb993
+		i32 180, label %bb993
+		i32 181, label %bb993
+		i32 182, label %bb993
+		i32 183, label %bb993
+		i32 184, label %bb993
+		i32 365, label %bb1126
+		i32 366, label %bb1126
+		i32 367, label %bb1126
+		i32 368, label %bb1126
+		i32 369, label %bb1126
+		i32 370, label %bb1126
+		i32 371, label %bb1126
+		i32 372, label %bb1126
+		i32 373, label %bb1126
+		i32 384, label %bb1126
+		i32 385, label %bb1126
+		i32 386, label %bb1126
+		i32 387, label %bb1126
+		i32 388, label %bb1126
+		i32 389, label %bb1126
+		i32 390, label %bb1126
+		i32 391, label %bb1126
+		i32 392, label %bb1126
+		i32 525, label %bb919
+		i32 526, label %bb839
+		i32 528, label %bb919
+		i32 529, label %bb839
+		i32 532, label %cond_next6.i97
+		i32 533, label %cond_next6.i81
+		i32 534, label %bb495
+		i32 536, label %cond_next6.i81
+		i32 537, label %cond_next6.i81
+		i32 538, label %bb396
+		i32 539, label %bb288
+		i32 541, label %bb396
+		i32 542, label %bb396
+		i32 543, label %bb396
+		i32 544, label %bb396
+		i32 545, label %bb189
+		i32 546, label %cond_next6.i
+		i32 547, label %bb189
+		i32 548, label %cond_next6.i
+		i32 549, label %bb189
+		i32 550, label %cond_next6.i
+		i32 551, label %bb189
+		i32 552, label %cond_next6.i
+		i32 553, label %bb189
+		i32 554, label %cond_next6.i
+		i32 555, label %bb189
+		i32 556, label %cond_next6.i
+		i32 557, label %bb189
+		i32 558, label %cond_next6.i
+		i32 618, label %bb40
+		i32 619, label %bb18
+		i32 620, label %bb40
+		i32 621, label %bb10
+		i32 622, label %bb10
+	]
+
+bb10:
+	ret void
+
+bb18:
+	ret void
+
+bb40:
+	ret void
+
+cond_next6.i:
+	ret void
+
+bb189:
+	ret void
+
+bb288:
+	ret void
+
+bb396:
+	ret void
+
+bb495:
+	ret void
+
+cond_next6.i81:
+	ret void
+
+cond_next6.i97:
+	ret void
+
+bb839:
+	ret void
+
+bb919:
+	ret void
+
+bb993:
+	ret void
+
+bb1096:
+	ret void
+
+bb1126:
+	ret void
+
+bb1154:
+	ret void
+
+bb1201:
+	ret void
+
+bb1238:
+	ret void
+
+bb1419:
+	ret void
+
+bb1948:
+	ret void
+
+bb1994:
+	ret void
+
+bb2035:
+	ret void
+
+bb2063:
+	ret void
+
+UnifiedReturnBlock:
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/load-i16.ll b/test/CodeGen/Blackfin/load-i16.ll
new file mode 100644
index 0000000000000..eb18d410d0881
--- /dev/null
+++ b/test/CodeGen/Blackfin/load-i16.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; This somewhat contrived function heavily exercises register classes
+; It can trick -join-cross-class-copies into making illegal joins
+
+define void @f(i16** nocapture %p) nounwind readonly {
+entry:
+	%tmp1 = load i16** %p		; <i16*> [#uses=1]
+	%tmp2 = load i16* %tmp1		; <i16> [#uses=1]
+	%ptr = getelementptr i16* %tmp1, i16 %tmp2
+    store i16 %tmp2, i16* %ptr
+    ret void
+}
diff --git a/test/CodeGen/Blackfin/logic-i16.ll b/test/CodeGen/Blackfin/logic-i16.ll
new file mode 100644
index 0000000000000..e44672ff4200f
--- /dev/null
+++ b/test/CodeGen/Blackfin/logic-i16.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=bfin
+
+define i16 @and(i16 %A, i16 %B) {
+	%R = and i16 %A, %B		; <i16> [#uses=1]
+	ret i16 %R
+}
+
+define i16 @or(i16 %A, i16 %B) {
+	%R = or i16 %A, %B		; <i16> [#uses=1]
+	ret i16 %R
+}
+
+define i16 @xor(i16 %A, i16 %B) {
+	%R = xor i16 %A, %B		; <i16> [#uses=1]
+	ret i16 %R
+}
diff --git a/test/CodeGen/Blackfin/many-args.ll b/test/CodeGen/Blackfin/many-args.ll
new file mode 100644
index 0000000000000..8c52874e773b5
--- /dev/null
+++ b/test/CodeGen/Blackfin/many-args.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+	type { i32, float, float, float, float, float, float, float, float, float, float }		; type %0
+	%struct..s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
+
+define i32 @main(i32 %argc.1, i8** %argv.1) {
+entry:
+	%tmp.218 = load float* null		; <float> [#uses=1]
+	%tmp.219 = getelementptr %0* null, i64 0, i32 6		; <float*> [#uses=1]
+	%tmp.220 = load float* %tmp.219		; <float> [#uses=1]
+	%tmp.221 = getelementptr %0* null, i64 0, i32 7		; <float*> [#uses=1]
+	%tmp.222 = load float* %tmp.221		; <float> [#uses=1]
+	%tmp.223 = getelementptr %0* null, i64 0, i32 8		; <float*> [#uses=1]
+	%tmp.224 = load float* %tmp.223		; <float> [#uses=1]
+	%tmp.225 = getelementptr %0* null, i64 0, i32 9		; <float*> [#uses=1]
+	%tmp.226 = load float* %tmp.225		; <float> [#uses=1]
+	%tmp.227 = getelementptr %0* null, i64 0, i32 10		; <float*> [#uses=1]
+	%tmp.228 = load float* %tmp.227		; <float> [#uses=1]
+	call void @place_and_route(i32 0, i32 0, float 0.000000e+00, i32 0, i32 0, i8* null, i32 0, i32 0, i8* null, i8* null, i8* null, i8* null, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i16 0, i16 0, i16 0, float 0.000000e+00, float 0.000000e+00, %struct..s_segment_inf* null, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %tmp.218, float %tmp.220, float %tmp.222, float %tmp.224, float %tmp.226, float %tmp.228)
+	ret i32 0
+}
+
+declare void @place_and_route(i32, i32, float, i32, i32, i8*, i32, i32, i8*, i8*, i8*, i8*, i32, i32, i32, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, float, float, float, i32, i32, i16, i16, i16, float, float, %struct..s_segment_inf*, i32, float, float, float, float, float, float, float, float, float, float)
diff --git a/test/CodeGen/Blackfin/mulhu.ll b/test/CodeGen/Blackfin/mulhu.ll
new file mode 100644
index 0000000000000..72bacee33eb27
--- /dev/null
+++ b/test/CodeGen/Blackfin/mulhu.ll
@@ -0,0 +1,106 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
+	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
+	%struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 }
+	%struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* }
+	%struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] }
+	%struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
+	%struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
+	%struct.cost_pair = type { %struct.iv_cand*, i32, %struct.bitmap_head_def* }
+	%struct.dataflow_d = type { %struct.varray_head_tag*, [2 x %struct.tree_node*] }
+	%struct.def_operand_ptr = type { %struct.tree_node** }
+	%struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
+	%struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
+	%struct.edge_def_insns = type { %struct.rtx_def* }
+	%struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 }
+	%struct.eh_status = type opaque
+	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
+	%struct.et_node = type opaque
+	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i1, i1, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
+	%struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 }
+	%struct.initial_value_struct = type opaque
+	%struct.iv = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i1, i1, i32 }
+	%struct.iv_cand = type { i32, i1, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.iv*, i32 }
+	%struct.iv_use = type { i32, i32, %struct.iv*, %struct.tree_node*, %struct.tree_node**, %struct.bitmap_head_def*, i32, %struct.cost_pair*, %struct.iv_cand* }
+	%struct.ivopts_data = type { %struct.loop*, %struct.htab*, i32, %struct.version_info*, %struct.bitmap_head_def*, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.bitmap_head_def*, i1 }
+	%struct.lang_decl = type opaque
+	%struct.language_function = type opaque
+	%struct.location_t = type { i8*, i32 }
+	%struct.loop = type { i32, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.lpt_decision, i32, i32, %struct.edge_def**, i32, %struct.basic_block_def*, %struct.basic_block_def*, i32, %struct.edge_def**, i32, %struct.edge_def**, i32, %struct.simple_bitmap_def*, i32, %struct.loop**, i32, %struct.loop*, %struct.loop*, %struct.loop*, %struct.loop*, i32, i8*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound*, %struct.edge_def*, i1 }
+	%struct.lpt_decision = type { i32, i32 }
+	%struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 }
+	%struct.nb_iter_bound = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound* }
+	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
+	%struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 }
+	%struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
+	%struct.rtx_def = type { i16, i8, i8, %struct.u }
+	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+	%struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] }
+	%struct.stack_local_entry = type opaque
+	%struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 }
+	%struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* }
+	%struct.temp_slot = type opaque
+	%struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* }
+	%struct.tree_ann_d = type { %struct.stmt_ann_d }
+	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 }
+	%struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+	%struct.tree_decl_u1 = type { i64 }
+	%struct.tree_decl_u2 = type { %struct.function* }
+	%struct.tree_node = type { %struct.tree_decl }
+	%struct.u = type { [1 x i64] }
+	%struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* }
+	%struct.v_may_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] }
+	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+	%struct.varasm_status = type opaque
+	%struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u }
+	%struct.version_info = type { %struct.tree_node*, %struct.iv*, i1, i32, i1 }
+	%struct.vuse_optype_d = type { i32, [1 x %struct.tree_node*] }
+
+define i1 @determine_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand) {
+entry:
+	switch i32 0, label %bb91 [
+		i32 0, label %bb
+		i32 1, label %bb6
+		i32 3, label %cond_next135
+	]
+
+bb:		; preds = %entry
+	ret i1 false
+
+bb6:		; preds = %entry
+	br i1 false, label %bb87, label %cond_next27
+
+cond_next27:		; preds = %bb6
+	br i1 false, label %cond_true30, label %cond_next55
+
+cond_true30:		; preds = %cond_next27
+	br i1 false, label %cond_next41, label %cond_true35
+
+cond_true35:		; preds = %cond_true30
+	ret i1 false
+
+cond_next41:		; preds = %cond_true30
+	%tmp44 = call i32 @force_var_cost(%struct.ivopts_data* %data, %struct.tree_node* null, %struct.bitmap_head_def** null)		; <i32> [#uses=1]
+	%tmp46 = udiv i32 %tmp44, 5		; <i32> [#uses=1]
+	call void @set_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand, i32 %tmp46, %struct.bitmap_head_def* null)
+	br label %bb87
+
+cond_next55:		; preds = %cond_next27
+	ret i1 false
+
+bb87:		; preds = %cond_next41, %bb6
+	ret i1 false
+
+bb91:		; preds = %entry
+	ret i1 false
+
+cond_next135:		; preds = %entry
+	ret i1 false
+}
+
+declare void @set_use_iv_cost(%struct.ivopts_data*, %struct.iv_use*, %struct.iv_cand*, i32, %struct.bitmap_head_def*)
+
+declare i32 @force_var_cost(%struct.ivopts_data*, %struct.tree_node*, %struct.bitmap_head_def**)
diff --git a/test/CodeGen/Blackfin/printf.ll b/test/CodeGen/Blackfin/printf.ll
new file mode 100644
index 0000000000000..9e54b73c87723
--- /dev/null
+++ b/test/CodeGen/Blackfin/printf.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+@.str_1 = external constant [42 x i8]		; <[42 x i8]*> [#uses=1]
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main(i32 %argc.1, i8** %argv.1) {
+entry:
+	%tmp.16 = call i32 (i8*, ...)* @printf(i8* getelementptr ([42 x i8]* @.str_1, i64 0, i64 0), i32 0, i32 0, i64 0, i64 0)
+	ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/printf2.ll b/test/CodeGen/Blackfin/printf2.ll
new file mode 100644
index 0000000000000..7ac7e8032bb6d
--- /dev/null
+++ b/test/CodeGen/Blackfin/printf2.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=bfin
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+	%1 = call i32 (i8*, ...)* @printf(i8* undef, i1 undef)
+	ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll
new file mode 100644
index 0000000000000..c247aca0a5b0a
--- /dev/null
+++ b/test/CodeGen/Blackfin/promote-logic.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=bfin > %t
+
+; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR
+; operation after LegalizeOps.
+
+define void @mng_display_bgr565() {
+entry:
+	br i1 false, label %bb.preheader, label %return
+
+bb.preheader:
+	br i1 false, label %cond_true48, label %cond_next80
+
+cond_true48:
+	%tmp = load i8* null
+	%tmp51 = zext i8 %tmp to i16
+	%tmp99 = load i8* null
+	%tmp54 = bitcast i8 %tmp99 to i8
+	%tmp54.upgrd.1 = zext i8 %tmp54 to i32
+	%tmp55 = lshr i32 %tmp54.upgrd.1, 3
+	%tmp55.upgrd.2 = trunc i32 %tmp55 to i16
+	%tmp52 = shl i16 %tmp51, 5
+	%tmp56 = and i16 %tmp55.upgrd.2, 28
+	%tmp57 = or i16 %tmp56, %tmp52
+	%tmp60 = zext i16 %tmp57 to i32
+	%tmp62 = xor i32 0, 65535
+	%tmp63 = mul i32 %tmp60, %tmp62
+	%tmp65 = add i32 0, %tmp63
+	%tmp69 = add i32 0, %tmp65
+	%tmp70 = lshr i32 %tmp69, 16
+	%tmp70.upgrd.3 = trunc i32 %tmp70 to i16
+	%tmp75 = lshr i16 %tmp70.upgrd.3, 8
+	%tmp75.upgrd.4 = trunc i16 %tmp75 to i8
+	%tmp76 = lshr i8 %tmp75.upgrd.4, 5
+	store i8 %tmp76, i8* null
+	ret void
+
+cond_next80:
+	ret void
+
+return:
+	ret void
+}
diff --git a/test/CodeGen/Blackfin/promote-setcc.ll b/test/CodeGen/Blackfin/promote-setcc.ll
new file mode 100644
index 0000000000000..d344fadbf3d2f
--- /dev/null
+++ b/test/CodeGen/Blackfin/promote-setcc.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=bfin > %t
+
+; The DAG combiner may sometimes create illegal i16 SETCC operations when run
+; after LegalizeOps. Try to tease out all the optimizations in
+; TargetLowering::SimplifySetCC.
+
+@x = external global i16
+@y = external global i16
+
+declare i16 @llvm.ctlz.i16(i16)
+
+; Case (srl (ctlz x), 5) == const
+; Note: ctlz is promoted, so this test does not catch the DAG combiner
+define i1 @srl_ctlz_const() {
+  %x = load i16* @x
+  %c = call i16 @llvm.ctlz.i16(i16 %x)
+  %s = lshr i16 %c, 4
+  %r = icmp eq i16 %s, 1
+  ret i1 %r
+}
+
+; Case (zext x) == const
+define i1 @zext_const() {
+  %x = load i16* @x
+  %r = icmp ugt i16 %x, 1
+  ret i1 %r
+}
+
+; Case (sext x) == const
+define i1 @sext_const() {
+  %x = load i16* @x
+  %y = add i16 %x, 1
+  %x2 = sext i16 %y to i32
+  %r = icmp ne i32 %x2, -1
+  ret i1 %r
+}
+
diff --git a/test/CodeGen/Blackfin/sdiv.ll b/test/CodeGen/Blackfin/sdiv.ll
new file mode 100644
index 0000000000000..1426655ba0b9e
--- /dev/null
+++ b/test/CodeGen/Blackfin/sdiv.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+define i32 @sdiv(i32 %A, i32 %B) {
+	%R = sdiv i32 %A, %B		; <i32> [#uses=1]
+	ret i32 %R
+}
diff --git a/test/CodeGen/Blackfin/simple-select.ll b/test/CodeGen/Blackfin/simple-select.ll
new file mode 100644
index 0000000000000..0f7f270967a61
--- /dev/null
+++ b/test/CodeGen/Blackfin/simple-select.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+declare i1 @foo()
+
+define i32 @test(i32* %A, i32* %B) {
+	%a = load i32* %A
+	%b = load i32* %B
+	%cond = call i1 @foo()
+	%c = select i1 %cond, i32 %a, i32 %b
+	ret i32 %c
+}
diff --git a/test/CodeGen/Blackfin/switch.ll b/test/CodeGen/Blackfin/switch.ll
new file mode 100644
index 0000000000000..3680ec6e554bd
--- /dev/null
+++ b/test/CodeGen/Blackfin/switch.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+define i32 @foo(i32 %A, i32 %B, i32 %C) {
+entry:
+	switch i32 %A, label %out [
+		i32 1, label %bb
+		i32 0, label %bb13
+	]
+
+bb:		; preds = %entry
+	ret i32 1
+
+bb13:		; preds = %entry
+	ret i32 1
+
+out:		; preds = %entry
+	ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/switch2.ll b/test/CodeGen/Blackfin/switch2.ll
new file mode 100644
index 0000000000000..7877bce9c3721
--- /dev/null
+++ b/test/CodeGen/Blackfin/switch2.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+define i8* @FindChar(i8* %CurPtr) {
+entry:
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%tmp = load i8* null		; <i8> [#uses=1]
+	switch i8 %tmp, label %bb [
+		i8 0, label %bb7
+		i8 120, label %bb7
+	]
+
+bb7:		; preds = %bb, %bb
+	ret i8* null
+}
diff --git a/test/CodeGen/Blackfin/sync-intr.ll b/test/CodeGen/Blackfin/sync-intr.ll
new file mode 100644
index 0000000000000..75084f01e5607
--- /dev/null
+++ b/test/CodeGen/Blackfin/sync-intr.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
+
+define void @f() nounwind {
+entry:
+        ; CHECK: csync;
+        call void @llvm.bfin.csync()
+        ; CHECK: ssync;
+        call void @llvm.bfin.ssync()
+	ret void
+}
+
+declare void @llvm.bfin.csync() nounwind
+declare void @llvm.bfin.ssync() nounwind
diff --git a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll b/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
index c69b9b0c2cd73..0b06041f57132 100644
--- a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
+++ b/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; Make sure that global variables do not collide if they have the same name,
 ; but different types.
diff --git a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll b/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
index 29081579ac71d..a9f54e467d7e3 100644
--- a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
+++ b/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; This case was emitting code that looked like this:
 ; ...
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll b/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
index 297807ee32d1b..2afb1a02bbadf 100644
--- a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
+++ b/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; Test const pointer refs & forward references
 
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
index ead1bce818f25..b71cf07dbf0c0 100644
--- a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
+++ b/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 global i32* bitcast (float* @2 to i32*)   ;; Forward numeric reference
 global float* @2                       ;; Duplicate forward numeric reference
diff --git a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll b/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
index 7ae13ec54c30a..b5a1f0b28b2c1 100644
--- a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
+++ b/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 @sptr1 = global [11 x i8]* @somestr         ;; Forward ref to a constant
 @somestr = constant [11 x i8] c"hello world"
diff --git a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll b/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
index 25f63a0da6b3d..10b9fe22847c1 100644
--- a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
+++ b/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 @fptr = global void ()* @f       ;; Forward ref method defn
 declare void @f()               ;; External method
diff --git a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
index 528b8de87edfa..0827423e1ad02 100644
--- a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
+++ b/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 @array = constant [2 x i32] [ i32 12, i32 52 ]          ; <[2 x i32]*> [#uses=1]
 @arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0)         ; <i32**> [#uses=0]
diff --git a/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll b/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
index e9df0c29e1a98..3b2085c950c49 100644
--- a/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
+++ b/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 @MyIntList = external global { \2*, i32 }
diff --git a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll b/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
index ccffe688992ca..59aafd55d4c16 100644
--- a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
+++ b/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; The C Writer bombs on this testcase because it tries the print the prototype
 ; for the test function, which tries to print the argument name.  The function
diff --git a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll b/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
index bf592ce3f6fb0..6c4d62905b135 100644
--- a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
+++ b/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; Indirect function call test... found by Joel & Brian
 ;
diff --git a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll b/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
index e04890519d9e5..1187a374601ef 100644
--- a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
+++ b/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; This testcase fails because the C backend does not arrange to output the 
 ; contents of a structure type before it outputs the structure type itself.
diff --git a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll b/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
index ebb1c0fae2654..021adb9c88735 100644
--- a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
+++ b/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 define void @test() {
         %X = alloca [4 x i32]           ; <[4 x i32]*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll b/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
index 69f45753c50e9..e915cd2fb3f42 100644
--- a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
+++ b/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 
 declare void @foo(...)
diff --git a/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll b/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
index 2f6d9beb74a51..2563d8cb51e62 100644
--- a/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
+++ b/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 %MPI_Comm = type %struct.Comm*
 %struct.Comm = type opaque
diff --git a/test/CodeGen/CBackend/2002-10-16-External.ll b/test/CodeGen/CBackend/2002-10-16-External.ll
index d40cbdaa9ad0e..2cdd15cf185b4 100644
--- a/test/CodeGen/CBackend/2002-10-16-External.ll
+++ b/test/CodeGen/CBackend/2002-10-16-External.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 @bob = external global i32              ; <i32*> [#uses=0]
 
diff --git a/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll b/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
index a17b8db41c8b3..54e0aa6c0bb94 100644
--- a/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
+++ b/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
         %BitField = type i32
         %tokenptr = type i32*
diff --git a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll b/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
index 2dd281a4beeb2..82d594fc7e202 100644
--- a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
+++ b/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 @testString = internal constant [18 x i8] c"Escaped newline\5Cn\00"             ; <[18 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll b/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
index 4a7170dbbd172..92d582d7f36de 100644
--- a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
+++ b/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; Apparently this constant was unsigned in ISO C 90, but not in C 99.
 
diff --git a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll b/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
index 2a4e839d737c7..a42dc27a1e70f 100644
--- a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
+++ b/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; This testcase breaks the C backend, because gcc doesn't like (...) functions
 ; with no arguments at all.
diff --git a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll b/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
index fb7e2ba69fd51..19c7840229261 100644
--- a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
+++ b/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; The C backend was dying when there was no typename for a struct type!
 
diff --git a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll b/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
index 6b7f9f0d378a3..048e045b31e61 100644
--- a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
+++ b/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 %X = type { i32, float }
 
diff --git a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll b/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
index c6128d6df7a7b..6197b301fd4af 100644
--- a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
+++ b/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; Make sure hex constant does not continue into a valid hexadecimal letter/number
 @version = global [3 x i8] c"\001\00"
diff --git a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll b/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
index fd6821174bf2f..f6177ea7db367 100644
--- a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
+++ b/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 @version = global [3 x i8] c"1\00\00"
diff --git a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll b/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
index 9fe98e22063a2..f0b1bbc7f03b9 100644
--- a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
+++ b/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 declare i32 @callee(i32, i32)
 
diff --git a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll b/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
index ef3b579b5b472..4bd1da25b3552 100644
--- a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
+++ b/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
@@ -1,3 +1,3 @@
-; RUN: llvm-as < %s | llc -march=c | grep common | grep X
+; RUN: llc < %s -march=c | grep common | grep X
 
 @X = linkonce global i32 5
diff --git a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll b/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
index 077f16cfac926..0fbb3feef137c 100644
--- a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
+++ b/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; This is a non-normal FP value: it's a nan.
 @NAN = global { float } { float 0x7FF8000000000000 }            ; <{ float }*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll b/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
index e67ba2e5b1645..9195634b0fc46 100644
--- a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
+++ b/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 %A = type { i32, i8*, { i32, i32, i32, i32, i32, i32, i32, i32 }*, i16 }
 
diff --git a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll b/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
index 41f3f1ef0fb34..b4389ffab18c8 100644
--- a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
+++ b/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 ; reduced from DOOM.
         %union._XEvent = type { i32 }
diff --git a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll b/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
index e8da787d27fc5..6a26291240429 100644
--- a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
+++ b/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 @y = weak global i8 0           ; <i8*> [#uses=1]
 
 define i32 @testcaseshr() {
diff --git a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
index 911d6d4e9f5f1..142fbd84dd8df 100644
--- a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
+++ b/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep builtin_return_address
+; RUN: llc < %s -march=c | grep builtin_return_address
 
 declare i8* @llvm.returnaddress(i32)
 
diff --git a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll b/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
index 1629debc58fb6..d1c6861c58d06 100644
--- a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
+++ b/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
@@ -4,7 +4,7 @@
 ; this testcase for example, which caused the CBE to mangle one, screwing
 ; everything up.  :(  Test that this does not happen anymore.
 ;
-; RUN: llvm-as < %s | llc -march=c | not grep _memcpy
+; RUN: llc < %s -march=c | not grep _memcpy
 
 declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
 
diff --git a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll b/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
index 6f23915a75c02..6fceb08657412 100644
--- a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
+++ b/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
@@ -1,5 +1,5 @@
 ; This is a non-normal FP value
-; RUN: llvm-as < %s | llc -march=c | grep FPConstant | grep static
+; RUN: llc < %s -march=c | grep FPConstant | grep static
 
 define float @func() {
         ret float 0xFFF0000000000000
diff --git a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll b/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
index 2d62231b74e3a..cf59634e82c1d 100644
--- a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
+++ b/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep func1 | grep WEAK
+; RUN: llc < %s -march=c | grep func1 | grep WEAK
 
 define linkonce i32 @func1() {
         ret i32 5
diff --git a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll b/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
index ae7ba5305c00f..3ee23d1a909a2 100644
--- a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
+++ b/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 declare void @llvm.va_end(i8*)
 
diff --git a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll b/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
index a8ee438cc431d..af8f441c2229b 100644
--- a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
+++ b/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
@@ -1,7 +1,7 @@
 ; The CBE should not emit code that casts the function pointer.  This causes
 ; GCC to get testy and insert trap instructions instead of doing the right
 ; thing. :(
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 declare void @external(i8*)
 
diff --git a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll b/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
index 8acab764d7806..78e9bacd9e776 100644
--- a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
+++ b/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | not grep extern.*msg
+; RUN: llc < %s -march=c | not grep extern.*msg
 ; PR472
 
 @msg = internal global [6 x i8] c"hello\00"             ; <[6 x i8]*> [#uses=1]
diff --git a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll b/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
index 9acaa726dbe62..57a9adc7e89a3 100644
--- a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
+++ b/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 define i32 @foo() {
         ret i32 and (i32 123456, i32 ptrtoint (i32 ()* @foo to i32))
diff --git a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll b/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
index 12c87901f54ea..dd505af4831b7 100644
--- a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
+++ b/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep volatile
+; RUN: llc < %s -march=c | grep volatile
 
 define void @test(i32* %P) {
         %X = volatile load i32* %P              ; <i32> [#uses=1]
diff --git a/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll b/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
index 162e3d358fdb8..1c5f5061df633 100644
--- a/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
+++ b/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
         %JNIEnv = type %struct.JNINa*
         %struct.JNINa = type { i8*, i8*, i8*, void (%JNIEnv*)* }
diff --git a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll b/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
index 55d43e2bb0201..808b8f91407a0 100644
--- a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
+++ b/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | not grep -- --65535
+; RUN: llc < %s -march=c | not grep -- --65535
 ; PR596
 
 target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/CBackend/2005-08-23-Fmod.ll b/test/CodeGen/CBackend/2005-08-23-Fmod.ll
index 7c55019d23500..6e650eb293fc1 100644
--- a/test/CodeGen/CBackend/2005-08-23-Fmod.ll
+++ b/test/CodeGen/CBackend/2005-08-23-Fmod.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep fmod
+; RUN: llc < %s -march=c | grep fmod
 
 define double @test(double %A, double %B) {
         %C = frem double %A, %B         ; <double> [#uses=1]
diff --git a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll b/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
index 37f311d4b93b5..99de837dc79ae 100644
--- a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
+++ b/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {\\* *volatile *\\*}
+; RUN: llc < %s -march=c | grep {\\* *volatile *\\*}
 
 @G = external global void ()*           ; <void ()**> [#uses=2]
 
diff --git a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
index f8393a3fbc80b..c9df800d72d58 100644
--- a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
+++ b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | \
+; RUN: llc < %s -march=c | \
 ; RUN:   grep __BITCAST | count 14
 
 define i32 @test1(float %F) {
diff --git a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll b/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
index 63dd9da0b11d1..da36e78e0b05b 100644
--- a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
+++ b/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
@@ -1,5 +1,5 @@
 ; For PR1099
-; RUN: llvm-as < %s | llc -march=c | grep {(llvm_cbe_tmp2 == llvm_cbe_b_2e_0_2e_0_2e_val)}
+; RUN: llc < %s -march=c | grep {(llvm_cbe_tmp2 == llvm_cbe_b_2e_0_2e_0_2e_val)}
 
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll b/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
index 42fa0d897f66b..8a5f2532e701f 100644
--- a/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
+++ b/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
@@ -1,5 +1,5 @@
 ; PR918
-; RUN: llvm-as < %s | llc -march=c | not grep {l_structtype_s l_fixarray_array3}
+; RUN: llc < %s -march=c | not grep {l_structtype_s l_fixarray_array3}
 
 %structtype_s = type { i32 }
 %fixarray_array3 = type [3 x %structtype_s]
diff --git a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll b/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
index 8fe06b77d7871..4f699b792e20d 100644
--- a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
+++ b/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=c | grep __builtin_stack_save
-; RUN: llvm-as < %s | llc -march=c | grep __builtin_stack_restore
+; RUN: llc < %s -march=c | grep __builtin_stack_save
+; RUN: llc < %s -march=c | grep __builtin_stack_restore
 ; PR1028
 
 declare i8* @llvm.stacksave()
diff --git a/test/CodeGen/CBackend/2007-02-05-memset.ll b/test/CodeGen/CBackend/2007-02-05-memset.ll
index f253b30136f04..7d508e4240511 100644
--- a/test/CodeGen/CBackend/2007-02-05-memset.ll
+++ b/test/CodeGen/CBackend/2007-02-05-memset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 ; PR1181
 target datalayout = "e-p:64:64"
 target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll b/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
index eb5cb86446525..7e1ff2a9dfa0d 100644
--- a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
+++ b/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
@@ -1,7 +1,7 @@
 ; PR1164
-; RUN: llvm-as < %s | llc -march=c | grep {llvm_cbe_A = \\*llvm_cbe_G;}
-; RUN: llvm-as < %s | llc -march=c | grep {llvm_cbe_B = \\*(&ltmp_0_1);}
-; RUN: llvm-as < %s | llc -march=c | grep {return (((unsigned int )(((unsigned int )llvm_cbe_A) + ((unsigned int )llvm_cbe_B))));}
+; RUN: llc < %s -march=c | grep {llvm_cbe_A = \\*llvm_cbe_G;}
+; RUN: llc < %s -march=c | grep {llvm_cbe_B = \\*(&ltmp_0_1);}
+; RUN: llc < %s -march=c | grep {return (((unsigned int )(((unsigned int )llvm_cbe_A) + ((unsigned int )llvm_cbe_B))));}
 
 @G = global i32 123
 @ltmp_0_1 = global i32 123
diff --git a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll b/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
index 6057616111844..c8bfdd6bcfc69 100644
--- a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
+++ b/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {packed}
+; RUN: llc < %s -march=c | grep {packed}
 
 	%struct.p = type <{ i16 }>
 
diff --git a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
index 269126d759837..6e0cf68292961 100644
--- a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
+++ b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | \
+; RUN: llc < %s -march=c | \
 ; RUN:          grep {struct __attribute__ ((packed, aligned(} | count 4
 
 define void @test(i32* %P) {
diff --git a/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll b/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
index 16bf23e4d88e5..8db3167e54d46 100644
--- a/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
+++ b/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 declare {i32, i32} @foo()
 
diff --git a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll b/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
index 52e025900780f..e9fa552433a5f 100644
--- a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
+++ b/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {llvm_cbe_t.*&1}
+; RUN: llc < %s -march=c | grep {llvm_cbe_t.*&1}
 define i32 @test(i32 %r) {
   %s = icmp eq i32 %r, 0
   %t = add i1 %s, %s
diff --git a/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll b/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
index a2c10469bd7f9..054a3cad900da 100644
--- a/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
+++ b/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {"m"(llvm_cbe_newcw))}
+; RUN: llc < %s -march=c | grep {"m"(llvm_cbe_newcw))}
 ; PR2407
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
index 32d635ad720e8..b72b57343cd0d 100644
--- a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
+++ b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 ; PR2907
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin9.5"
diff --git a/test/CodeGen/CBackend/fneg.ll b/test/CodeGen/CBackend/fneg.ll
index 68849b20c7696..7dec3d9e09c2c 100644
--- a/test/CodeGen/CBackend/fneg.ll
+++ b/test/CodeGen/CBackend/fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 
 define void @func() nounwind {
   entry:
diff --git a/test/CodeGen/CBackend/pr2408.ll b/test/CodeGen/CBackend/pr2408.ll
index a16f91bfad89f..bf8477b7e6ddc 100644
--- a/test/CodeGen/CBackend/pr2408.ll
+++ b/test/CodeGen/CBackend/pr2408.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c | grep {\\* ((unsigned int )}
+; RUN: llc < %s -march=c | grep {\\* ((unsigned int )}
 ; PR2408
 
 define i32 @a(i32 %a) {
diff --git a/test/CodeGen/CBackend/vectors.ll b/test/CodeGen/CBackend/vectors.ll
index d01e99288f7ec..b7b76775f6c6f 100644
--- a/test/CodeGen/CBackend/vectors.ll
+++ b/test/CodeGen/CBackend/vectors.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=c
+; RUN: llc < %s -march=c
 @.str15 = external global [2 x i8]
 
 define <4 x i32> @foo(<4 x i32> %a, i32 %b) {
diff --git a/test/CodeGen/CPP/2007-06-16-Funcname.ll b/test/CodeGen/CPP/2007-06-16-Funcname.ll
index 75b96e6c373f3..71fea12d9c2c9 100644
--- a/test/CodeGen/CPP/2007-06-16-Funcname.ll
+++ b/test/CodeGen/CPP/2007-06-16-Funcname.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cpp -cppfname=WAKKA | not grep makeLLVMModule
+; RUN: llc < %s -march=cpp -cppfname=WAKKA | not grep makeLLVMModule
 ; PR1515
 
 define void @foo() {
diff --git a/test/CodeGen/CPP/2009-05-01-Long-Double.ll b/test/CodeGen/CPP/2009-05-01-Long-Double.ll
index 9d132ec00f7cb..0b2d882971a34 100644
--- a/test/CodeGen/CPP/2009-05-01-Long-Double.ll
+++ b/test/CodeGen/CPP/2009-05-01-Long-Double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cpp -cppgen=program -f -o %t
+; RUN: llc < %s -march=cpp -cppgen=program -o %t
 
 define x86_fp80 @some_func() nounwind {
 entry:
diff --git a/test/CodeGen/CPP/2009-05-04-CondBr.ll b/test/CodeGen/CPP/2009-05-04-CondBr.ll
index 6c3f984282c3c..feb2cf765e7d6 100644
--- a/test/CodeGen/CPP/2009-05-04-CondBr.ll
+++ b/test/CodeGen/CPP/2009-05-04-CondBr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cpp -cppgen=program -f -o %t
+; RUN: llc < %s -march=cpp -cppgen=program -o %t
 ; RUN: grep "BranchInst::Create(label_if_then, label_if_end, int1_cmp, label_entry);" %t
 
 define i32 @some_func(i32 %a) nounwind {
diff --git a/test/CodeGen/CPP/llvm2cpp.ll b/test/CodeGen/CPP/llvm2cpp.ll
index 651a65bbea928..447f332b269e0 100644
--- a/test/CodeGen/CPP/llvm2cpp.ll
+++ b/test/CodeGen/CPP/llvm2cpp.ll
@@ -1,5 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis > /dev/null
-; RUN: llvm-as < %s | llc -march=cpp -cppgen=program -o -
+; RUN: llc < %s -march=cpp -cppgen=program -o -
 
 @X = global i32 4, align 16		; <i32*> [#uses=0]
 
diff --git a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
index 75e0ed0cd2fcc..35422311c5743 100644
--- a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
+++ b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cellspu -o - | grep brz
+; RUN: llc < %s -march=cellspu -o - | grep brz
 ; PR3274
 
 target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
index a18b6f8d05fc4..139e97b967a73 100644
--- a/test/CodeGen/CellSPU/and_ops.ll
+++ b/test/CodeGen/CellSPU/and_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep and    %t1.s | count 234
 ; RUN: grep andc   %t1.s | count 85
 ; RUN: grep andi   %t1.s | count 37
diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll
index a305a23540418..960d2feadeda4 100644
--- a/test/CodeGen/CellSPU/call.ll
+++ b/test/CodeGen/CellSPU/call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep brsl    %t1.s | count 1
 ; RUN: grep brasl   %t1.s | count 1
 ; RUN: grep stqd    %t1.s | count 80
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
index 9be714ebc9b8a..639c794424f38 100644
--- a/test/CodeGen/CellSPU/call_indirect.ll
+++ b/test/CodeGen/CellSPU/call_indirect.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
 ; RUN: grep bisl    %t1.s | count 7
 ; RUN: grep ila     %t1.s | count 1
 ; RUN: grep rotqby  %t1.s | count 6
diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll
index 3c7ee7aeea2bb..e1a6cd829260f 100644
--- a/test/CodeGen/CellSPU/ctpop.ll
+++ b/test/CodeGen/CellSPU/ctpop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep cntb    %t1.s | count 3
 ; RUN: grep andi    %t1.s | count 3
 ; RUN: grep rotmi   %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll
index d4802ae8f545b..b0a372beba0de 100644
--- a/test/CodeGen/CellSPU/dp_farith.ll
+++ b/test/CodeGen/CellSPU/dp_farith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep dfa    %t1.s | count 2
 ; RUN: grep dfs    %t1.s | count 2
 ; RUN: grep dfm    %t1.s | count 6
diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll
index 540695677205f..22c8c3bff9401 100644
--- a/test/CodeGen/CellSPU/eqv.ll
+++ b/test/CodeGen/CellSPU/eqv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep eqv  %t1.s | count 18
 ; RUN: grep xshw %t1.s | count 6
 ; RUN: grep xsbh %t1.s | count 3
diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll
index bcd2f42aa77e3..0ac971c58c5b8 100644
--- a/test/CodeGen/CellSPU/extract_elt.ll
+++ b/test/CodeGen/CellSPU/extract_elt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep shufb   %t1.s | count 39
 ; RUN: grep ilhu    %t1.s | count 27
 ; RUN: grep iohl    %t1.s | count 27
diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll
index 27a659e82930c..f07fe6fdab28f 100644
--- a/test/CodeGen/CellSPU/fcmp32.ll
+++ b/test/CodeGen/CellSPU/fcmp32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep fceq  %t1.s | count 1
 ; RUN: grep fcmeq %t1.s | count 1
 
diff --git a/test/CodeGen/CellSPU/fcmp64.ll b/test/CodeGen/CellSPU/fcmp64.ll
index 1906bfe7ddaa4..2b61fa6d2dc22 100644
--- a/test/CodeGen/CellSPU/fcmp64.ll
+++ b/test/CodeGen/CellSPU/fcmp64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 
 define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind {
 entry:
diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll
index d121c3f8c907c..9921626b79cbc 100644
--- a/test/CodeGen/CellSPU/fdiv.ll
+++ b/test/CodeGen/CellSPU/fdiv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep frest    %t1.s | count 2 
 ; RUN: grep -w fi    %t1.s | count 2 
 ; RUN: grep -w fm    %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll
index 5bd66f4aaef3a..1e5e3b3414405 100644
--- a/test/CodeGen/CellSPU/fneg-fabs.ll
+++ b/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep 32768   %t1.s | count 2
 ; RUN: grep xor     %t1.s | count 4
 ; RUN: grep and     %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll
index dd6782772a5dc..3553cbbf7b5cc 100644
--- a/test/CodeGen/CellSPU/i64ops.ll
+++ b/test/CodeGen/CellSPU/i64ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep xswd	     %t1.s | count 3
 ; RUN: grep xsbh	     %t1.s | count 1
 ; RUN: grep xshw	     %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/i8ops.ll b/test/CodeGen/CellSPU/i8ops.ll
index 23a036e37443d..57a2aa894725b 100644
--- a/test/CodeGen/CellSPU/i8ops.ll
+++ b/test/CodeGen/CellSPU/i8ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 
 ; ModuleID = 'i8ops.bc'
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/icmp16.ll b/test/CodeGen/CellSPU/icmp16.ll
index 56d1b8fb41b2e..32b12617cfc38 100644
--- a/test/CodeGen/CellSPU/icmp16.ll
+++ b/test/CodeGen/CellSPU/icmp16.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep ilh                                %t1.s | count 15
 ; RUN: grep ceqh                               %t1.s | count 29
 ; RUN: grep ceqhi                              %t1.s | count 13
diff --git a/test/CodeGen/CellSPU/icmp32.ll b/test/CodeGen/CellSPU/icmp32.ll
index 4f74b0dd04290..ccbb5f7cde580 100644
--- a/test/CodeGen/CellSPU/icmp32.ll
+++ b/test/CodeGen/CellSPU/icmp32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep ila                                %t1.s | count 6
 ; RUN: grep ceq                                %t1.s | count 28
 ; RUN: grep ceqi                               %t1.s | count 12
diff --git a/test/CodeGen/CellSPU/icmp64.ll b/test/CodeGen/CellSPU/icmp64.ll
index b26252cedb305..9dd2cdc0dea9c 100644
--- a/test/CodeGen/CellSPU/icmp64.ll
+++ b/test/CodeGen/CellSPU/icmp64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep ceq                                %t1.s | count 20
 ; RUN: grep cgti                               %t1.s | count 12
 ; RUN: grep cgt                                %t1.s | count 16
diff --git a/test/CodeGen/CellSPU/icmp8.ll b/test/CodeGen/CellSPU/icmp8.ll
index d246481f03a19..5517d104ab9f2 100644
--- a/test/CodeGen/CellSPU/icmp8.ll
+++ b/test/CodeGen/CellSPU/icmp8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep ceqb                               %t1.s | count 24
 ; RUN: grep ceqbi                              %t1.s | count 12
 ; RUN: grep clgtb                              %t1.s | count 11
diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll
index 9a461cbb85a61..077d07169e455 100644
--- a/test/CodeGen/CellSPU/immed16.ll
+++ b/test/CodeGen/CellSPU/immed16.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep "ilh" %t1.s | count 11
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll
index bf471b1eb1ced..119f526847ce5 100644
--- a/test/CodeGen/CellSPU/immed32.ll
+++ b/test/CodeGen/CellSPU/immed32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep ilhu  %t1.s | count 8
 ; RUN: grep iohl  %t1.s | count 6
 ; RUN: grep -w il    %t1.s | count 3
diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll
index bbda3ff329cb7..fd483651756ed 100644
--- a/test/CodeGen/CellSPU/immed64.ll
+++ b/test/CodeGen/CellSPU/immed64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep lqa        %t1.s | count 13
 ; RUN: grep ilhu       %t1.s | count 15
 ; RUN: grep ila        %t1.s | count 1
diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll
index ee3076594ad6b..984c017c96d17 100644
--- a/test/CodeGen/CellSPU/int2fp.ll
+++ b/test/CodeGen/CellSPU/int2fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep csflt %t1.s | count 5
 ; RUN: grep cuflt %t1.s | count 1
 ; RUN: grep xshw  %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll
index 87ad18211a251..b0f6a6247e41a 100644
--- a/test/CodeGen/CellSPU/intrinsics_branch.ll
+++ b/test/CodeGen/CellSPU/intrinsics_branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep ceq     %t1.s | count 30 
 ; RUN: grep ceqb    %t1.s | count 10
 ; RUN: grep ceqhi   %t1.s | count 5
diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll b/test/CodeGen/CellSPU/intrinsics_float.ll
index c18f8deb385e9..81373470d0692 100644
--- a/test/CodeGen/CellSPU/intrinsics_float.ll
+++ b/test/CodeGen/CellSPU/intrinsics_float.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep fa      %t1.s | count 5
 ; RUN: grep fs      %t1.s | count 5
 ; RUN: grep fm      %t1.s | count 15
diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll
index 843340b74542d..a29ee4c2405d2 100644
--- a/test/CodeGen/CellSPU/intrinsics_logical.ll
+++ b/test/CodeGen/CellSPU/intrinsics_logical.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep and       %t1.s | count 20
 ; RUN: grep andc      %t1.s | count 5
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll
index 3b9746c8080a5..8e5422c58eb6d 100644
--- a/test/CodeGen/CellSPU/loads.ll
+++ b/test/CodeGen/CellSPU/loads.ll
@@ -1,6 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep {lqd.*0(\$3)}   %t1.s | count 1
-; RUN: grep {lqd.*16(\$3)}  %t1.s | count 1
+; RUN: llc < %s -march=cellspu | FileCheck %s
 
 ; ModuleID = 'loads.bc'
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
@@ -10,11 +8,13 @@ define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly {
 entry:
 	%tmp1 = load <4 x float>* %a
 	ret <4 x float> %tmp1
+; CHECK:	lqd	$3, 0($3)
 }
 
 define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly {
 entry:
-	%arrayidx = getelementptr <4 x float>* %a, i32 1		; <<4 x float>*> [#uses=1]
-	%tmp1 = load <4 x float>* %arrayidx		; <<4 x float>> [#uses=1]
+	%arrayidx = getelementptr <4 x float>* %a, i32 1
+	%tmp1 = load <4 x float>* %arrayidx
 	ret <4 x float> %tmp1
+; CHECK:	lqd	$3, 16($3)
 }
diff --git a/test/CodeGen/CellSPU/mul-with-overflow.ll b/test/CodeGen/CellSPU/mul-with-overflow.ll
index 755b99be9cdd4..d15da12649ead 100644
--- a/test/CodeGen/CellSPU/mul-with-overflow.ll
+++ b/test/CodeGen/CellSPU/mul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=cellspu
+; RUN: llc < %s -march=cellspu
 
 declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
 define i1 @a(i16 %x) zeroext nounwind {
diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll
index 085ce555dc253..031d6c37ce706 100644
--- a/test/CodeGen/CellSPU/mul_ops.ll
+++ b/test/CodeGen/CellSPU/mul_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep mpy     %t1.s | count 44
 ; RUN: grep mpyu    %t1.s | count 4
 ; RUN: grep mpyh    %t1.s | count 10
diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll
index 841a3ec54d6f2..e1419232ece7d 100644
--- a/test/CodeGen/CellSPU/nand.ll
+++ b/test/CodeGen/CellSPU/nand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep nand   %t1.s | count 90
 ; RUN: grep and    %t1.s | count 94
 ; RUN: grep xsbh   %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll
index 4e9da8f129722..8aa1e998bd0ee 100644
--- a/test/CodeGen/CellSPU/or_ops.ll
+++ b/test/CodeGen/CellSPU/or_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep and    %t1.s | count 2
 ; RUN: grep orc    %t1.s | count 85
 ; RUN: grep ori    %t1.s | count 30
diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll
index 91567ce82803b..7452276ccc8c4 100644
--- a/test/CodeGen/CellSPU/private.ll
+++ b/test/CodeGen/CellSPU/private.ll
@@ -1,6 +1,6 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llvm-as < %s | llc -march=cellspu > %t
+; RUN: llc < %s -march=cellspu > %t
 ; RUN: grep .Lfoo: %t
 ; RUN: grep brsl.*\.Lfoo %t
 ; RUN: grep .Lbaz: %t
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index e308172486a59..a504c002ae127 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu -f -o %t1.s
+; RUN: llc < %s -march=cellspu -o %t1.s
 ; RUN: grep rot          %t1.s | count 85
 ; RUN: grep roth         %t1.s | count 8
 ; RUN: grep roti.*5      %t1.s | count 1
diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll
index e83e47606c281..c804256f513b7 100644
--- a/test/CodeGen/CellSPU/select_bits.ll
+++ b/test/CodeGen/CellSPU/select_bits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep selb   %t1.s | count 56
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/sext128.ll b/test/CodeGen/CellSPU/sext128.ll
new file mode 100644
index 0000000000000..0c0b3599b110f
--- /dev/null
+++ b/test/CodeGen/CellSPU/sext128.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -march=cellspu | FileCheck %s 
+
+; ModuleID = 'sext128.bc'
+target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:128:128-v128:128:128-a0:0:128-s0:128:128"
+target triple = "spu"
+
+define i128 @sext_i64_i128(i64 %a) {
+entry:
+        %0 = sext i64 %a to i128
+        ret i128 %0
+; CHECK: 	long	269488144
+; CHECK: 	long	269488144
+; CHECK:	long	66051
+; CHECK: 	long	67438087
+; CHECK: 	rotmai
+; CHECK:	lqa
+; CHECK:	shufb
+}
+
+define i128 @sext_i32_i128(i32 %a) {
+entry:
+        %0 = sext i32 %a to i128
+        ret i128 %0
+; CHECK: 	long	269488144
+; CHECK: 	long	269488144
+; CHECK: 	long	269488144
+; CHECK:	long	66051
+; CHECK: 	rotmai
+; CHECK:	lqa
+; CHECK:	shufb
+}
+
+define i128 @sext_i32_i128a(float %a) {
+entry:
+  %0 = call i32 @myfunc(float %a)
+  %1 = sext i32 %0 to i128
+  ret i128 %1
+; CHECK: 	long	269488144
+; CHECK: 	long	269488144
+; CHECK: 	long	269488144
+; CHECK:	long	66051
+; CHECK: 	rotmai
+; CHECK:	lqa
+; CHECK:	shufb
+}
+
+declare i32 @myfunc(float)
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index 3c26baa7c7ab7..0264fc830ea84 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep {shlh	}  %t1.s | count 9
 ; RUN: grep {shlhi	}  %t1.s | count 3
 ; RUN: grep {shl	}  %t1.s | count 9
diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll
index d77dd9216cd7e..80bf47ccf5d9d 100644
--- a/test/CodeGen/CellSPU/sp_farith.ll
+++ b/test/CodeGen/CellSPU/sp_farith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu -enable-unsafe-fp-math > %t1.s
+; RUN: llc < %s -march=cellspu -enable-unsafe-fp-math > %t1.s
 ; RUN: grep fa %t1.s | count 2
 ; RUN: grep fs %t1.s | count 2
 ; RUN: grep fm %t1.s | count 6
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
index f2f35ef4dbc45..05f44f4be0460 100644
--- a/test/CodeGen/CellSPU/stores.ll
+++ b/test/CodeGen/CellSPU/stores.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep {stqd.*0(\$3)}      %t1.s | count 4
 ; RUN: grep {stqd.*16(\$3)}     %t1.s | count 4
 ; RUN: grep 16256               %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
index 82d319dd1050b..8ee7d932251aa 100644
--- a/test/CodeGen/CellSPU/struct_1.ll
+++ b/test/CodeGen/CellSPU/struct_1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
 ; RUN: grep lqa     %t1.s | count 5
 ; RUN: grep lqd     %t1.s | count 11
 ; RUN: grep rotqbyi %t1.s | count 7
diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll
index db22564f43411..d16185238af09 100644
--- a/test/CodeGen/CellSPU/trunc.ll
+++ b/test/CodeGen/CellSPU/trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep shufb   %t1.s | count 19
 ; RUN: grep {ilhu.*1799}  %t1.s | count 1
 ; RUN: grep {ilhu.*771}  %t1.s | count 2
diff --git a/test/CodeGen/CellSPU/vec_const.ll b/test/CodeGen/CellSPU/vec_const.ll
index 4b29adc809216..24c05c684084d 100644
--- a/test/CodeGen/CellSPU/vec_const.ll
+++ b/test/CodeGen/CellSPU/vec_const.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
+; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
 ; RUN: grep -w il  %t1.s | count 3
 ; RUN: grep ilhu   %t1.s | count 8
 ; RUN: grep -w ilh %t1.s | count 5
diff --git a/test/CodeGen/CellSPU/vecinsert.ll b/test/CodeGen/CellSPU/vecinsert.ll
index 6abbd9ac797de..9a00c1f29f8fc 100644
--- a/test/CodeGen/CellSPU/vecinsert.ll
+++ b/test/CodeGen/CellSPU/vecinsert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep cbd     %t1.s | count 5
 ; RUN: grep chd     %t1.s | count 5
 ; RUN: grep cwd     %t1.s | count 10
diff --git a/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll b/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll
index f6d95cbd53fc0..dd382cfcb24de 100644
--- a/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll
+++ b/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc 
+; RUN: llc < %s
 
 ; This caused the backend to assert out with:
 ; SparcInstrInfo.cpp:103: failed assertion `0 && "Unexpected unsigned type"'
diff --git a/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll b/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll
index d77b9e15596ee..751ed407456d0 100644
--- a/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll
+++ b/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ; Compiling this file produces:
 ; Sparc.cpp:91: failed assertion `(offset - OFFSET) % getStackFrameSizeAlignment() == 0'
diff --git a/test/CodeGen/Generic/2003-05-27-phifcmpd.ll b/test/CodeGen/Generic/2003-05-27-phifcmpd.ll
index cf17ef455418c..6fb17991e7393 100644
--- a/test/CodeGen/Generic/2003-05-27-phifcmpd.ll
+++ b/test/CodeGen/Generic/2003-05-27-phifcmpd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @QRiterate(i32 %p.1, double %tmp.212) {
 entry:
diff --git a/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll b/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll
index 03b2a16846706..14bb00048d203 100644
--- a/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll
+++ b/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @QRiterate(double %tmp.212) {
         %tmp.213 = fcmp une double %tmp.212, 0.000000e+00               ; <i1> [#uses=1]
diff --git a/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll b/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll
index b456eebea2c23..cc0eb5cd1374a 100644
--- a/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll
+++ b/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @QRiterate(double %tmp.212) {
 entry:
diff --git a/test/CodeGen/Generic/2003-05-28-ManyArgs.ll b/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
index 595700ad183ef..c6fbdaef82938 100644
--- a/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
+++ b/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ;; Date:     May 28, 2003.
 ;; From:     test/Programs/External/SPEC/CINT2000/175.vpr.llvm.bc
diff --git a/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll b/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
index 41c90bd29d385..10d3a11a5190d 100644
--- a/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
+++ b/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ;; Date:     May 28, 2003.
 ;; From:     test/Programs/External/SPEC/CINT2000/254.gap.llvm.bc
diff --git a/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll b/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll
index 43bff82ef7cc2..f7c3e42dc4879 100644
--- a/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll
+++ b/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ;; Date:     May 28, 2003.
 ;; From:     test/Programs/SingleSource/richards_benchmark.c
diff --git a/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll b/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
index d66ea186e8731..1d1aad5f27e29 100644
--- a/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
+++ b/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ;; Date: May 28, 2003.
 ;; From: test/Programs/MultiSource/Olden-perimeter/maketree.c
diff --git a/test/CodeGen/Generic/2003-07-07-BadLongConst.ll b/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
index 80738d54370ca..64312ba09a50b 100644
--- a/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
+++ b/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @.str_1 = internal constant [42 x i8] c"   ui = %u (0x%x)\09\09UL-ui = %lld (0x%llx)\0A\00"             ; <[42 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll b/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
index 4532b760c7073..8019caa832d72 100644
--- a/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
+++ b/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ;; Date:     Jul 8, 2003.
 ;; From:     test/Programs/MultiSource/Olden-perimeter
diff --git a/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll b/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
index 54880db392896..4e6fe1cf8bf51 100644
--- a/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
+++ b/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ;; Date:     Jul 29, 2003.
 ;; From:     test/Programs/MultiSource/Ptrdist-bc
diff --git a/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll b/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll
index 10d40693d8e56..393062abf78e1 100644
--- a/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll
+++ b/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-correct-eh-support
+; RUN: llc < %s -enable-correct-eh-support
 
 define i32 @test() {
         unwind
diff --git a/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll b/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
index 1f58ce11fe71a..d4a4cf88ce0cb 100644
--- a/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
+++ b/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 @global_long_1 = linkonce global i64 7          ; <i64*> [#uses=1]
 @global_long_2 = linkonce global i64 49         ; <i64*> [#uses=1]
 
diff --git a/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll b/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll
index ed8b2a22607d7..7fd23612fb5f3 100644
--- a/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll
+++ b/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @intersect_pixel() {
 entry:
diff --git a/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll b/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
index 37aaa3237caa9..353e411b08871 100644
--- a/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
+++ b/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s 
         %struct.TypHeader = type { i32, %struct.TypHeader**, [3 x i8], i8 }
 @.str_67 = external global [4 x i8]             ; <[4 x i8]*> [#uses=1]
 @.str_87 = external global [17 x i8]            ; <[17 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll b/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
index ab3a31d3c077c..733202c8a96b9 100644
--- a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
+++ b/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; Test that llvm.memcpy works with a i64 length operand on all targets.
 
 declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
diff --git a/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll b/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll
index b2bea1c6be3b0..08060bf3d6f11 100644
--- a/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll
+++ b/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @test() {
         %X = alloca {  }                ; <{  }*> [#uses=0]
diff --git a/test/CodeGen/Generic/2005-10-21-longlonggtu.ll b/test/CodeGen/Generic/2005-10-21-longlonggtu.ll
index b355b026531e4..53a9cd0f26597 100644
--- a/test/CodeGen/Generic/2005-10-21-longlonggtu.ll
+++ b/test/CodeGen/Generic/2005-10-21-longlonggtu.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define float @t(i64 %u_arg) {
         %u = bitcast i64 %u_arg to i64          ; <i64> [#uses=1]
diff --git a/test/CodeGen/Generic/2005-12-01-Crash.ll b/test/CodeGen/Generic/2005-12-01-Crash.ll
index ee72ee1317b0c..a9eeddedc54d2 100644
--- a/test/CodeGen/Generic/2005-12-01-Crash.ll
+++ b/test/CodeGen/Generic/2005-12-01-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 @str = external global [36 x i8]		; <[36 x i8]*> [#uses=0]
 @str.upgrd.1 = external global [29 x i8]		; <[29 x i8]*> [#uses=0]
 @str1 = external global [29 x i8]		; <[29 x i8]*> [#uses=0]
diff --git a/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll b/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll
index bd2e043c96f42..349540fb384e4 100644
--- a/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll
+++ b/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i64 @test(i64 %A) {
         %B = trunc i64 %A to i8         ; <i8> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll b/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll
index 1a555b3550676..42e8ed02ca506 100644
--- a/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll
+++ b/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; ModuleID = '2006-01-12-BadSetCCFold.ll'
 	%struct.node_t = type { double*, %struct.node_t*, %struct.node_t**, double**, double*, i32, i32 }
 
diff --git a/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll b/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll
index b1e08c759c088..f06d3412a9d56 100644
--- a/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll
+++ b/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; This crashed the PPC backend.
 
 define void @test() {
diff --git a/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll b/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
index bacf8b5e2f9ec..5508272b55514 100644
--- a/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
+++ b/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 @G = external global i32		; <i32*> [#uses=1]
 
 define void @encode_one_frame(i64 %tmp.2i) {
diff --git a/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll b/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
index 9607ebee1cc86..2a6cc0c9cdd21 100644
--- a/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
+++ b/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; Infinite loop in the dag combiner, reduced from 176.gcc.	
 %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
 	%struct.anon = type { i32 }
diff --git a/test/CodeGen/Generic/2006-04-11-vecload.ll b/test/CodeGen/Generic/2006-04-11-vecload.ll
index cc96d8f1ab6ac..a68ed838c24fd 100644
--- a/test/CodeGen/Generic/2006-04-11-vecload.ll
+++ b/test/CodeGen/Generic/2006-04-11-vecload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
 
 ; The vload was getting memoized to the previous scalar load!
 
diff --git a/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll b/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
index b99aa98fe9e87..8465b829e29ff 100644
--- a/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
+++ b/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR748
 @G = external global i16		; <i16*> [#uses=1]
 
diff --git a/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll b/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll
index 6b9bf11860f98..22d8f99beea43 100644
--- a/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll
+++ b/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i32 @test(i32 %tmp93) {
         %tmp98 = shl i32 %tmp93, 31             ; <i32> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll b/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
index 59ed2953afadd..1a9fa9f5de6b4 100644
--- a/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
+++ b/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc	
+; RUN: llc < %s	
 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 	%struct.SYMBOL_TABLE_ENTRY = type { [9 x i8], [9 x i8], i32, i32, i32, %struct.SYMBOL_TABLE_ENTRY* }
 	%struct.__sFILEX = type opaque
diff --git a/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll b/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll
index b644bd2be78fd..a3720a9e3ce77 100644
--- a/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll
+++ b/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s -O0
 
 define float @test(i32 %tmp12771278) {
         switch i32 %tmp12771278, label %bb1279 [
diff --git a/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll b/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
index 1aa3c62f955b6..bd922b3aa8512 100644
--- a/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
+++ b/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s -O0
 	
 %struct.cl_perfunc_opts = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32 }
 @cl_pf_opts = external global %struct.cl_perfunc_opts		; <%struct.cl_perfunc_opts*> [#uses=2]
diff --git a/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll b/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
index 8e8f18639bdb0..c4f2fb0c47268 100644
--- a/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
+++ b/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc	
+; RUN: llc < %s	
 %struct.rtunion = type { i64 }
 	%struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
 @ix86_cpu = external global i32		; <i32*> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-07-03-schedulers.ll b/test/CodeGen/Generic/2006-07-03-schedulers.ll
index 597ee56609ba2..756bd5ddb1ae0 100644
--- a/test/CodeGen/Generic/2006-07-03-schedulers.ll
+++ b/test/CodeGen/Generic/2006-07-03-schedulers.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -pre-RA-sched=default
-; RUN: llvm-as < %s | llc -pre-RA-sched=list-burr
-; RUN: llvm-as < %s | llc -pre-RA-sched=fast
+; RUN: llc < %s -pre-RA-sched=default
+; RUN: llc < %s -pre-RA-sched=list-burr
+; RUN: llc < %s -pre-RA-sched=fast
 ; PR859
 
 ; The top-down schedulers are excluded here because they don't yet support
diff --git a/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll b/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll
index 7f8af5dda4980..cbe8b15a2e83e 100644
--- a/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll
+++ b/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc	
+; RUN: llc < %s	
 %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
 	%struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
 	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
diff --git a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
index c6d0dfee38158..4b332b32cf124 100644
--- a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
+++ b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -regalloc=local
+; RUN: llc < %s -regalloc=local
 	
 %struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 }
 @search = external global %struct.CHESS_POSITION		; <%struct.CHESS_POSITION*> [#uses=2]
diff --git a/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll b/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll
index 2134d3302bc2c..3d592b3a38aaf 100644
--- a/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll
+++ b/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @foo() {
 	br label %cond_true813.i
diff --git a/test/CodeGen/Generic/2006-10-27-CondFolding.ll b/test/CodeGen/Generic/2006-10-27-CondFolding.ll
index b3cfb9941bbb2..51902c867f60b 100644
--- a/test/CodeGen/Generic/2006-10-27-CondFolding.ll
+++ b/test/CodeGen/Generic/2006-10-27-CondFolding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s 
 
 define void @start_pass_huff(i32 %gather_statistics) {
 entry:
diff --git a/test/CodeGen/Generic/2006-10-29-Crash.ll b/test/CodeGen/Generic/2006-10-29-Crash.ll
index cabec54d16108..7dcb52cf00f7a 100644
--- a/test/CodeGen/Generic/2006-10-29-Crash.ll
+++ b/test/CodeGen/Generic/2006-10-29-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @form_component_prediction(i32 %dy) {
 entry:
diff --git a/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll b/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll
index a773759f36ea9..ad3e49f8f9224 100644
--- a/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll
+++ b/test/CodeGen/Generic/2006-11-06-MemIntrinsicExpand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep adc
+; RUN: llc < %s -march=x86 | not grep adc
 ; PR987
 
 declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
diff --git a/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll b/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
index 95ef53c62ec52..26d0f4f96ae8b 100644
--- a/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
+++ b/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1011	
 %struct.mng_data = type { i8* (%struct.mng_data*, i32)*, i32, i32, i32, i8, i8, i32, i32, i32, i32, i32 }
 
diff --git a/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll b/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll
index 91ac3b9909dc1..50a244b9e05bf 100644
--- a/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll
+++ b/test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR1049
 target datalayout = "e-p:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll b/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
index 49203d95d4a06..255b12092a77d 100644
--- a/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
+++ b/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1114
 
 declare i1 @foo()
diff --git a/test/CodeGen/Generic/2007-02-16-BranchFold.ll b/test/CodeGen/Generic/2007-02-16-BranchFold.ll
index 0a8e49e56e490..6bf5631b4e34d 100644
--- a/test/CodeGen/Generic/2007-02-16-BranchFold.ll
+++ b/test/CodeGen/Generic/2007-02-16-BranchFold.ll
@@ -1,5 +1,5 @@
 ; PR 1200
-; RUN: llvm-as < %s | llc -enable-tail-merge=0 | not grep jmp 
+; RUN: llc < %s -enable-tail-merge=0 | not grep jmp 
 
 ; ModuleID = '<stdin>'
 target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll b/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll
index 8b7db478181e2..a8f0e576b95e7 100644
--- a/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll
+++ b/test/CodeGen/Generic/2007-02-23-DAGCombine-Miscompile.ll
@@ -1,5 +1,5 @@
 ; PR1219
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl	\$1, %eax}
+; RUN: llc < %s -march=x86 | grep {movl	\$1, %eax}
 
 define i32 @test(i1 %X) {
 old_entry1:
diff --git a/test/CodeGen/Generic/2007-02-25-invoke.ll b/test/CodeGen/Generic/2007-02-25-invoke.ll
index 6dba99e21f790..6e20eaae3bde3 100644
--- a/test/CodeGen/Generic/2007-02-25-invoke.ll
+++ b/test/CodeGen/Generic/2007-02-25-invoke.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ; PR1224
 
diff --git a/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll b/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll
index 9cbf3146eb80c..339f0f71ed5ab 100644
--- a/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll
+++ b/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; XFAIL: sparc-sun-solaris2
 ; PR1308
 ; PR1557
diff --git a/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll b/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll
index 1418bbf16dfd8..a0b1403cf8d12 100644
--- a/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll
+++ b/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -O0
+; RUN: llc < %s -O0
 ; PR 1323
 
 ; ModuleID = 'test.bc'
diff --git a/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll b/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll
index 5490687e1ba81..00337b930145f 100644
--- a/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll
+++ b/test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 8388635
-; RUN: llvm-as < %s | llc -march=x86-64 | grep 4294981120
+; RUN: llc < %s -march=x86 | grep 8388635
+; RUN: llc < %s -march=x86-64 | grep 4294981120
 ; PR 1325
 
 ; ModuleID = 'bugpoint.test.bc'
diff --git a/test/CodeGen/Generic/2007-04-17-lsr-crash.ll b/test/CodeGen/Generic/2007-04-17-lsr-crash.ll
index 4257e9f4c2dcd..98f87e5c514c2 100644
--- a/test/CodeGen/Generic/2007-04-17-lsr-crash.ll
+++ b/test/CodeGen/Generic/2007-04-17-lsr-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @foo(i32 %inTextSize) {
 entry:
diff --git a/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll b/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll
index 16d7a1654d936..3e8857f37cb93 100644
--- a/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll
+++ b/test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep je | count 3
-; RUN: llvm-as < %s | llc -march=x86-64 | grep 4297064449
+; RUN: llc < %s -march=x86 | grep je | count 3
+; RUN: llc < %s -march=x86-64 | grep 4297064449
 ; PR 1325+
 
 define i32 @foo(i8 %bar) {
diff --git a/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll b/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll
index 0ea13a2ad2e84..af522dc4c58d1 100644
--- a/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll
+++ b/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ; Test that we can have an "X" output constraint.
 
diff --git a/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll b/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll
index 9424ea7a29fde..f2c9b7f849b6f 100644
--- a/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll
+++ b/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
         %struct..0anon = type { [100 x i32] }
 
diff --git a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
index 71b4c857d0794..568b88f4df19e 100644
--- a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
+++ b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s 
 ; PR1228
 
 	"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
diff --git a/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll b/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
index 8a427902f7218..533aa4a8d9b08 100644
--- a/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
+++ b/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh -march=x86
+; RUN: llc < %s -enable-eh -march=x86
 
 	%struct.exception = type { i8, i8, i32, i8*, i8*, i32, i8* }
 @program_error = external global %struct.exception		; <%struct.exception*> [#uses=1]
diff --git a/test/CodeGen/Generic/2007-05-05-Personality.ll b/test/CodeGen/Generic/2007-05-05-Personality.ll
index 0fa0e2ff6b220..27493261d569f 100644
--- a/test/CodeGen/Generic/2007-05-05-Personality.ll
+++ b/test/CodeGen/Generic/2007-05-05-Personality.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -enable-eh -o - | grep zPLR
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -o - | grep zPLR
 
 @error = external global i8		; <i8*> [#uses=2]
 
diff --git a/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll b/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
index a61108a0012af..b989819f40390 100644
--- a/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
+++ b/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 	%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
 	%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
diff --git a/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll b/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll
index 0b98ebe4b665f..33a36452b2e55 100644
--- a/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll
+++ b/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -enable-eh -asm-verbose -o - | \
+; RUN: llc < %s -march=x86 -enable-eh -asm-verbose -o - | \
 ; RUN:   grep -A 3 {Llabel138.*Region start} | grep {3.*Action}
 ; PR1422
 ; PR1508
diff --git a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll b/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
index cedee6ffbe0de..e220be6389dc6 100644
--- a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
+++ b/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -o -
+; RUN: llc < %s -o -
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll b/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
index 98871d0e3a45d..bd26481bd3060 100644
--- a/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
+++ b/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh
+; RUN: llc < %s -enable-eh
 
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
index 41fdb71ddf491..fc9164f7c4aa3 100644
--- a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
+++ b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh
+; RUN: llc < %s -enable-eh
 ; PR1833
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll b/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
index 4b2544446939b..314bb05c67841 100644
--- a/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
+++ b/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; rdar://5707064
 
 define i32 @f(i16* %pc) {
diff --git a/test/CodeGen/Generic/2008-01-30-LoadCrash.ll b/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
index 8ed4139bd64fc..70c3aaabedc18 100644
--- a/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
+++ b/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @letters.3100 = external constant [63 x i8]		; <[63 x i8]*> [#uses=2]
 
diff --git a/test/CodeGen/Generic/2008-02-04-Ctlz.ll b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
index 4639b6f977038..288bfd245da90 100644
--- a/test/CodeGen/Generic/2008-02-04-Ctlz.ll
+++ b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @.str = internal constant [14 x i8] c"%lld %d %d %d\00"
 
diff --git a/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll b/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
index 9acb852bced0c..8bf82dfe186d6 100644
--- a/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
+++ b/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i32 @main() nounwind  {
 entry:
diff --git a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
index ef60f92fa05aa..da1aeb556a39d 100644
--- a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
+++ b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1133
 define void @test(i32* %X) nounwind  {
 entry:
diff --git a/test/CodeGen/Generic/2008-02-25-NegateZero.ll b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
index 01693079a3efb..97db667dc13a7 100644
--- a/test/CodeGen/Generic/2008-02-25-NegateZero.ll
+++ b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s 
 ; rdar://5763967
 
 define void @test() {
diff --git a/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll b/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
index b2112f3ad39bc..10b3d444c682f 100644
--- a/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
+++ b/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2096
 	%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
 	%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
diff --git a/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll b/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
index a60d101fee252..4f95dfe8a730e 100644
--- a/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
+++ b/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2603
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
index c5844027b44d9..6281ada73fc6b 100644
--- a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
+++ b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR3806
 
 	%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
diff --git a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
index 40ad3deaeea70..9a9c1a110d61a 100644
--- a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
+++ b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -soft-float
+; RUN: llc < %s -soft-float
 ; PR3899
 
 @m = external global <2 x double>;
diff --git a/test/CodeGen/Generic/2009-04-10-SinkCrash.ll b/test/CodeGen/Generic/2009-04-10-SinkCrash.ll
index 3637a06c217f0..125f87594b85c 100644
--- a/test/CodeGen/Generic/2009-04-10-SinkCrash.ll
+++ b/test/CodeGen/Generic/2009-04-10-SinkCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @QRiterate(i32 %p.1, double %tmp.212) nounwind {
 entry:
diff --git a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
index 405a6a8d6e907..577b547007d0d 100644
--- a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
+++ b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; rdar://6836460
 
 define i32 @test(i128* %P) nounwind {
diff --git a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
index 59e7d0c7a8f58..112cac4f96445 100644
--- a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
+++ b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR4317
 
 declare i32 @b()
diff --git a/test/CodeGen/Generic/APIntLoadStore.ll b/test/CodeGen/Generic/APIntLoadStore.ll
index 57ddae2c1b026..7c71a33fc3fde 100644
--- a/test/CodeGen/Generic/APIntLoadStore.ll
+++ b/test/CodeGen/Generic/APIntLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
 @i1_l = external global i1		; <i1*> [#uses=1]
 @i1_s = external global i1		; <i1*> [#uses=1]
 @i2_l = external global i2		; <i2*> [#uses=1]
diff --git a/test/CodeGen/Generic/APIntParam.ll b/test/CodeGen/Generic/APIntParam.ll
index f80f71b176184..8aa0b494c26b0 100644
--- a/test/CodeGen/Generic/APIntParam.ll
+++ b/test/CodeGen/Generic/APIntParam.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
 @i1_s = external global i1		; <i1*> [#uses=1]
 @i2_s = external global i2		; <i2*> [#uses=1]
 @i3_s = external global i3		; <i3*> [#uses=1]
diff --git a/test/CodeGen/Generic/APIntSextParam.ll b/test/CodeGen/Generic/APIntSextParam.ll
index 9fb06cb1bfb1d..acc0eebcada85 100644
--- a/test/CodeGen/Generic/APIntSextParam.ll
+++ b/test/CodeGen/Generic/APIntSextParam.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
 @i1_s = external global i1		; <i1*> [#uses=1]
 @i2_s = external global i2		; <i2*> [#uses=1]
 @i3_s = external global i3		; <i3*> [#uses=1]
diff --git a/test/CodeGen/Generic/APIntZextParam.ll b/test/CodeGen/Generic/APIntZextParam.ll
index ea7743ecd086c..173b9fd74ca42 100644
--- a/test/CodeGen/Generic/APIntZextParam.ll
+++ b/test/CodeGen/Generic/APIntZextParam.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
 @i1_s = external global i1		; <i1*> [#uses=1]
 @i2_s = external global i2		; <i2*> [#uses=1]
 @i3_s = external global i3		; <i3*> [#uses=1]
diff --git a/test/CodeGen/Generic/BasicInstrs.ll b/test/CodeGen/Generic/BasicInstrs.ll
index e65cbf772fd9f..578431e8efa4c 100644
--- a/test/CodeGen/Generic/BasicInstrs.ll
+++ b/test/CodeGen/Generic/BasicInstrs.ll
@@ -1,7 +1,7 @@
 ; New testcase, this contains a bunch of simple instructions that should be
 ; handled by a code generator.
 
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i32 @add(i32 %A, i32 %B) {
 	%R = add i32 %A, %B		; <i32> [#uses=1]
diff --git a/test/CodeGen/Generic/BurgBadRegAlloc.ll b/test/CodeGen/Generic/BurgBadRegAlloc.ll
index 3ccc9a0aeb69f..99d856aea9901 100644
--- a/test/CodeGen/Generic/BurgBadRegAlloc.ll
+++ b/test/CodeGen/Generic/BurgBadRegAlloc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ;; Register allocation is doing a very poor job on this routine from yyparse
 ;; in Burg:
diff --git a/test/CodeGen/Generic/ConstantExprLowering.ll b/test/CodeGen/Generic/ConstantExprLowering.ll
index d26541596da5b..428d712462d6b 100644
--- a/test/CodeGen/Generic/ConstantExprLowering.ll
+++ b/test/CodeGen/Generic/ConstantExprLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @.str_1 = internal constant [16 x i8] c"%d %d %d %d %d\0A\00"           ; <[16 x i8]*> [#uses=1]
 @XA = external global i32               ; <i32*> [#uses=1]
diff --git a/test/CodeGen/Generic/GC/alloc_loop.ll b/test/CodeGen/Generic/GC/alloc_loop.ll
index b1fee68abab8d..fb78ba2cd10b3 100644
--- a/test/CodeGen/Generic/GC/alloc_loop.ll
+++ b/test/CodeGen/Generic/GC/alloc_loop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 
 declare i8* @llvm_gc_allocate(i32)
diff --git a/test/CodeGen/Generic/GC/argpromotion.ll b/test/CodeGen/Generic/GC/argpromotion.ll
index 5df947a9fb12a..dda376d6168e0 100644
--- a/test/CodeGen/Generic/GC/argpromotion.ll
+++ b/test/CodeGen/Generic/GC/argpromotion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -anders-aa -argpromotion
+; RUN: opt < %s -anders-aa -argpromotion
 
 declare void @llvm.gcroot(i8**, i8*)
 
diff --git a/test/CodeGen/Generic/GC/deadargelim.ll b/test/CodeGen/Generic/GC/deadargelim.ll
index c5a56f600dd0e..176019020ad4f 100644
--- a/test/CodeGen/Generic/GC/deadargelim.ll
+++ b/test/CodeGen/Generic/GC/deadargelim.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -deadargelim
+; RUN: opt < %s -deadargelim
 
 declare void @llvm.gcroot(i8**, i8*)
 
diff --git a/test/CodeGen/Generic/GC/frame_size.ll b/test/CodeGen/Generic/GC/frame_size.ll
index 75626c18c5b17..31783cdb97efd 100644
--- a/test/CodeGen/Generic/GC/frame_size.ll
+++ b/test/CodeGen/Generic/GC/frame_size.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -asm-verbose | grep {frame size} | grep -v 0x0
+; RUN: llc < %s -asm-verbose | grep {frame size} | grep -v 0x0
 
 declare void @llvm.gcroot(i8** %value, i8* %tag)
 declare void @g() gc "ocaml"
diff --git a/test/CodeGen/Generic/GC/inline.ll b/test/CodeGen/Generic/GC/inline.ll
index 157e19d2d9297..9da33aef8dd38 100644
--- a/test/CodeGen/Generic/GC/inline.ll
+++ b/test/CodeGen/Generic/GC/inline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep example
+; RUN: opt < %s -inline -S | grep example
 
 	%IntArray = type { i32, [0 x i32*] }
 
diff --git a/test/CodeGen/Generic/GC/inline2.ll b/test/CodeGen/Generic/GC/inline2.ll
index b45ef7c47f526..15947056ee39e 100644
--- a/test/CodeGen/Generic/GC/inline2.ll
+++ b/test/CodeGen/Generic/GC/inline2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep sample
-; RUN: llvm-as < %s | opt -inline | llvm-dis | grep example
+; RUN: opt < %s -inline -S | grep sample
+; RUN: opt < %s -inline -S | grep example
 
 	%IntArray = type { i32, [0 x i32*] }
 
diff --git a/test/CodeGen/Generic/GC/lower_gcroot.ll b/test/CodeGen/Generic/GC/lower_gcroot.ll
index bd5a2bd14b4e6..c2d418ac50ef5 100644
--- a/test/CodeGen/Generic/GC/lower_gcroot.ll
+++ b/test/CodeGen/Generic/GC/lower_gcroot.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 	%Env = type i8*
 
diff --git a/test/CodeGen/Generic/GC/redundant_init.ll b/test/CodeGen/Generic/GC/redundant_init.ll
index 44996034748bd..10c70e731052a 100644
--- a/test/CodeGen/Generic/GC/redundant_init.ll
+++ b/test/CodeGen/Generic/GC/redundant_init.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
 ; RUN:   ignore grep {movl..0} | count 0
 
 %struct.obj = type { i8*, %struct.obj* }
diff --git a/test/CodeGen/Generic/GC/simple_ocaml.ll b/test/CodeGen/Generic/GC/simple_ocaml.ll
index a33e0351f7fd8..f765dc029da5a 100644
--- a/test/CodeGen/Generic/GC/simple_ocaml.ll
+++ b/test/CodeGen/Generic/GC/simple_ocaml.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep caml.*__frametable
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl	.0}
+; RUN: llc < %s | grep caml.*__frametable
+; RUN: llc < %s -march=x86 | grep {movl	.0}
 
 %struct.obj = type { i8*, %struct.obj* }
 
diff --git a/test/CodeGen/Generic/Makefile b/test/CodeGen/Generic/Makefile
index d228f69a85661..26ebc316a215b 100644
--- a/test/CodeGen/Generic/Makefile
+++ b/test/CodeGen/Generic/Makefile
@@ -1,10 +1,10 @@
 # Makefile for running ad-hoc custom LLVM tests
 #
 %.bc: %.ll
-	llvm-as -f $< 
+	llvm-as $< 
 	
 %.llc.s: %.bc
-	llc -f $< -o $@ 
+	llc $< -o $@ 
 
 %.gcc.s: %.c
 	gcc -O0 -S $< -o $@
diff --git a/test/CodeGen/Generic/SwitchLowering.ll b/test/CodeGen/Generic/SwitchLowering.ll
index 9fdfd8d5f63e9..29a0e82bf59f1 100644
--- a/test/CodeGen/Generic/SwitchLowering.ll
+++ b/test/CodeGen/Generic/SwitchLowering.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 1
+; RUN: llc < %s -march=x86 | grep cmp | count 1
 ; PR964
 
 define i8* @FindChar(i8* %CurPtr) {
diff --git a/test/CodeGen/Generic/add-with-overflow-24.ll b/test/CodeGen/Generic/add-with-overflow-24.ll
index debdeb25af07e..63f5a222a003f 100644
--- a/test/CodeGen/Generic/add-with-overflow-24.ll
+++ b/test/CodeGen/Generic/add-with-overflow-24.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/Generic/add-with-overflow.ll b/test/CodeGen/Generic/add-with-overflow.ll
index 5c3d540c5cc29..0c2c9608deb9f 100644
--- a/test/CodeGen/Generic/add-with-overflow.ll
+++ b/test/CodeGen/Generic/add-with-overflow.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -fast-isel
+; RUN: llc < %s
+; RUN: llc < %s -fast-isel
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/Generic/addc-fold2.ll b/test/CodeGen/Generic/addc-fold2.ll
index 8f3cdd0793d4b..34f5ac1b9814e 100644
--- a/test/CodeGen/Generic/addc-fold2.ll
+++ b/test/CodeGen/Generic/addc-fold2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep add
-; RUN: llvm-as < %s | llc -march=x86 | not grep adc
+; RUN: llc < %s -march=x86 | grep add
+; RUN: llc < %s -march=x86 | not grep adc
 
 define i64 @test(i64 %A, i32 %B) {
         %tmp12 = zext i32 %B to i64             ; <i64> [#uses=1]
diff --git a/test/CodeGen/Generic/asm-large-immediate.ll b/test/CodeGen/Generic/asm-large-immediate.ll
index 70649133712c0..605665bef6d19 100644
--- a/test/CodeGen/Generic/asm-large-immediate.ll
+++ b/test/CodeGen/Generic/asm-large-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep 68719476738
+; RUN: llc < %s | grep 68719476738
 
 define void @test() {
 entry:
diff --git a/test/CodeGen/Generic/badCallArgLRLLVM.ll b/test/CodeGen/Generic/badCallArgLRLLVM.ll
index 56384748ac05b..4ed88df4a538c 100644
--- a/test/CodeGen/Generic/badCallArgLRLLVM.ll
+++ b/test/CodeGen/Generic/badCallArgLRLLVM.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ; This caused a problem because the argument of a call was defined by
 ; the return value of another call that appears later in the code.
diff --git a/test/CodeGen/Generic/badFoldGEP.ll b/test/CodeGen/Generic/badFoldGEP.ll
index 8de12512d1451..2d4474bdf9301 100644
--- a/test/CodeGen/Generic/badFoldGEP.ll
+++ b/test/CodeGen/Generic/badFoldGEP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ;; GetMemInstArgs() folded the two getElementPtr instructions together,
 ;; producing an illegal getElementPtr.  That's because the type generated
diff --git a/test/CodeGen/Generic/badarg6.ll b/test/CodeGen/Generic/badarg6.ll
index 1ff7df42a3484..d6e5ac5791e43 100644
--- a/test/CodeGen/Generic/badarg6.ll
+++ b/test/CodeGen/Generic/badarg6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ; On this code, llc did not pass the sixth argument (%reg321) to printf.
 ; It passed the first five in %o0 - %o4, but never initialized %o5.
diff --git a/test/CodeGen/Generic/badlive.ll b/test/CodeGen/Generic/badlive.ll
index 0114fb0fa0603..43b03e31fa83b 100644
--- a/test/CodeGen/Generic/badlive.ll
+++ b/test/CodeGen/Generic/badlive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i32 @main() {
 bb0:
diff --git a/test/CodeGen/Generic/bool-to-double.ll b/test/CodeGen/Generic/bool-to-double.ll
index d6c9e52395164..81350a40b4dbe 100644
--- a/test/CodeGen/Generic/bool-to-double.ll
+++ b/test/CodeGen/Generic/bool-to-double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 define double @test(i1 %X) {
         %Y = uitofp i1 %X to double             ; <double> [#uses=1]
         ret double %Y
diff --git a/test/CodeGen/Generic/bool-vector.ll b/test/CodeGen/Generic/bool-vector.ll
index e0f2a70886a77..4758697286a25 100644
--- a/test/CodeGen/Generic/bool-vector.ll
+++ b/test/CodeGen/Generic/bool-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1845
 
 define void @boolVectorSelect(<4 x i1>* %boolVectorPtr) {
diff --git a/test/CodeGen/Generic/call-ret0.ll b/test/CodeGen/Generic/call-ret0.ll
index 7ab966ba90701..a8e00cd54ef71 100644
--- a/test/CodeGen/Generic/call-ret0.ll
+++ b/test/CodeGen/Generic/call-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 define i32 @foo(i32 %x) {
         ret i32 %x
 }
diff --git a/test/CodeGen/Generic/call-ret42.ll b/test/CodeGen/Generic/call-ret42.ll
index ac9bd9235794e..95cc28625aa6a 100644
--- a/test/CodeGen/Generic/call-ret42.ll
+++ b/test/CodeGen/Generic/call-ret42.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i32 @foo(i32 %x) {
         ret i32 42
diff --git a/test/CodeGen/Generic/call-void.ll b/test/CodeGen/Generic/call-void.ll
index b882689923968..9ed4179415930 100644
--- a/test/CodeGen/Generic/call-void.ll
+++ b/test/CodeGen/Generic/call-void.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @foo() {
         ret void
diff --git a/test/CodeGen/Generic/call2-ret0.ll b/test/CodeGen/Generic/call2-ret0.ll
index 8c7e8920f2598..4e57ef804f229 100644
--- a/test/CodeGen/Generic/call2-ret0.ll
+++ b/test/CodeGen/Generic/call2-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i32 @bar(i32 %x) {
         ret i32 0
diff --git a/test/CodeGen/Generic/cast-fp.ll b/test/CodeGen/Generic/cast-fp.ll
index 5f05d85ea8543..590b7ceee4bf3 100644
--- a/test/CodeGen/Generic/cast-fp.ll
+++ b/test/CodeGen/Generic/cast-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 @a_fstr = internal constant [8 x i8] c"a = %f\0A\00"		; <[8 x i8]*> [#uses=1]
 @a_lstr = internal constant [10 x i8] c"a = %lld\0A\00"		; <[10 x i8]*> [#uses=1]
 @a_dstr = internal constant [8 x i8] c"a = %d\0A\00"		; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/constindices.ll b/test/CodeGen/Generic/constindices.ll
index 6366fd59598fa..7deb30f43d12e 100644
--- a/test/CodeGen/Generic/constindices.ll
+++ b/test/CodeGen/Generic/constindices.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ; Test that a sequence of constant indices are folded correctly
 ; into the equivalent offset at compile-time.
diff --git a/test/CodeGen/Generic/debug-info.ll b/test/CodeGen/Generic/debug-info.ll
index d1bb66d1529de..20d9f913c11cb 100644
--- a/test/CodeGen/Generic/debug-info.ll
+++ b/test/CodeGen/Generic/debug-info.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
         %lldb.compile_unit = type { i32, i16, i16, i8*, i8*, i8*, {  }* }
 @d.compile_unit7 = external global %lldb.compile_unit           ; <%lldb.compile_unit*> [#uses=1]
diff --git a/test/CodeGen/Generic/div-neg-power-2.ll b/test/CodeGen/Generic/div-neg-power-2.ll
index 3bc4899d8fddf..246cd033e2798 100644
--- a/test/CodeGen/Generic/div-neg-power-2.ll
+++ b/test/CodeGen/Generic/div-neg-power-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i32 @test(i32 %X) {
         %Y = sdiv i32 %X, -2            ; <i32> [#uses=1]
diff --git a/test/CodeGen/Generic/empty-load-store.ll b/test/CodeGen/Generic/empty-load-store.ll
index d7bb37194e49f..bca73054447c3 100644
--- a/test/CodeGen/Generic/empty-load-store.ll
+++ b/test/CodeGen/Generic/empty-load-store.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2612
 
 @current_foo = internal global {  } zeroinitializer
diff --git a/test/CodeGen/Generic/externally_available.ll b/test/CodeGen/Generic/externally_available.ll
index 73b6b9825d2ca..7976cc971880f 100644
--- a/test/CodeGen/Generic/externally_available.ll
+++ b/test/CodeGen/Generic/externally_available.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep test_
+; RUN: llc < %s | not grep test_
 
 ; test_function should not be emitted to the .s file.
 define available_externally i32 @test_function() {
diff --git a/test/CodeGen/Generic/fastcall.ll b/test/CodeGen/Generic/fastcall.ll
index 65e66c7ef0610..35e04f1863a36 100644
--- a/test/CodeGen/Generic/fastcall.ll
+++ b/test/CodeGen/Generic/fastcall.ll
@@ -1,5 +1,5 @@
 ; Test fastcc works. Test from bug 2770.
-; RUN: llvm-as < %s | llc -relocation-model=pic
+; RUN: llc < %s -relocation-model=pic
 
 
 %struct.__gcov_var = type {  i32 }
diff --git a/test/CodeGen/Generic/fneg-fabs.ll b/test/CodeGen/Generic/fneg-fabs.ll
index 2709fa1afd68c..2f2f59762cb9b 100644
--- a/test/CodeGen/Generic/fneg-fabs.ll
+++ b/test/CodeGen/Generic/fneg-fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define double @fneg(double %X) {
         %Y = fsub double -0.000000e+00, %X               ; <double> [#uses=1]
diff --git a/test/CodeGen/Generic/fp-to-int-invalid.ll b/test/CodeGen/Generic/fp-to-int-invalid.ll
index 73176b14391e1..cdcc3a277b6ea 100644
--- a/test/CodeGen/Generic/fp-to-int-invalid.ll
+++ b/test/CodeGen/Generic/fp-to-int-invalid.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR4057
 define void @test_cast_float_to_char(i8* %result) nounwind {
 entry:
diff --git a/test/CodeGen/Generic/fp_to_int.ll b/test/CodeGen/Generic/fp_to_int.ll
index 609de6546cfdd..ad944132d338c 100644
--- a/test/CodeGen/Generic/fp_to_int.ll
+++ b/test/CodeGen/Generic/fp_to_int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i8 @test1(double %X) {
 	%tmp.1 = fptosi double %X to i8		; <i8> [#uses=1]
diff --git a/test/CodeGen/Generic/fpowi-promote.ll b/test/CodeGen/Generic/fpowi-promote.ll
index 55c2d2ad0c6fb..82628ef6093b7 100644
--- a/test/CodeGen/Generic/fpowi-promote.ll
+++ b/test/CodeGen/Generic/fpowi-promote.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386
+; RUN: llc < %s
+; RUN: llc < %s -march=x86 -mcpu=i386
 
 ; PR1239
 
diff --git a/test/CodeGen/Generic/fwdtwice.ll b/test/CodeGen/Generic/fwdtwice.ll
index 05e831af6be2c..6b38f04673dea 100644
--- a/test/CodeGen/Generic/fwdtwice.ll
+++ b/test/CodeGen/Generic/fwdtwice.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ;;
 ;; Test the sequence:
diff --git a/test/CodeGen/Generic/getresult-undef.ll b/test/CodeGen/Generic/getresult-undef.ll
index 7905ff52ef613..c675535335a03 100644
--- a/test/CodeGen/Generic/getresult-undef.ll
+++ b/test/CodeGen/Generic/getresult-undef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define double @foo() {
   %t = getresult {double, double} undef, 1
diff --git a/test/CodeGen/Generic/global-ret0.ll b/test/CodeGen/Generic/global-ret0.ll
index 8fcef33a34d70..74bff876f8824 100644
--- a/test/CodeGen/Generic/global-ret0.ll
+++ b/test/CodeGen/Generic/global-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @g = global i32 0               ; <i32*> [#uses=1]
 
diff --git a/test/CodeGen/Generic/hello.ll b/test/CodeGen/Generic/hello.ll
index 705423f973cfb..705945cf19837 100644
--- a/test/CodeGen/Generic/hello.ll
+++ b/test/CodeGen/Generic/hello.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @.str_1 = internal constant [7 x i8] c"hello\0A\00"             ; <[7 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/Generic/i128-addsub.ll b/test/CodeGen/Generic/i128-addsub.ll
index 10f0acc363089..e7cbf4aaf7851 100644
--- a/test/CodeGen/Generic/i128-addsub.ll
+++ b/test/CodeGen/Generic/i128-addsub.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
 entry:
diff --git a/test/CodeGen/Generic/i128-arith.ll b/test/CodeGen/Generic/i128-arith.ll
index 9a670847b575f..cf10463143c9c 100644
--- a/test/CodeGen/Generic/i128-arith.ll
+++ b/test/CodeGen/Generic/i128-arith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s 
 
 define i64 @foo(i64 %x, i64 %y, i32 %amt) {
         %tmp0 = zext i64 %x to i128
diff --git a/test/CodeGen/Generic/inline-asm-special-strings.ll b/test/CodeGen/Generic/inline-asm-special-strings.ll
index e52e0be74bc3a..d18221ef934da 100644
--- a/test/CodeGen/Generic/inline-asm-special-strings.ll
+++ b/test/CodeGen/Generic/inline-asm-special-strings.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep "foo 0 0"
+; RUN: llc < %s | grep "foo 0 0"
 
 define void @bar() nounwind {
 	tail call void asm sideeffect "foo ${:uid} ${:uid}", ""() nounwind
diff --git a/test/CodeGen/Generic/intrinsics.ll b/test/CodeGen/Generic/intrinsics.ll
index 373bec9adc05f..9a42c3ef32a1f 100644
--- a/test/CodeGen/Generic/intrinsics.ll
+++ b/test/CodeGen/Generic/intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ;; SQRT
 declare float @llvm.sqrt.f32(float)
diff --git a/test/CodeGen/Generic/invalid-memcpy.ll b/test/CodeGen/Generic/invalid-memcpy.ll
index e3acf0c2be13e..8448565a2b824 100644
--- a/test/CodeGen/Generic/invalid-memcpy.ll
+++ b/test/CodeGen/Generic/invalid-memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s 
 
 ; This testcase is invalid (the alignment specified for memcpy is 
 ; greater than the alignment guaranteed for Qux or C.0.1173), but it
diff --git a/test/CodeGen/Generic/isunord.ll b/test/CodeGen/Generic/isunord.ll
index 589f49611791e..ebbba010793b3 100644
--- a/test/CodeGen/Generic/isunord.ll
+++ b/test/CodeGen/Generic/isunord.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc
-; XFAIL: ia64
+; RUN: llc < %s
 
 declare i1 @llvm.isunordered.f64(double, double)
 
diff --git a/test/CodeGen/Generic/llvm-ct-intrinsics.ll b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
index 66f409ed27d57..1db75497592f6 100644
--- a/test/CodeGen/Generic/llvm-ct-intrinsics.ll
+++ b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase is supported by all code generators
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 declare i64 @llvm.ctpop.i64(i64)
 
diff --git a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
index f21b645615ad0..282e973ff9adc 100644
--- a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
+++ b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 declare { i64, double } @wild()
 
diff --git a/test/CodeGen/Generic/negintconst.ll b/test/CodeGen/Generic/negintconst.ll
index a2b3d69ee555b..67d775e168822 100644
--- a/test/CodeGen/Generic/negintconst.ll
+++ b/test/CodeGen/Generic/negintconst.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ; Test that a negative constant smaller than 64 bits (e.g., int)
 ; is correctly implemented with sign-extension.
diff --git a/test/CodeGen/Generic/nested-select.ll b/test/CodeGen/Generic/nested-select.ll
index a723a4d74268a..f81fed332df09 100644
--- a/test/CodeGen/Generic/nested-select.ll
+++ b/test/CodeGen/Generic/nested-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -o /dev/null -f
+; RUN: llc < %s -o /dev/null
 
 ; Test that select of a select works
 
diff --git a/test/CodeGen/Generic/phi-immediate-factoring.ll b/test/CodeGen/Generic/phi-immediate-factoring.ll
index e0f675976d913..9f9f92115c797 100644
--- a/test/CodeGen/Generic/phi-immediate-factoring.ll
+++ b/test/CodeGen/Generic/phi-immediate-factoring.ll
@@ -1,5 +1,5 @@
 ; PR1296
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl	\$1} | count 1
+; RUN: llc < %s -march=x86 | grep {movl	\$1} | count 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/Generic/pr2625.ll b/test/CodeGen/Generic/pr2625.ll
index c1f585de73767..3e3dc4b2d2bcf 100644
--- a/test/CodeGen/Generic/pr2625.ll
+++ b/test/CodeGen/Generic/pr2625.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2625
 
 define i32 @main({ i32, { i32 } }*) {
diff --git a/test/CodeGen/Generic/pr3288.ll b/test/CodeGen/Generic/pr3288.ll
index ff0384db6db5f..b62710f31ecbd 100644
--- a/test/CodeGen/Generic/pr3288.ll
+++ b/test/CodeGen/Generic/pr3288.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR3288
 
 define void @a() {
diff --git a/test/CodeGen/Generic/print-add.ll b/test/CodeGen/Generic/print-add.ll
index 4f1cb5e736afb..95608dc60b503 100644
--- a/test/CodeGen/Generic/print-add.ll
+++ b/test/CodeGen/Generic/print-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @.str_1 = internal constant [4 x i8] c"%d\0A\00"                ; <[4 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/Generic/print-arith-fp.ll b/test/CodeGen/Generic/print-arith-fp.ll
index 1e27061941a46..d129ff85870e4 100644
--- a/test/CodeGen/Generic/print-arith-fp.ll
+++ b/test/CodeGen/Generic/print-arith-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 @a_str = internal constant [8 x i8] c"a = %f\0A\00"		; <[8 x i8]*> [#uses=1]
 @b_str = internal constant [8 x i8] c"b = %f\0A\00"		; <[8 x i8]*> [#uses=1]
 @add_str = internal constant [12 x i8] c"a + b = %f\0A\00"		; <[12 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-arith-int.ll b/test/CodeGen/Generic/print-arith-int.ll
index cf275151571ec..ce938cf05b987 100644
--- a/test/CodeGen/Generic/print-arith-int.ll
+++ b/test/CodeGen/Generic/print-arith-int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 @a_str = internal constant [8 x i8] c"a = %d\0A\00"		; <[8 x i8]*> [#uses=1]
 @b_str = internal constant [8 x i8] c"b = %d\0A\00"		; <[8 x i8]*> [#uses=1]
 @add_str = internal constant [12 x i8] c"a + b = %d\0A\00"		; <[12 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-int.ll b/test/CodeGen/Generic/print-int.ll
index 58f5047ceb798..7ca4b3de48a37 100644
--- a/test/CodeGen/Generic/print-int.ll
+++ b/test/CodeGen/Generic/print-int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @.str_1 = internal constant [4 x i8] c"%d\0A\00"                ; <[4 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/Generic/print-mul-exp.ll b/test/CodeGen/Generic/print-mul-exp.ll
index 06667754a3235..90fc55b258382 100644
--- a/test/CodeGen/Generic/print-mul-exp.ll
+++ b/test/CodeGen/Generic/print-mul-exp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @a_str = internal constant [8 x i8] c"a = %d\0A\00"		; <[8 x i8]*> [#uses=1]
 @a_mul_str = internal constant [13 x i8] c"a * %d = %d\0A\00"		; <[13 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-mul.ll b/test/CodeGen/Generic/print-mul.ll
index 1d9452a58434b..0707f3c2318cf 100644
--- a/test/CodeGen/Generic/print-mul.ll
+++ b/test/CodeGen/Generic/print-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @a_str = internal constant [8 x i8] c"a = %d\0A\00"		; <[8 x i8]*> [#uses=1]
 @b_str = internal constant [8 x i8] c"b = %d\0A\00"		; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/print-shift.ll b/test/CodeGen/Generic/print-shift.ll
index 8992e8df0c014..6c5d222209bea 100644
--- a/test/CodeGen/Generic/print-shift.ll
+++ b/test/CodeGen/Generic/print-shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 @a_str = internal constant [8 x i8] c"a = %d\0A\00"             ; <[8 x i8]*> [#uses=1]
 @b_str = internal constant [8 x i8] c"b = %d\0A\00"             ; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Generic/ret0.ll b/test/CodeGen/Generic/ret0.ll
index 489f31c3730d6..9e628a1a1409e 100644
--- a/test/CodeGen/Generic/ret0.ll
+++ b/test/CodeGen/Generic/ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i32 @main() {  
   ret i32 0
diff --git a/test/CodeGen/Generic/ret42.ll b/test/CodeGen/Generic/ret42.ll
index 0cbe1763faadf..f5cd33dc0b212 100644
--- a/test/CodeGen/Generic/ret42.ll
+++ b/test/CodeGen/Generic/ret42.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i32 @main() {  
   ret i32 42
diff --git a/test/CodeGen/Generic/select-cc.ll b/test/CodeGen/Generic/select-cc.ll
index 85e68d19c45a8..b653e2a46dcfb 100644
--- a/test/CodeGen/Generic/select-cc.ll
+++ b/test/CodeGen/Generic/select-cc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2504
 
 define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind  {
diff --git a/test/CodeGen/Generic/select.ll b/test/CodeGen/Generic/select.ll
index a532703d9417e..63052c1a28453 100644
--- a/test/CodeGen/Generic/select.ll
+++ b/test/CodeGen/Generic/select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 %Domain = type { i8*, i32, i32*, i32, i32, i32*, %Domain* }
 @AConst = constant i32 123              ; <i32*> [#uses=1]
diff --git a/test/CodeGen/Generic/shift-int64.ll b/test/CodeGen/Generic/shift-int64.ll
index 31be2d634e998..670ef20e084bf 100644
--- a/test/CodeGen/Generic/shift-int64.ll
+++ b/test/CodeGen/Generic/shift-int64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define i64 @test_imm(i64 %X) {
         %Y = ashr i64 %X, 17            ; <i64> [#uses=1]
diff --git a/test/CodeGen/Generic/spillccr.ll b/test/CodeGen/Generic/spillccr.ll
index 8545133317420..0a774c64f82e6 100644
--- a/test/CodeGen/Generic/spillccr.ll
+++ b/test/CodeGen/Generic/spillccr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc
+; RUN: llc < %s
 
 ; July 6, 2002 -- LLC Regression test
 ; This test case checks if the integer CC register %xcc (or %ccr)
diff --git a/test/CodeGen/Generic/stack-protector.ll b/test/CodeGen/Generic/stack-protector.ll
index a11a7149a36c0..a59c649781d48 100644
--- a/test/CodeGen/Generic/stack-protector.ll
+++ b/test/CodeGen/Generic/stack-protector.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -o - | grep {__stack_chk_guard}
-; RUN: llvm-as < %s | llc -o - | grep {__stack_chk_fail}
+; RUN: llc < %s -o - | grep {__stack_chk_guard}
+; RUN: llc < %s -o - | grep {__stack_chk_fail}
 
 @"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00"		; <[11 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/Generic/stacksave-restore.ll b/test/CodeGen/Generic/stacksave-restore.ll
index fd3dd67298ed6..b124b5f9b7d55 100644
--- a/test/CodeGen/Generic/stacksave-restore.ll
+++ b/test/CodeGen/Generic/stacksave-restore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 declare i8* @llvm.stacksave()
 
diff --git a/test/CodeGen/Generic/storetrunc-fp.ll b/test/CodeGen/Generic/storetrunc-fp.ll
index 0f7bb0b85ee91..7f7c7f71b3adf 100644
--- a/test/CodeGen/Generic/storetrunc-fp.ll
+++ b/test/CodeGen/Generic/storetrunc-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @foo(double %a, double %b, float* %fp) {
 	%c = fadd double %a, %b
diff --git a/test/CodeGen/Generic/switch-crit-edge-constant.ll b/test/CodeGen/Generic/switch-crit-edge-constant.ll
index d71fe56eb49d0..1f2ab0dbcbe96 100644
--- a/test/CodeGen/Generic/switch-crit-edge-constant.ll
+++ b/test/CodeGen/Generic/switch-crit-edge-constant.ll
@@ -1,5 +1,5 @@
 ; PR925
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
 ; RUN:   grep mov.*str1 | count 1
 
 target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/Generic/switch-lower-feature-2.ll b/test/CodeGen/Generic/switch-lower-feature-2.ll
index 5e532a8db3ed0..d6e56471c364c 100644
--- a/test/CodeGen/Generic/switch-lower-feature-2.ll
+++ b/test/CodeGen/Generic/switch-lower-feature-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o %t -f
+; RUN: llc < %s -march=x86 -o %t
 ; RUN: grep jb %t | count 1
 ; RUN: grep \\\$6 %t | count 2
 ; RUN: grep 1024 %t | count 1
diff --git a/test/CodeGen/Generic/switch-lower-feature.ll b/test/CodeGen/Generic/switch-lower-feature.ll
index 05234012547c1..65fdf5add7900 100644
--- a/test/CodeGen/Generic/switch-lower-feature.ll
+++ b/test/CodeGen/Generic/switch-lower-feature.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep {\$7} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep {\$6} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep 1024 | count 1
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep jb | count 2
-; RUN: llvm-as < %s | llc -march=x86 -o - | grep je | count 1
+; RUN: llc < %s -march=x86 -o - | grep {\$7} | count 1
+; RUN: llc < %s -march=x86 -o - | grep {\$6} | count 1
+; RUN: llc < %s -march=x86 -o - | grep 1024 | count 1
+; RUN: llc < %s -march=x86 -o - | grep jb | count 2
+; RUN: llc < %s -march=x86 -o - | grep je | count 1
 
 define i32 @main(i32 %tmp158) {
 entry:
diff --git a/test/CodeGen/Generic/switch-lower.ll b/test/CodeGen/Generic/switch-lower.ll
index b1aad3f45140f..eb240edc7c675 100644
--- a/test/CodeGen/Generic/switch-lower.ll
+++ b/test/CodeGen/Generic/switch-lower.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1197
 
 
diff --git a/test/CodeGen/Generic/trap.ll b/test/CodeGen/Generic/trap.ll
index 4dfc1a6450b93..67d1a7a347f38 100644
--- a/test/CodeGen/Generic/trap.ll
+++ b/test/CodeGen/Generic/trap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 define i32 @test() noreturn nounwind  {
 entry:
 	tail call void @llvm.trap( )
diff --git a/test/CodeGen/Generic/v-split.ll b/test/CodeGen/Generic/v-split.ll
index 44601d0c40247..634b5621aa99d 100644
--- a/test/CodeGen/Generic/v-split.ll
+++ b/test/CodeGen/Generic/v-split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 %f8 = type <8 x float>
 
 define void @test_f8(%f8 *%P, %f8* %Q, %f8 *%S) {
diff --git a/test/CodeGen/Generic/vector-casts.ll b/test/CodeGen/Generic/vector-casts.ll
index 12104a32eecd6..a26918b8f2428 100644
--- a/test/CodeGen/Generic/vector-casts.ll
+++ b/test/CodeGen/Generic/vector-casts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2671
 
 define void @a(<2 x double>* %p, <2 x i8>* %q) {
diff --git a/test/CodeGen/Generic/vector-constantexpr.ll b/test/CodeGen/Generic/vector-constantexpr.ll
index 441c4a0e71dc6..d8e0258221c85 100644
--- a/test/CodeGen/Generic/vector-constantexpr.ll
+++ b/test/CodeGen/Generic/vector-constantexpr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 	
 define void @""(float* %inregs, float* %outregs) {
         %a_addr.i = alloca <4 x float>          ; <<4 x float>*> [#uses=1]
diff --git a/test/CodeGen/Generic/vector-identity-shuffle.ll b/test/CodeGen/Generic/vector-identity-shuffle.ll
index 61b44af118b95..332d6d8c25363 100644
--- a/test/CodeGen/Generic/vector-identity-shuffle.ll
+++ b/test/CodeGen/Generic/vector-identity-shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s 
 
 
 define void @test(<4 x float>* %tmp2.i) {
diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll
index f283256d10acc..a0f9a02d4cbbe 100644
--- a/test/CodeGen/Generic/vector.ll
+++ b/test/CodeGen/Generic/vector.ll
@@ -1,5 +1,5 @@
 ; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 
 %d8 = type <8 x double>
diff --git a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
index 245c2f908bdeb..f339373ffc75a 100644
--- a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
+++ b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR4136
 
 target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
diff --git a/test/CodeGen/MSP430/2009-05-17-Rot.ll b/test/CodeGen/MSP430/2009-05-17-Rot.ll
index c25a906308af0..2ae005259d4fa 100644
--- a/test/CodeGen/MSP430/2009-05-17-Rot.ll
+++ b/test/CodeGen/MSP430/2009-05-17-Rot.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=msp430
+; RUN: llc < %s -march=msp430
 
 define i16 @rol1u16(i16 %x.arg) nounwind {
         %retval = alloca i16
diff --git a/test/CodeGen/MSP430/2009-05-17-Shift.ll b/test/CodeGen/MSP430/2009-05-17-Shift.ll
index b048bb3fd32ac..25aff60c2b3ff 100644
--- a/test/CodeGen/MSP430/2009-05-17-Shift.ll
+++ b/test/CodeGen/MSP430/2009-05-17-Shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=msp430 | grep rra | count 1
+; RUN: llc < %s -march=msp430 | grep rra | count 1
 
 define i16 @lsr2u16(i16 %x.arg) nounwind {
         %retval = alloca i16
diff --git a/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll b/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
index 70f1d996e7cab..54eb7ff5c0bf3 100644
--- a/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
+++ b/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=msp430
+; RUN: llc < %s -march=msp430
 
 define i16 @test(double %d) nounwind {
 entry:
diff --git a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
new file mode 100644
index 0000000000000..088d3e1e7b37b
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s
+; PR4769
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define i16 @foo() nounwind readnone {
+entry:
+  %result = alloca i16, align 1                   ; <i16*> [#uses=2]
+  volatile store i16 0, i16* %result
+  %tmp = volatile load i16* %result               ; <i16> [#uses=1]
+  ret i16 %tmp
+}
+
+define i16 @main() nounwind {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %call = call i16 @bar() nounwind                ; <i16> [#uses=1]
+  %tobool = icmp eq i16 %call, 0                  ; <i1> [#uses=1]
+  br i1 %tobool, label %while.end, label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  %result.i = alloca i16, align 1                 ; <i16*> [#uses=2]
+  volatile store i16 0, i16* %result.i
+  %tmp.i = volatile load i16* %result.i           ; <i16> [#uses=0]
+  ret i16 0
+}
+
+declare i16 @bar()
diff --git a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
new file mode 100644
index 0000000000000..cc574c7290aba
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | grep 0x0021 | count 2
+; PR4776
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-unknown-unknown"
+
+@"\010x0021" = common global i8 0, align 1        ; <i8*> [#uses=2]
+
+define zeroext i8 @foo(i8 zeroext %x) nounwind {
+entry:
+  %retval = alloca i8                             ; <i8*> [#uses=2]
+  %x.addr = alloca i8                             ; <i8*> [#uses=2]
+  %tmp = alloca i8, align 1                       ; <i8*> [#uses=2]
+  store i8 %x, i8* %x.addr
+  %tmp1 = volatile load i8* @"\010x0021"          ; <i8> [#uses=1]
+  store i8 %tmp1, i8* %tmp
+  %tmp2 = load i8* %x.addr                        ; <i8> [#uses=1]
+  volatile store i8 %tmp2, i8* @"\010x0021"
+  %tmp3 = load i8* %tmp                           ; <i8> [#uses=1]
+  store i8 %tmp3, i8* %retval
+  %0 = load i8* %retval                           ; <i8> [#uses=1]
+  ret i8 %0
+}
diff --git a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
new file mode 100644
index 0000000000000..856eb9db3f6b3
--- /dev/null
+++ b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=msp430 < %s
+; PR4779 
+define void @foo() nounwind {
+entry:
+	%r = alloca i8		; <i8*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	volatile load i8* %r, align 1		; <i8>:0 [#uses=1]
+	or i8 %0, 1		; <i8>:1 [#uses=1]
+	volatile store i8 %1, i8* %r, align 1
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/MSP430/Inst16mi.ll b/test/CodeGen/MSP430/Inst16mi.ll
new file mode 100644
index 0000000000000..33d7aa495d3c2
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16mi.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.w	#2, &foo
+	store i16 2, i16 * @foo
+	ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.w	#2, &foo
+	%1 = load i16* @foo
+	%2 = add i16 %1, 2
+	store i16 %2, i16 * @foo
+	ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.w	#2, &foo
+	%1 = load i16* @foo
+	%2 = and i16 %1, 2
+	store i16 %2, i16 * @foo
+	ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.w	#2, &foo
+	%1 = load i16* @foo
+	%2 = or i16 %1, 2
+	store i16 %2, i16 * @foo
+	ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.w	#2, &foo
+	%1 = load i16* @foo
+	%2 = xor i16 %1, 2
+	store i16 %2, i16 * @foo
+	ret void
+}
diff --git a/test/CodeGen/MSP430/Inst16mm.ll b/test/CodeGen/MSP430/Inst16mm.ll
new file mode 100644
index 0000000000000..510afe373494a
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16mm.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+@bar = common global i16 0, align 2
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.w	&bar, &foo
+        %1 = load i16* @bar
+        store i16 %1, i16* @foo
+        ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.w	&bar, &foo
+	%1 = load i16* @bar
+	%2 = load i16* @foo
+	%3 = add i16 %2, %1
+	store i16 %3, i16* @foo
+	ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.w	&bar, &foo
+	%1 = load i16* @bar
+	%2 = load i16* @foo
+	%3 = and i16 %2, %1
+	store i16 %3, i16* @foo
+	ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.w	&bar, &foo
+	%1 = load i16* @bar
+	%2 = load i16* @foo
+	%3 = or i16 %2, %1
+	store i16 %3, i16* @foo
+	ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.w	&bar, &foo
+	%1 = load i16* @bar
+	%2 = load i16* @foo
+	%3 = xor i16 %2, %1
+	store i16 %3, i16* @foo
+	ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst16mr.ll b/test/CodeGen/MSP430/Inst16mr.ll
new file mode 100644
index 0000000000000..53334aa748e98
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16mr.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+
+define void @mov(i16 %a) nounwind {
+; CHECK: mov:
+; CHECK: mov.w	r15, &foo
+	store i16 %a, i16* @foo
+	ret void
+}
+
+define void @add(i16 %a) nounwind {
+; CHECK: add:
+; CHECK: add.w	r15, &foo
+	%1 = load i16* @foo
+	%2 = add i16 %a, %1
+	store i16 %2, i16* @foo
+	ret void
+}
+
+define void @and(i16 %a) nounwind {
+; CHECK: and:
+; CHECK: and.w	r15, &foo
+	%1 = load i16* @foo
+	%2 = and i16 %a, %1
+	store i16 %2, i16* @foo
+	ret void
+}
+
+define void @bis(i16 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.w	r15, &foo
+	%1 = load i16* @foo
+	%2 = or i16 %a, %1
+	store i16 %2, i16* @foo
+	ret void
+}
+
+define void @xor(i16 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.w	r15, &foo
+	%1 = load i16* @foo
+	%2 = xor i16 %a, %1
+	store i16 %2, i16* @foo
+	ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst16rm.ll b/test/CodeGen/MSP430/Inst16rm.ll
new file mode 100644
index 0000000000000..d0cb0d19b938c
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16rm.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i16 0, align 2
+
+define i16 @add(i16 %a) nounwind {
+; CHECK: add:
+; CHECK: add.w	&foo, r15
+	%1 = load i16* @foo
+	%2 = add i16 %a, %1
+	ret i16 %2
+}
+
+define i16 @and(i16 %a) nounwind {
+; CHECK: and:
+; CHECK: and.w	&foo, r15
+	%1 = load i16* @foo
+	%2 = and i16 %a, %1
+	ret i16 %2
+}
+
+
+define i16 @bis(i16 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.w	&foo, r15
+	%1 = load i16* @foo
+	%2 = or i16 %a, %1
+	ret i16 %2
+}
+
+define i16 @xor(i16 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.w	&foo, r15
+	%1 = load i16* @foo
+	%2 = xor i16 %a, %1
+	ret i16 %2
+}
+
diff --git a/test/CodeGen/MSP430/Inst16rr.ll b/test/CodeGen/MSP430/Inst16rr.ll
new file mode 100644
index 0000000000000..6619c51823648
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst16rr.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define i16 @mov(i16 %a, i16 %b) nounwind {
+; CHECK: mov:
+; CHECK: mov.w	r14, r15
+	ret i16 %b
+}
+
+define i16 @add(i16 %a, i16 %b) nounwind {
+; CHECK: add:
+; CHECK: add.w	r14, r15
+	%1 = add i16 %a, %b
+	ret i16 %1
+}
+
+define i16 @and(i16 %a, i16 %b) nounwind {
+; CHECK: and:
+; CHECK: and.w	r14, r15
+	%1 = and i16 %a, %b
+	ret i16 %1
+}
+
+define i16 @bis(i16 %a, i16 %b) nounwind {
+; CHECK: bis:
+; CHECK: bis.w	r14, r15
+	%1 = or i16 %a, %b
+	ret i16 %1
+}
+
+define i16 @xor(i16 %a, i16 %b) nounwind {
+; CHECK: xor:
+; CHECK: xor.w	r14, r15
+	%1 = xor i16 %a, %b
+	ret i16 %1
+}
diff --git a/test/CodeGen/MSP430/Inst8mi.ll b/test/CodeGen/MSP430/Inst8mi.ll
new file mode 100644
index 0000000000000..ef318ce1590da
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8mi.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i8 0, align 1
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.b	#2, &foo
+	store i8 2, i8 * @foo
+	ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.b	#2, &foo
+	%1 = load i8* @foo
+	%2 = add i8 %1, 2
+	store i8 %2, i8 * @foo
+	ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.b	#2, &foo
+	%1 = load i8* @foo
+	%2 = and i8 %1, 2
+	store i8 %2, i8 * @foo
+	ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.b	#2, &foo
+	%1 = load i8* @foo
+	%2 = or i8 %1, 2
+	store i8 %2, i8 * @foo
+	ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.b	#2, &foo
+	%1 = load i8* @foo
+	%2 = xor i8 %1, 2
+	store i8 %2, i8 * @foo
+	ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst8mm.ll b/test/CodeGen/MSP430/Inst8mm.ll
new file mode 100644
index 0000000000000..a2987ac9b46de
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8mm.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+@foo = common global i8 0, align 1
+@bar = common global i8 0, align 1
+
+define void @mov() nounwind {
+; CHECK: mov:
+; CHECK: mov.b	&bar, &foo
+        %1 = load i8* @bar
+        store i8 %1, i8* @foo
+        ret void
+}
+
+define void @add() nounwind {
+; CHECK: add:
+; CHECK: add.b	&bar, &foo
+	%1 = load i8* @bar
+	%2 = load i8* @foo
+	%3 = add i8 %2, %1
+	store i8 %3, i8* @foo
+	ret void
+}
+
+define void @and() nounwind {
+; CHECK: and:
+; CHECK: and.b	&bar, &foo
+	%1 = load i8* @bar
+	%2 = load i8* @foo
+	%3 = and i8 %2, %1
+	store i8 %3, i8* @foo
+	ret void
+}
+
+define void @bis() nounwind {
+; CHECK: bis:
+; CHECK: bis.b	&bar, &foo
+	%1 = load i8* @bar
+	%2 = load i8* @foo
+	%3 = or i8 %2, %1
+	store i8 %3, i8* @foo
+	ret void
+}
+
+define void @xor() nounwind {
+; CHECK: xor:
+; CHECK: xor.b	&bar, &foo
+	%1 = load i8* @bar
+	%2 = load i8* @foo
+	%3 = xor i8 %2, %1
+	store i8 %3, i8* @foo
+	ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst8mr.ll b/test/CodeGen/MSP430/Inst8mr.ll
new file mode 100644
index 0000000000000..04c681ef29f27
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8mr.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i8 0, align 1
+
+define void @mov(i8 %a) nounwind {
+; CHECK: mov:
+; CHECK: mov.b	r15, &foo
+	store i8 %a, i8* @foo
+	ret void
+}
+
+define void @and(i8 %a) nounwind {
+; CHECK: and:
+; CHECK: and.b	r15, &foo
+	%1 = load i8* @foo
+	%2 = and i8 %a, %1
+	store i8 %2, i8* @foo
+	ret void
+}
+
+define void @add(i8 %a) nounwind {
+; CHECK: add:
+; CHECK: add.b	r15, &foo
+	%1 = load i8* @foo
+	%2 = add i8 %a, %1
+	store i8 %2, i8* @foo
+	ret void
+}
+
+define void @bis(i8 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.b	r15, &foo
+	%1 = load i8* @foo
+	%2 = or i8 %a, %1
+	store i8 %2, i8* @foo
+	ret void
+}
+
+define void @xor(i8 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.b	r15, &foo
+	%1 = load i8* @foo
+	%2 = xor i8 %a, %1
+	store i8 %2, i8* @foo
+	ret void
+}
+
diff --git a/test/CodeGen/MSP430/Inst8rm.ll b/test/CodeGen/MSP430/Inst8rm.ll
new file mode 100644
index 0000000000000..62a5d4b9088b1
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8rm.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+@foo = common global i8 0, align 1
+
+define i8 @add(i8 %a) nounwind {
+; CHECK: add:
+; CHECK: add.b	&foo, r15
+	%1 = load i8* @foo
+	%2 = add i8 %a, %1
+	ret i8 %2
+}
+
+define i8 @and(i8 %a) nounwind {
+; CHECK: and:
+; CHECK: and.b	&foo, r15
+	%1 = load i8* @foo
+	%2 = and i8 %a, %1
+	ret i8 %2
+}
+
+
+define i8 @bis(i8 %a) nounwind {
+; CHECK: bis:
+; CHECK: bis.b	&foo, r15
+	%1 = load i8* @foo
+	%2 = or i8 %a, %1
+	ret i8 %2
+}
+
+define i8 @xor(i8 %a) nounwind {
+; CHECK: xor:
+; CHECK: xor.b	&foo, r15
+	%1 = load i8* @foo
+	%2 = xor i8 %a, %1
+	ret i8 %2
+}
+
diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll
new file mode 100644
index 0000000000000..90ea94516abd6
--- /dev/null
+++ b/test/CodeGen/MSP430/Inst8rr.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define i8 @mov(i8 %a, i8 %b) nounwind {
+; CHECK: mov:
+; CHECK: mov.b	r14, r15
+	ret i8 %b
+}
+
+define i8 @add(i8 %a, i8 %b) nounwind {
+; CHECK: add:
+; CHECK: add.b	r14, r15
+	%1 = add i8 %a, %b
+	ret i8 %1
+}
+
+define i8 @and(i8 %a, i8 %b) nounwind {
+; CHECK: and:
+; CHECK: and.w	r14, r15
+	%1 = and i8 %a, %b
+	ret i8 %1
+}
+
+define i8 @bis(i8 %a, i8 %b) nounwind {
+; CHECK: bis:
+; CHECK: bis.w	r14, r15
+	%1 = or i8 %a, %b
+	ret i8 %1
+}
+
+define i8 @xor(i8 %a, i8 %b) nounwind {
+; CHECK: xor:
+; CHECK: xor.w	r14, r15
+	%1 = xor i8 %a, %b
+	ret i8 %1
+}
+
diff --git a/test/CodeGen/MSP430/inline-asm.ll b/test/CodeGen/MSP430/inline-asm.ll
new file mode 100644
index 0000000000000..2cc25a4835d61
--- /dev/null
+++ b/test/CodeGen/MSP430/inline-asm.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
+target triple = "msp430-generic-generic"
+
+define void @imm() nounwind {
+        call void asm sideeffect "bic\09$0,r2", "i"(i16 32) nounwind
+        ret void
+}
+
+define void @reg(i16 %a) nounwind {
+        call void asm sideeffect "bic\09$0,r2", "r"(i16 %a) nounwind
+        ret void
+}
+
+@foo = global i16 0, align 2
+
+define void @immmem() nounwind {
+        call void asm sideeffect "bic\09$0,r2", "i"(i16* getelementptr(i16* @foo, i32 1)) nounwind
+        ret void
+}
+
+define void @mem() nounwind {
+        call void asm sideeffect "bic\09$0,r2", "m"(i16* @foo) nounwind
+        ret void
+}
diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll
index 9cd7c80577a16..8e7b70e2216f5 100644
--- a/test/CodeGen/Mips/2008-06-05-Carry.ll
+++ b/test/CodeGen/Mips/2008-06-05-Carry.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
 ; RUN: grep subu %t | count 2
 ; RUN: grep addu %t | count 4
 
diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll
index 53ceaf360653b..b2aaa00754b70 100644
--- a/test/CodeGen/Mips/2008-07-03-SRet.ll
+++ b/test/CodeGen/Mips/2008-07-03-SRet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {sw.*(\$4)} | count 3
+; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-05-ByVal.ll b/test/CodeGen/Mips/2008-07-05-ByVal.ll
index 2d1101a253027..6bb6bd862b25e 100644
--- a/test/CodeGen/Mips/2008-07-05-ByVal.ll
+++ b/test/CodeGen/Mips/2008-07-05-ByVal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {lw.*(\$4)} | count 2
+; RUN: llc < %s -march=mips | grep {lw.*(\$4)} | count 2
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-06-fadd64.ll b/test/CodeGen/Mips/2008-07-06-fadd64.ll
index f8eca85efafb2..808ce16910eee 100644
--- a/test/CodeGen/Mips/2008-07-06-fadd64.ll
+++ b/test/CodeGen/Mips/2008-07-06-fadd64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep __adddf3
+; RUN: llc < %s -march=mips | grep __adddf3
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-07-FPExtend.ll b/test/CodeGen/Mips/2008-07-07-FPExtend.ll
index e0e7d767b1d80..7ac0f5f840db1 100644
--- a/test/CodeGen/Mips/2008-07-07-FPExtend.ll
+++ b/test/CodeGen/Mips/2008-07-07-FPExtend.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep __extendsfdf2
+; RUN: llc < %s -march=mips | grep __extendsfdf2
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
index f2f0374c31685..ca996367733e4 100644
--- a/test/CodeGen/Mips/2008-07-07-Float2Int.ll
+++ b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep trunc.w.s | count 3
+; RUN: llc < %s -march=mips | grep trunc.w.s | count 3
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
index ab6a9c8edae88..20de18a0164c8 100644
--- a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
+++ b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
 ; RUN: grep __floatsidf   %t | count 1
 ; RUN: grep __floatunsidf %t | count 1
 ; RUN: grep __fixdfsi %t | count 1
diff --git a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
index 4d218cf6b4bc2..f6b2045444a57 100644
--- a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
+++ b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
 ; RUN: grep {rodata.str1.4,"aMS",@progbits}  %t | count 1
 ; RUN: grep {r.data,}  %t | count 1
 ; RUN: grep {\%hi} %t | count 2
diff --git a/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
index 0e3f864795395..26eb4db26d4d4 100644
--- a/test/CodeGen/Mips/2008-07-15-SmallSection.ll
+++ b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mips-ssection-threshold=8 -march=mips -f -o %t0
-; RUN: llvm-as < %s | llc -mips-ssection-threshold=0 -march=mips -f -o %t1
+; RUN: llc < %s -mips-ssection-threshold=8 -march=mips -o %t0
+; RUN: llc < %s -mips-ssection-threshold=0 -march=mips -o %t1
 ; RUN: grep {sdata} %t0 | count 1
 ; RUN: grep {sbss} %t0 | count 1
 ; RUN: grep {gp_rel} %t0 | count 2
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index fc03bb5fef0d4..59599b399c291 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
 ; RUN: grep seh %t | count 1
 ; RUN: grep seb %t | count 1
 
diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
index 2af7ab17c2cd9..21ff960054213 100644
--- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll
+++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
 ; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2
 ; RUN: grep {rodata.cst4,"aM",@progbits} %t | count 1
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
index 4580215b38f65..80101fa25b3ef 100644
--- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll
+++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
 ; RUN: grep {c\\..*\\.s} %t | count 3
 ; RUN: grep {bc1\[tf\]} %t | count 3
 
diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll
index 5d03a1986b2d4..042cad60e2b04 100644
--- a/test/CodeGen/Mips/2008-07-29-icmp.ll
+++ b/test/CodeGen/Mips/2008-07-29-icmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {b\[ne\]\[eq\]} | count 1
+; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-31-fcopysign.ll b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
index de11ac77c01e2..77680bccf9765 100644
--- a/test/CodeGen/Mips/2008-07-31-fcopysign.ll
+++ b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
 ; RUN: grep abs.s  %t | count 1
 ; RUN: grep neg.s %t | count 1
 
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index fea5730f73d10..cd35ccaee83d5 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
 ; RUN: grep mfhi  %t | count 1
 ; RUN: grep mflo  %t | count 1
 ; RUN: grep multu %t | count 1
diff --git a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
index b1e999cea1f38..c41d5213c1789 100644
--- a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
+++ b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
@@ -1,5 +1,5 @@
 ; Double return in abicall (default)
-; RUN: llvm-as < %s | llc -march=mips
+; RUN: llc < %s -march=mips
 ; PR2615
 
 define double @main(...) {
diff --git a/test/CodeGen/Mips/2008-08-03-fabs64.ll b/test/CodeGen/Mips/2008-08-03-fabs64.ll
index 9d18f47bce20c..2f33e9bea73f8 100644
--- a/test/CodeGen/Mips/2008-08-03-fabs64.ll
+++ b/test/CodeGen/Mips/2008-08-03-fabs64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
 ; RUN: grep {lui.*32767} %t | count 1
 ; RUN: grep {ori.*65535} %t | count 1
 
diff --git a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
index f7a64c32f27bc..ca90b500f0506 100644
--- a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
+++ b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f -o %t
+; RUN: llc < %s -march=mips -o %t
 ; RUN: grep mtc1 %t | count 1
 ; RUN: grep mfc1 %t | count 1
 
diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll
index 34596ea029a04..79e49a3d682e5 100644
--- a/test/CodeGen/Mips/2008-08-06-Alloca.ll
+++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep {subu.*sp} | count 2
+; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-08-07-CC.ll b/test/CodeGen/Mips/2008-08-07-CC.ll
index e276f5e90e5b4..54d454cc3aded 100644
--- a/test/CodeGen/Mips/2008-08-07-CC.ll
+++ b/test/CodeGen/Mips/2008-08-07-CC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips -f 
+; RUN: llc < %s -march=mips
 ; Mips must ignore fastcc
 
 target datalayout =
diff --git a/test/CodeGen/Mips/2008-08-07-FPRound.ll b/test/CodeGen/Mips/2008-08-07-FPRound.ll
index fd41ff1f8cf23..f3bb965cdb69e 100644
--- a/test/CodeGen/Mips/2008-08-07-FPRound.ll
+++ b/test/CodeGen/Mips/2008-08-07-FPRound.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep __truncdfsf2 | count 1
+; RUN: llc < %s -march=mips | grep __truncdfsf2 | count 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-08-08-bswap.ll b/test/CodeGen/Mips/2008-08-08-bswap.ll
index 71c2b85d8df1b..83289d97cfd75 100644
--- a/test/CodeGen/Mips/2008-08-08-bswap.ll
+++ b/test/CodeGen/Mips/2008-08-08-bswap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep wsbw | count 1
+; RUN: llc < %s | grep wsbw | count 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "psp"
diff --git a/test/CodeGen/Mips/2008-08-08-ctlz.ll b/test/CodeGen/Mips/2008-08-08-ctlz.ll
index e468b6da5a6b3..1da1db24bf5a0 100644
--- a/test/CodeGen/Mips/2008-08-08-ctlz.ll
+++ b/test/CodeGen/Mips/2008-08-08-ctlz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips | grep clz | count 1
+; RUN: llc < %s -march=mips | grep clz | count 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-psp-elf"
diff --git a/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll b/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
index 783850a7827e3..18f5b3d7ff7b2 100644
--- a/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
+++ b/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=mips
+; RUN: llc < %s -march=mips
 ; PR2794
 
 define i32 @main(i8*) nounwind {
diff --git a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
index 1f7440afedd6b..f5188434670be 100644
--- a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
+++ b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2667
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "psp"
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index 00a969d0620ae..a1b45c2a63e1a 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -1,6 +1,6 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llvm-as < %s | llc -march=mips > %t
+; RUN: llc < %s -march=mips > %t
 ; RUN: grep \\\$foo: %t
 ; RUN: grep call.*\\\$foo %t
 ; RUN: grep \\\$baz: %t
diff --git a/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll b/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll
new file mode 100644
index 0000000000000..d7072dd9b5d76
--- /dev/null
+++ b/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=pic16 | grep {movf \\+@i + 0, \\+W}
+
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-f32:32:32"
+target triple = "pic16-"
+@i = global i32 -10, align 1		; <i32*> [#uses=1]
+@j = global i32 -20, align 1		; <i32*> [#uses=1]
+@pc = global i8* inttoptr (i64 160 to i8*), align 1		; <i8**> [#uses=3]
+@main.auto.k = internal global i32 0		; <i32*> [#uses=2]
+
+define void @main() nounwind {
+entry:
+	%tmp = load i32* @i		; <i32> [#uses=1]
+	%tmp1 = load i32* @j		; <i32> [#uses=1]
+	%add = add i32 %tmp, %tmp1		; <i32> [#uses=1]
+	store i32 %add, i32* @main.auto.k
+	%tmp2 = load i32* @main.auto.k		; <i32> [#uses=1]
+	%add3 = add i32 %tmp2, 32		; <i32> [#uses=1]
+	%conv = trunc i32 %add3 to i8		; <i8> [#uses=1]
+	%tmp4 = load i8** @pc		; <i8*> [#uses=1]
+	store i8 %conv, i8* %tmp4
+	%tmp5 = load i8** @pc		; <i8*> [#uses=1]
+	%tmp6 = load i8* %tmp5		; <i8> [#uses=1]
+	%conv7 = sext i8 %tmp6 to i16		; <i16> [#uses=1]
+	%sub = sub i16 %conv7, 1		; <i16> [#uses=1]
+	%conv8 = trunc i16 %sub to i8		; <i8> [#uses=1]
+	%tmp9 = load i8** @pc		; <i8*> [#uses=1]
+	store i8 %conv8, i8* %tmp9
+	ret void
+}
diff --git a/test/CodeGen/PIC16/dg.exp b/test/CodeGen/PIC16/dg.exp
new file mode 100644
index 0000000000000..b08b9858e048f
--- /dev/null
+++ b/test/CodeGen/PIC16/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target PIC16] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/PIC16/global-in-user-section.ll b/test/CodeGen/PIC16/global-in-user-section.ll
new file mode 100644
index 0000000000000..74c9d9d256cde
--- /dev/null
+++ b/test/CodeGen/PIC16/global-in-user-section.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=pic16 | FileCheck %s
+
+@G1 = common global i16 0, section "usersection", align 1 
+; CHECK: usersection UDATA
+; CHECK: @G1 RES 2 
diff --git a/test/CodeGen/PIC16/globals.ll b/test/CodeGen/PIC16/globals.ll
new file mode 100644
index 0000000000000..959eb254d7661
--- /dev/null
+++ b/test/CodeGen/PIC16/globals.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=pic16 | FileCheck %s
+
+@G1 = global i32 4712, section "Address=412"
+; CHECK: @G1.412.idata.0.# IDATA 412
+; CHECK: @G1 dl 4712
+
+@G2 = global i32 0, section "Address=412"
+; CHECK: @G2.412.udata.0.# UDATA 412
+; CHECK: @G2 RES 4
+
+@G3 = addrspace(1) constant i32 4712, section "Address=412"
+; CHECK: @G3.412.romdata.1.# ROMDATA 412
+; CHECK: @G3 rom_dl 4712
+
+
diff --git a/test/CodeGen/PIC16/sext.ll b/test/CodeGen/PIC16/sext.ll
new file mode 100644
index 0000000000000..b49925ffb7c34
--- /dev/null
+++ b/test/CodeGen/PIC16/sext.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=pic16
+
+@main.auto.c = internal global i8 0		; <i8*> [#uses=1]
+
+define i16 @main() nounwind {
+entry:
+	%tmp = load i8* @main.auto.c		; <i8> [#uses=1]
+	%conv = sext i8 %tmp to i16		; <i16> [#uses=1]
+	ret i16 %conv
+}
diff --git a/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll b/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
index 70f294a78d865..f95465cfc5370 100644
--- a/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
+++ b/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 define void @test() {
 	%tr1 = lshr i32 1, 0		; <i32> [#uses=0]
 	ret void
diff --git a/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll b/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
index 93a91234b707f..c3bfa49115b99 100644
--- a/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
+++ b/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 define void @main() {
         %tr4 = shl i64 1, 0             ; <i64> [#uses=0]
diff --git a/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll b/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
index 1a1aca4b5d116..dea654ac0c0b9 100644
--- a/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
+++ b/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 define void @main() {
         %shamt = add i8 0, 1            ; <i8> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll b/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
index 3e490b1dc7a2b..fc190a486e6b1 100644
--- a/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
+++ b/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep .comm.*X,0
+; RUN: llc < %s -march=ppc32 | not grep .comm.*X,0
 
 @X = linkonce global {  } zeroinitializer               ; <{  }*> [#uses=0]
 
diff --git a/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll b/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
index f84caaf1d499e..ad02ece900c8d 100644
--- a/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
+++ b/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 
+; RUN: llc < %s -march=ppc32 
 
 define i32 @main() {
         %setle = icmp sle i64 1, 0              ; <i1> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll b/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
index 7b3e9b4f092ff..671bf804ed323 100644
--- a/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
+++ b/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 define i64 @test() {
         ret i64 undef
diff --git a/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll b/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
index 8e8fee2888fb9..95012c30fc5f3 100644
--- a/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
+++ b/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
@@ -1,6 +1,6 @@
 ; this should not crash the ppc backend
 
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 
 define i32 @test(i32 %j.0.0.i) {
diff --git a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
index 428dd0c3e3fdd..5d1df468a66d4 100644
--- a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
+++ b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
@@ -1,6 +1,6 @@
 ; This function should have exactly one call to fixdfdi, no more!
 
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=-64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
 ; RUN:    grep {bl .*fixdfdi} | count 1
 
 define double @test2(double %tmp.7705) {
diff --git a/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll b/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
index 54f24c612660e..8a5d3b0fa2c2e 100644
--- a/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
+++ b/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
@@ -1,7 +1,7 @@
 ; This was erroneously being turned into an rlwinm instruction.
 ; The sign bit does matter in this case.
 
-; RUN: llvm-as < %s | llc -march=ppc32 | grep srawi
+; RUN: llc < %s -march=ppc32 | grep srawi
 
 define i32 @test(i32 %X) {
         %Y = and i32 %X, -2             ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
index d56cffcf4ab1e..047a12bedd812 100644
--- a/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
+++ b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
index 1b3bde8fb12e0..97bb48e96e565 100644
--- a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
+++ b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep {, f1}
+; RUN: llc < %s | not grep {, f1}
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
index 86ad718617739..fbf254082ee0e 100644
--- a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
+++ b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define void @iterative_hash_host_wide_int() {
         %zero = alloca i32              ; <i32*> [#uses=2]
diff --git a/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll b/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
index 8500260fafce3..172e34849d1d9 100644
--- a/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
+++ b/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 
 define double @CalcSpeed(float %tmp127) {
diff --git a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
index a536fa162c033..969772ee2bee9 100644
--- a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
+++ b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
 ; RUN:   grep {vspltish v.*, 10}
 
 define void @test(<8 x i16>* %P) {
diff --git a/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll b/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
index b79cce2ead00e..d2256642fbf06 100644
--- a/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
+++ b/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
 ; END.
 
 define void @test(i8* %stack) {
diff --git a/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
index e1033c3f808a0..0205d10a795c5 100644
--- a/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
+++ b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 ; END.
 
 	%struct.attr_desc = type { i8*, %struct.attr_desc*, %struct.attr_value*, %struct.attr_value*, i32 }
diff --git a/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
index 33807ca012b50..1b8b064ee914e 100644
--- a/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
+++ b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin | grep extsw | count 2
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin | grep extsw | count 2
 
 @lens = external global i8*             ; <i8**> [#uses=1]
 @vals = external global i32*            ; <i32**> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
index c25cf215bc341..65dd568b1ee36 100644
--- a/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
+++ b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
-define void @img2buf(i32 %symbol_size_in_bytes, i16* %ui16) {
+define void @img2buf(i32 %symbol_size_in_bytes, i16* %ui16) nounwind {
         %tmp93 = load i16* null         ; <i16> [#uses=1]
         %tmp99 = call i16 @llvm.bswap.i16( i16 %tmp93 )         ; <i16> [#uses=1]
         store i16 %tmp99, i16* %ui16
diff --git a/test/CodeGen/PowerPC/2006-08-11-RetVector.ll b/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
index 1043e45efb117..a947e5cd9c584 100644
--- a/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
+++ b/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vsldoi
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep vor
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsldoi
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vor
 
 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) {
         %tmp76 = shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >     ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
index aff4edeba54be..cb76b5c70cf01 100644
--- a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
+++ b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 	%struct..0anon = type { i32 }
 	%struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
diff --git a/test/CodeGen/PowerPC/2006-09-28-shift_64.ll b/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
index 5210dd1cb1a8a..f748a8bf1d6c5 100644
--- a/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
+++ b/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
 
 target datalayout = "E-p:64:64"
 target triple = "powerpc64-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
index 7a65c00f104f5..57ed250abc097 100644
--- a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
+++ b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -combiner-alias-analysis | grep f5
+; RUN: llc < %s -march=ppc32 -combiner-alias-analysis | grep f5
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
index 6621cec7f43ee..002a0644183a2 100644
--- a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
+++ b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep IMPLICIT_DEF
+; RUN: llc < %s -march=ppc32 | not grep IMPLICIT_DEF
 
 define void @foo(i64 %X) {
 entry:
diff --git a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
index 313568c1e4bec..3d462b4d14618 100644
--- a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
+++ b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep xor 
+; RUN: llc < %s -march=ppc32 | grep xor 
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8.7.0"
diff --git a/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll b/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
index 6dc1ff037eb3d..3284f0a624f67 100644
--- a/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
+++ b/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
 
 define i32* @foo(i32 %n) {
         %A = alloca i32, i32 %n         ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll b/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
index 80ef479fb0763..49b3b9d18fae1 100644
--- a/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
+++ b/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwimi
+; RUN: llc < %s -march=ppc32 | grep rlwimi
 
 define void @test(i16 %div.0.i.i.i.i, i32 %L_num.0.i.i.i.i, i32 %tmp1.i.i206.i.i, i16* %P) {
         %X = shl i16 %div.0.i.i.i.i, 1          ; <i16> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll b/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
index 7680c215c217e..61b9967618989 100644
--- a/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
+++ b/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
 
 define void @glgRunProcessor15() {
         %tmp26355.i = shufflevector <4 x float> zeroinitializer, <4 x float> < float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000 >, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
index be3b86308f13a..ba863047be99b 100644
--- a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
+++ b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc64
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s 
 
 define void @bitap() {
 entry:
diff --git a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
index 058166ff93db1..6d9a3fa7b1064 100644
--- a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
+++ b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc64
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s
 
 @qsz.b = external global i1             ; <i1*> [#uses=1]
 
diff --git a/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll b/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
index 19fedf9f59625..805528cf2efd6 100644
--- a/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
+++ b/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep extsb
-; RUN: llvm-as < %s | llc -march=ppc32 | grep extsh
+; RUN: llc < %s -march=ppc32 | grep extsb
+; RUN: llc < %s -march=ppc32 | grep extsh
 
 define i32 @p1(i8 %c, i16 %s) {
 entry:
diff --git a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
index d9374edfe8656..7b00ac69b91ac 100644
--- a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
+++ b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
 ; RUN:    grep cntlzw
 
-define i32 @foo() {
+define i32 @foo() nounwind {
 entry:
 	%retval = alloca i32, align 4		; <i32*> [#uses=2]
 	%temp = alloca i32, align 4		; <i32*> [#uses=2]
diff --git a/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll b/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
index f2c951ec21d5e..0c454729290d2 100644
--- a/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
+++ b/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
 
 define i16 @test(i8* %d1, i16* %d2) {
 	%tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 )		; <i16> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
index d4764622af681..fe5145d152306 100644
--- a/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
+++ b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
 
 ; Test two things: 1) that a frameidx can be rewritten in an inline asm
 ; 2) that inline asms can handle reg+imm addr modes.
diff --git a/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll b/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
index 97f6a018b30bd..621d43b5c22d6 100644
--- a/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
+++ b/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | \
 ; RUN:   grep align.*3
 
 @X = global <{i32, i32}> <{ i32 1, i32 123 }>
diff --git a/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll b/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
index 5a3d3b5d9c1ce..f48f3656ddfe2 100644
--- a/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
+++ b/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8.8.0"
diff --git a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
index 3eef9c551b75f..0473857ae70f4 100644
--- a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
+++ b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep mflr | count 1
+; RUN: llc < %s | grep mflr | count 1
 
 target datalayout = "e-p:32:32"
 target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index 098e7484e173f..e93395a67ec6d 100644
--- a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -mcpu=g5 | grep cntlzd
+; RUN: llc < %s -march=ppc64 -mcpu=g5 | grep cntlzd
 
 define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) {
         %tmp19 = load i64* %t
diff --git a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
index 637208b610a4c..d43916d4f3c14 100644
--- a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
+++ b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
 
 define void @test(<4 x float>*, { { i16, i16, i32 } }*) {
 xOperationInitMasks.exit:
diff --git a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
index 656b83192e005..86fd9475029df 100644
--- a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
+++ b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bari r3, 47}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bari r3, 47}
 
 ; PR1351
 
diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
index ba0f8fe1b77d4..f2fdedf200728 100644
--- a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
+++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc | grep {subfc r3,r5,r4}
-; RUN: llvm-as < %s | llc | grep {subfze r4,r2}
-; RUN: llvm-as < %s | llc -regalloc=local | grep {subfc r5,r2,r4}
-; RUN: llvm-as < %s | llc -regalloc=local | grep {subfze r2,r3}
+; RUN: llc < %s | grep {subfc r3,r5,r4}
+; RUN: llc < %s | grep {subfze r4,r2}
+; RUN: llc < %s -regalloc=local | grep {subfc r5,r2,r4}
+; RUN: llc < %s -regalloc=local | grep {subfze r2,r3}
 ; The first argument of subfc must not be the same as any other register.
 
 ; PR1357
diff --git a/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
index 989a7516aa4d9..1df51406fac99 100644
--- a/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
+++ b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1382
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
index b64de683f8375..e4e931492ac44 100644
--- a/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
+++ b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "powerpc-apple-darwin8.8.0"
 	%struct..0anon = type { i32 }
diff --git a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
index 5a86418f7cb79..42f215281a8b5 100644
--- a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep bl.*baz | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 | grep bl.*quux | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-tail-merge=1 | grep bl.*quux | count 1
+; RUN: llc < %s -march=ppc32 | grep bl.*baz | count 2
+; RUN: llc < %s -march=ppc32 | grep bl.*quux | count 2
+; RUN: llc < %s -march=ppc32 -enable-tail-merge | grep bl.*baz | count 1
+; RUN: llc < %s -march=ppc32 -enable-tail-merge=1 | grep bl.*quux | count 1
 ; Check that tail merging is not the default on ppc, and that -enable-tail-merge works.
 
 ; ModuleID = 'tail.c'
diff --git a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
index ae853f67e2002..2938c70c48bf1 100644
--- a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
+++ b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
@@ -1,7 +1,7 @@
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "powerpc-apple-darwin8.8.0"
 
-; RUN: llvm-as < %s | llc -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30}
+; RUN: llc < %s -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30}
 ; PR1473
 
 define i8 @foo(i16 zeroext  %a) zeroext  {
diff --git a/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
index 58260ec6b7395..6de7a09128f07 100644
--- a/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
+++ b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+altivec
+; RUN: llc < %s -march=ppc32 -mattr=+altivec
 
 	%struct.XATest = type { float, i16, i8, i8 }
 	%struct.XArrayRange = type { i8, i8, i8, i8 }
diff --git a/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll b/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
index 34df7bb7d057c..06f40d98c68cb 100644
--- a/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
+++ b/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
 ; PR1596
 
 	%struct._obstack_chunk = type { i8* }
diff --git a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
index 9c8fa97be967c..82ef2b82cbe62 100644
--- a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
+++ b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep dst | count 4
+; RUN: llc < %s -march=ppc64 | grep dst | count 4
 
 define hidden void @_Z4borkPc(i8* %image) {
 entry:
diff --git a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
index c5e7a4d38a000..ea7de9847ea7d 100644
--- a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
+++ b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep lwzx
+; RUN: llc < %s -march=ppc64 | grep lwzx
 
         %struct.__db_region = type { %struct.__mutex_t, [4 x i8], %struct.anon, i32, [1 x i32] }
         %struct.__mutex_t = type { i32 }
diff --git a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
index f6bd3337aef7f..898c470b17260 100644
--- a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
+++ b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc | grep stfd | count 3
-; RUN: llvm-as < %s | llc | grep stfs | count 1
-; RUN: llvm-as < %s | llc | grep lfd | count 2
-; RUN: llvm-as < %s | llc | grep lfs | count 2
+; RUN: llc < %s | grep stfd | count 3
+; RUN: llc < %s | grep stfs | count 1
+; RUN: llc < %s | grep lfd | count 2
+; RUN: llc < %s | grep lfs | count 2
 ; ModuleID = 'foo.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll b/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll
index bb7aba45a9636..d12698b9a00f2 100644
--- a/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll
+++ b/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
 
         %struct.TCMalloc_SpinLock = type { i32 }
 
diff --git a/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll b/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll
index f4b87cf4517b4..5cfe54e1582b5 100644
--- a/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll
+++ b/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin
 
 declare void @cxa_atexit_check_1(i8*)
 
diff --git a/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll b/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll
index e71a8fb0f1608..c4152b4fc8de3 100644
--- a/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll
+++ b/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 ; rdar://5538377
 
         %struct.disk_unsigned = type { i32 }
diff --git a/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll b/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
index bd11b5d5b7b24..84fadd1b04615 100644
--- a/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
+++ b/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -mattr=+altivec
+; RUN: llc < %s -march=ppc64 -mattr=+altivec
 	%struct.inoutprops = type <{ i8, [3 x i8] }>
 
 define void @bork(float* %argA, float* %argB, float* %res, i8 %inoutspec.0) {
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
index bca6e5a8fbdb7..ee614782952df 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
 
 	%struct.NSError = type opaque
 	%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
index 80ef6f19f7271..5a07a9b7acf01 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic
 
 	%struct.NSError = type opaque
 	%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll b/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll
index e49d59acfe5f9..a9f242ba5b167 100644
--- a/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
 
 	%struct.HDescriptor = type <{ i32, i32 }>
 
diff --git a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
index a0649e08076e2..439ef14d8b247 100644
--- a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
+++ b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh
+; RUN: llc < %s -enable-eh
 ;; Formerly crashed, see PR 1508
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc64-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
index aca0faaa4e41b..d1f0285861600 100644
--- a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
+++ b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc 
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s 
+; RUN: llc < %s -march=ppc32 -mcpu=g3
+; RUN: llc < %s -march=ppc32 -mcpu=g5
 ; PR1811
 
 define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>*
diff --git a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
index 38ae87ce8c00c..db2ab877ff7d2 100644
--- a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
+++ b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep nop
+; RUN: llc < %s -march=ppc32 | grep nop
 target triple = "powerpc-apple-darwin8"
 
 
diff --git a/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll b/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
index 5b9cd1d8408f3..791e9e610655b 100644
--- a/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
+++ b/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
 
 	%struct.Handle = type { %struct.oopDesc** }
 	%struct.JNI_ArgumentPusher = type { %struct.SignatureIterator, %struct.JavaCallArguments* }
diff --git a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
index 5edf6b761fe86..cfa1b10d32ee0 100644
--- a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=local
 
 define i32 @bork(i64 %foo, i64 %bar) {
 entry:
diff --git a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
index 8101a35a4fb4f..e50fac4472a90 100644
--- a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin -enable-ppc32-regscavenger
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -enable-ppc32-regscavenger
 
 declare i8* @bar(i32)
 
diff --git a/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll b/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
index 919de33234b88..222dde45353b2 100644
--- a/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
+++ b/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -enable-ppc64-regscavenger
+; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
 @.str242 = external constant [3 x i8]		; <[3 x i8]*> [#uses=1]
 
 define fastcc void @ParseContent(i8* %buf, i32 %bufsize) {
diff --git a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
index eaeccc5f27a45..9f35b8346c685 100644
--- a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-ppc32-regscavenger
+; RUN: llc < %s -march=ppc32 -enable-ppc32-regscavenger
 
 	%struct._cpp_strbuf = type { i8*, i32, i32 }
 	%struct.cpp_string = type { i32, i8* }
diff --git a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
index 061c585c74766..dd425f59822b2 100644
--- a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -enable-ppc64-regscavenger
+; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
 
 define i16 @test(i8* %d1, i16* %d2) {
  %tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 )
diff --git a/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll b/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
index 395c986a84123..a8fef05b1ad81 100644
--- a/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
+++ b/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
 
 define fastcc i8* @page_rec_get_next(i8* %rec) nounwind  {
 entry:
diff --git a/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
index 67c167aca1278..8776d9a3eda5f 100644
--- a/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 	%struct..0objc_object = type { %struct.objc_class* }
 	%struct.NSArray = type { %struct..0objc_object }
diff --git a/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
index 0b748d20b7cae..8e5bf567b126d 100644
--- a/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
 
 define i32 @t(i64 %byteStart, i32 %activeIndex) nounwind  {
 entry:
diff --git a/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll b/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll
index 410736d5872d6..270633786077f 100644
--- a/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
 
 define fastcc i64 @nonzero_bits1() nounwind  {
 entry:
diff --git a/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll
index 357ab100d2d45..839098ef5c2f6 100644
--- a/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
 ; Avoid reading memory that's already freed.
 
 @llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i64)* @_Z13GetSectorSizey to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
diff --git a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
index a390e522686db..7b6d4916c1a8a 100644
--- a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
 
 @_ZL10DeviceCode = internal global i16 0		; <i16*> [#uses=1]
 @.str19 = internal constant [64 x i8] c"unlock_then_erase_sector: failed to erase block (status= 0x%x)\0A\00"		; <[64 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll b/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll
index 5c40b9e0aed9c..d42c814a46a23 100644
--- a/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll
+++ b/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 target triple = "powerpc-apple-darwin9.2.2"
 
 define i256 @func(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind readnone  {
diff --git a/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll b/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll
index d337e37730988..6b40b2462dafa 100644
--- a/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 define void @t() nounwind {
 	call void null( ppc_fp128 undef )
diff --git a/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll b/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
index 92b5ca26b2e63..862559b109cf5 100644
--- a/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
+++ b/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 @g = external global ppc_fp128
 @h = external global ppc_fp128
diff --git a/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll b/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
index d3238d23c0eed..83c5511878cac 100644
--- a/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
+++ b/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 ; <rdar://problem/6020042>
 
 define i32 @bork() nounwind  {
diff --git a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
index b6b9c89730a3d..8802b97d2a6a2 100644
--- a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
+++ b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vadduhm
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vsubuhm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm
 
 define <4 x i32> @test() nounwind {
 	ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722>
diff --git a/test/CodeGen/PowerPC/2008-07-15-Bswap.ll b/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
index 7060fe560e94e..4a834f93a2052 100644
--- a/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin9"
 	%struct.BiPartSrcDescriptor = type <{ %"struct.BiPartSrcDescriptor::$_105" }>
diff --git a/test/CodeGen/PowerPC/2008-07-15-Fabs.ll b/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
index f55ffac45b08e..17737d9d3b2df 100644
--- a/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin9"
 
diff --git a/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll b/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
index 32e36427c5e6f..5cd8c348b4dbe 100644
--- a/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin9"
 
diff --git a/test/CodeGen/PowerPC/2008-07-17-Fneg.ll b/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
index a7f8181fd906b..dc1e9369825a6 100644
--- a/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
+++ b/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin9"
 
diff --git a/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll b/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
index 2ccca25e2a279..c9c05e1cc3638 100644
--- a/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
+++ b/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin | grep lwz | grep 228
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin | grep lwz | grep 228
 
 @"\01LC" = internal constant [4 x i8] c"%d\0A\00"		; <[4 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
index b625cebaca418..97844dd7486a7 100644
--- a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
 
 	%struct.CGLDI = type { %struct.cgli*, i32, i32, i32, i32, i32, i8*, i32, void (%struct.CGLSI*, i32, %struct.CGLDI*)*, i8*, %struct.vv_t }
 	%struct.cgli = type { i32, %struct.cgli*, void (%struct.cgli*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32)*, i32, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i8*, i32*, %struct._cgro*, %struct._cgro*, float, float, float, float, i32, i8*, float, i8*, [16 x i32] }
diff --git a/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll b/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll
index 00ca811610273..91c36efc522e1 100644
--- a/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll
+++ b/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; XFAIL: *
 ; PR2356
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
diff --git a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
index c760b41b3047f..f474a6d7cc22e 100644
--- a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
+++ b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
 
 define void @__divtc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind {
 entry:
diff --git a/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
index 071c78833baec..f4c06fba6dfec 100644
--- a/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ b/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -o - | not grep fixunstfsi
+; RUN: llc < %s -march=ppc32 -o - | not grep fixunstfsi
 
 define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 entry:
diff --git a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll b/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
index af9a54ee9d13d..83f3f6f8a7621 100644
--- a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
+++ b/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 ; PR2986
 @argc = external global i32		; <i32*> [#uses=1]
 @buffer = external global [32 x i8], align 4		; <[32 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll b/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
index 0ad5b06c8032f..20683b9019e02 100644
--- a/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
+++ b/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2988
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin10.0"
diff --git a/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll b/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll
index f5b3e93d61707..9ed7f6f82dc47 100644
--- a/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll
+++ b/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc64-apple-darwin9.5
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9.5
 
 define void @__multc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind {
 entry:
diff --git a/test/CodeGen/PowerPC/2008-12-12-EH.ll b/test/CodeGen/PowerPC/2008-12-12-EH.ll
index 21218f55f039b..b56c22abc6ddd 100644
--- a/test/CodeGen/PowerPC/2008-12-12-EH.ll
+++ b/test/CodeGen/PowerPC/2008-12-12-EH.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | grep ^.L_Z1fv.eh
-; RUN: llvm-as < %s | llc  -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | grep ^.L_Z1fv.eh
+; RUN: llc < %s  -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh
 
 define void @_Z1fv() {
 entry:
diff --git a/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll b/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
index 0cf55188278ef..d49d58deeafff 100644
--- a/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
+++ b/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin9.5
+; RUN: llc < %s -mtriple=powerpc-apple-darwin9.5
 ; rdar://6499616
 
 	%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll b/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
index a898de0b48536..172531e5db499 100644
--- a/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
+++ b/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin10
+; RUN: llc < %s -mtriple=powerpc-apple-darwin10
 ; rdar://6692215
 
 define fastcc void @_qsort(i8* %a, i32 %n, i32 %es, i32 (i8*, i8*)* %cmp, i32 %depth_limit) nounwind optsize ssp {
diff --git a/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll b/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll
index 4ea43ec505e3a..29d115dc6a445 100644
--- a/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll
+++ b/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin10
+; RUN: llc < %s -mtriple=powerpc-apple-darwin10
 ; PR4280
 
 define i32 @__fixunssfsi(float %a) nounwind readnone {
diff --git a/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll b/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll
new file mode 100644
index 0000000000000..f64e3dcf73282
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=ppc32 -verify-machineinstrs
+
+; Machine code verifier will call isRegTiedToDefOperand() on /all/ register use
+; operands.  We must make sure that the operand flag is found correctly.
+
+; This test case is actually not specific to PowerPC, but the (imm, reg) format
+; of PowerPC "m" operands trigger this bug.
+
+define void @memory_asm_operand(i32 %a) {
+  ; "m" operand will be represented as:
+  ; INLINEASM <es:fake $0>, 10, %R2, 20, -4, %R1
+  ; It is difficult to find the flag operand (20) when starting from %R1
+  call i32 asm "lbzx $0, $1", "=r,m" (i32 %a)
+  ret void
+}
+
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
new file mode 100644
index 0000000000000..5d09696933b5e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ppc32 | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
+target triple = "powerpc-apple-darwin10.0"
+; It is wrong on powerpc to substitute reg+reg for $0; the stw opcode
+; would have to change.
+
+@x = external global [0 x i32]                    ; <[0 x i32]*> [#uses=1]
+
+define void @foo(i32 %y) nounwind ssp {
+entry:
+; CHECK: foo
+; CHECK: add r2
+; CHECK: 0(r2)
+  %y_addr = alloca i32                            ; <i32*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  store i32 %y, i32* %y_addr
+  %0 = load i32* %y_addr, align 4                 ; <i32> [#uses=1]
+  %1 = getelementptr inbounds [0 x i32]* @x, i32 0, i32 %0 ; <i32*> [#uses=1]
+  call void asm sideeffect "isync\0A\09eieio\0A\09stw $1, $0", "=*o,r,~{memory}"(i32* %1, i32 0) nounwind
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll b/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll
new file mode 100644
index 0000000000000..12c4c993ab518
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=ppc32 -mtriple=ppc-apple-darwin | FileCheck %s
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16		; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/PowerPC/2009-09-18-carrybit.ll b/test/CodeGen/PowerPC/2009-09-18-carrybit.ll
new file mode 100644
index 0000000000000..6c23a6162c9d4
--- /dev/null
+++ b/test/CodeGen/PowerPC/2009-09-18-carrybit.ll
@@ -0,0 +1,62 @@
+; RUN: llc -march=ppc32 < %s | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
+target triple = "powerpc-apple-darwin9.6"
+
+define i64 @foo(i64 %r.0.ph, i64 %q.0.ph, i32 %sr1.1.ph) nounwind {
+entry:
+; CHECK: foo:
+; CHECK: subfc
+; CHECK: subfe
+; CHECK: subfc
+; CHECK: subfe
+  %tmp0 = add i64 %r.0.ph, -1                           ; <i64> [#uses=1]
+  br label %bb40
+
+bb40:                                             ; preds = %bb40, %entry
+  %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb40 ] ; <i32> [#uses=1]
+  %carry.0274 = phi i32 [ 0, %entry ], [%tmp122, %bb40 ] ; <i32> [#uses=1]
+  %r.0273 = phi i64 [ %r.0.ph, %entry ], [ %tmp124, %bb40 ] ; <i64> [#uses=2]
+  %q.0272 = phi i64 [ %q.0.ph, %entry ], [ %ins169, %bb40 ] ; <i64> [#uses=3]
+  %tmp1 = lshr i64 %r.0273, 31                     ; <i64> [#uses=1]
+  %tmp2 = trunc i64 %tmp1 to i32                    ; <i32> [#uses=1]
+  %tmp3 = and i32 %tmp2, -2                         ; <i32> [#uses=1]
+  %tmp213 = trunc i64 %r.0273 to i32              ; <i32> [#uses=2]
+  %tmp106 = lshr i32 %tmp213, 31                     ; <i32> [#uses=1]
+  %tmp107 = or i32 %tmp3, %tmp106                        ; <i32> [#uses=1]
+  %tmp215 = zext i32 %tmp107 to i64                  ; <i64> [#uses=1]
+  %tmp216 = shl i64 %tmp215, 32                   ; <i64> [#uses=1]
+  %tmp108 = shl i32 %tmp213, 1                       ; <i32> [#uses=1]
+  %tmp109 = lshr i64 %q.0272, 63                     ; <i64> [#uses=1]
+  %tmp110 = trunc i64 %tmp109 to i32                    ; <i32> [#uses=1]
+  %tmp111 = or i32 %tmp108, %tmp110                        ; <i32> [#uses=1]
+  %tmp222 = zext i32 %tmp111 to i64                  ; <i64> [#uses=1]
+  %ins224 = or i64 %tmp216, %tmp222               ; <i64> [#uses=2]
+  %tmp112 = lshr i64 %q.0272, 31                     ; <i64> [#uses=1]
+  %tmp113 = trunc i64 %tmp112 to i32                    ; <i32> [#uses=1]
+  %tmp114 = and i32 %tmp113, -2                         ; <i32> [#uses=1]
+  %tmp158 = trunc i64 %q.0272 to i32              ; <i32> [#uses=2]
+  %tmp115 = lshr i32 %tmp158, 31                     ; <i32> [#uses=1]
+  %tmp116 = or i32 %tmp114, %tmp115                        ; <i32> [#uses=1]
+  %tmp160 = zext i32 %tmp116 to i64                  ; <i64> [#uses=1]
+  %tmp161 = shl i64 %tmp160, 32                   ; <i64> [#uses=1]
+  %tmp117 = shl i32 %tmp158, 1                       ; <i32> [#uses=1]
+  %tmp118 = or i32 %tmp117, %carry.0274                 ; <i32> [#uses=1]
+  %tmp167 = zext i32 %tmp118 to i64                  ; <i64> [#uses=1]
+  %ins169 = or i64 %tmp161, %tmp167               ; <i64> [#uses=2]
+  %tmp119 = sub i64 %tmp0, %ins224                    ; <i64> [#uses=1]
+  %tmp120 = ashr i64 %tmp119, 63                        ; <i64> [#uses=2]
+  %tmp121 = trunc i64 %tmp120 to i32                    ; <i32> [#uses=1]
+  %tmp122 = and i32 %tmp121, 1                          ; <i32> [#uses=2]
+  %tmp123 = and i64 %tmp120, %q.0.ph                         ; <i64> [#uses=1]
+  %tmp124 = sub i64 %ins224, %tmp123                    ; <i64> [#uses=2]
+  %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %indvar.next, %sr1.1.ph ; <i1> [#uses=1]
+  br i1 %exitcond, label %bb41.bb42_crit_edge, label %bb40
+
+bb41.bb42_crit_edge:                              ; preds = %bb40
+  %phitmp278 = zext i32 %tmp122 to i64               ; <i64> [#uses=1]
+  %tmp125 = shl i64 %ins169, 1                    ; <i64> [#uses=1]
+  %tmp126 = or i64 %phitmp278, %tmp125              ; <i64> [#uses=2]
+  ret i64 %tmp126
+}
diff --git a/test/CodeGen/PowerPC/Atomics-32.ll b/test/CodeGen/PowerPC/Atomics-32.ll
index f3246fda15cb2..03905a36dcfb9 100644
--- a/test/CodeGen/PowerPC/Atomics-32.ll
+++ b/test/CodeGen/PowerPC/Atomics-32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 ; ModuleID = 'Atomics.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/Atomics-64.ll b/test/CodeGen/PowerPC/Atomics-64.ll
index c3de7102b0384..1dc4310761c30 100644
--- a/test/CodeGen/PowerPC/Atomics-64.ll
+++ b/test/CodeGen/PowerPC/Atomics-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
 ; ModuleID = 'Atomics.c'
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc64-apple-darwin9"
diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll
index 45c13a7bfe419..25fc626550d28 100644
--- a/test/CodeGen/PowerPC/Frames-alloca.ll
+++ b/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -1,35 +1,28 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | \
-; RUN:   grep {stwu r1, -80(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim -enable-ppc32-regscavenger | \
-; RUN:   grep {stwu r1, -80(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {stdu r1, -112(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {ld r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {stdu r1, -112(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {ld r31, 40(r1)}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC64
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-NOFP
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-NOFP
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS-NOFP
+
+; CHECK-PPC32: stw r31, 20(r1)
+; CHECK-PPC32: lwz r1, 0(r1)
+; CHECK-PPC32: lwz r31, 20(r1)
+; CHECK-PPC32-NOFP: stw r31, 20(r1)
+; CHECK-PPC32-NOFP: lwz r1, 0(r1)
+; CHECK-PPC32-NOFP: lwz r31, 20(r1)
+; CHECK-PPC32-RS: stwu r1, -80(r1)
+; CHECK-PPC32-RS-NOFP: stwu r1, -80(r1)
+
+; CHECK-PPC64: std r31, 40(r1)
+; CHECK-PPC64: stdu r1, -112(r1)
+; CHECK-PPC64: ld r1, 0(r1)
+; CHECK-PPC64: ld r31, 40(r1)
+; CHECK-PPC64-NOFP: std r31, 40(r1)
+; CHECK-PPC64-NOFP: stdu r1, -112(r1)
+; CHECK-PPC64-NOFP: ld r1, 0(r1)
+; CHECK-PPC64-NOFP: ld r31, 40(r1)
 
 define i32* @f1(i32 %n) {
 	%tmp = alloca i32, i32 %n		; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index 0a15d2233e79a..fda2e4ff9ce9b 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -1,77 +1,52 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   not grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {ori r0, r0, 32704}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {stwux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {ori r0, r0, 32704}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {stwux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {lwz r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   not grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {ori r0, r0, 32656}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {stdux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
-; RUN:   not grep {ld r31, 40(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {lis r0, -1}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {ori r0, r0, 32656}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {stdux r1, r1, r0}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {ld r1, 0(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
-; RUN:   grep {ld r31, 40(r1)}
+; RUN: llvm-as < %s > %t.bc
+; RUN: llc < %t.bc -march=ppc32 | FileCheck %s -check-prefix=PPC32-NOFP
+; RUN: llc < %t.bc -march=ppc32 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-FP
 
-define i32* @f1() {
+; RUN: llc < %t.bc -march=ppc64 | FileCheck %s -check-prefix=PPC64-NOFP
+; RUN: llc < %t.bc -march=ppc64 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-FP
+
+
+target triple = "powerpc-apple-darwin8"
+
+define i32* @f1() nounwind {
         %tmp = alloca i32, i32 8191             ; <i32*> [#uses=1]
         ret i32* %tmp
 }
 
+; PPC32-NOFP: _f1:
+; PPC32-NOFP: 	lis r0, -1
+; PPC32-NOFP: 	ori r0, r0, 32704
+; PPC32-NOFP: 	stwux r1, r1, r0
+; PPC32-NOFP: 	addi r3, r1, 68
+; PPC32-NOFP: 	lwz r1, 0(r1)
+; PPC32-NOFP: 	blr 
+
+; PPC32-FP: _f1:
+; PPC32-FP:	stw r31, 20(r1)
+; PPC32-FP:	lis r0, -1
+; PPC32-FP:	ori r0, r0, 32704
+; PPC32-FP:	stwux r1, r1, r0
+; ...
+; PPC32-FP:	lwz r1, 0(r1)
+; PPC32-FP:	lwz r31, 20(r1)
+; PPC32-FP:	blr 
+
+
+; PPC64-NOFP: _f1:
+; PPC64-NOFP: 	lis r0, -1
+; PPC64-NOFP: 	ori r0, r0, 32656
+; PPC64-NOFP: 	stdux r1, r1, r0
+; PPC64-NOFP: 	addi r3, r1, 116
+; PPC64-NOFP: 	ld r1, 0(r1)
+; PPC64-NOFP: 	blr 
+
+
+; PPC64-FP: _f1:
+; PPC64-FP:	std r31, 40(r1)
+; PPC64-FP:	lis r0, -1
+; PPC64-FP:	ori r0, r0, 32656
+; PPC64-FP:	stdux r1, r1, r0
+; ...
+; PPC64-FP:	ld r1, 0(r1)
+; PPC64-FP:	ld r31, 40(r1)
+; PPC64-FP:	blr 
diff --git a/test/CodeGen/PowerPC/Frames-leaf.ll b/test/CodeGen/PowerPC/Frames-leaf.ll
index 11b64703ebd80..c2e1d6bddc5db 100644
--- a/test/CodeGen/PowerPC/Frames-leaf.ll
+++ b/test/CodeGen/PowerPC/Frames-leaf.ll
@@ -1,34 +1,34 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   not grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   not grep {stwu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
 ; RUN:   not grep {stw r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
 ; RUN:   not grep {stwu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
 ; RUN:   not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc32 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
 ; RUN:   not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
 ; RUN:   not grep {std r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
 ; RUN:   not grep {stdu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
 ; RUN:   not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc64 | \
 ; RUN:   not grep {ld r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
 ; RUN:   not grep {stw r31, 40(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
 ; RUN:   not grep {stdu r1, -.*(r1)}
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
 ; RUN:   not grep {addi r1, r1, }
-; RUN: llvm-as < %s | llc -march=ppc64 -disable-fp-elim | \
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
 ; RUN:   not grep {ld r31, 40(r1)}
 
 define i32* @f1() {
diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll
index 4ea3afba88317..6875704cf30d5 100644
--- a/test/CodeGen/PowerPC/Frames-small.ll
+++ b/test/CodeGen/PowerPC/Frames-small.ll
@@ -1,26 +1,22 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1
 ; RUN  not grep {stw r31, 20(r1)} %t1
 ; RUN: grep {stwu r1, -16448(r1)} %t1
 ; RUN: grep {addi r1, r1, 16448} %t1
-; RUN: llvm-as < %s | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN: not grep {lwz r31, 20(r1)}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
-; RUN:   -o %t2 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
+; RUN:   -o %t2
 ; RUN: grep {stw r31, 20(r1)} %t2
 ; RUN: grep {stwu r1, -16448(r1)} %t2
 ; RUN: grep {addi r1, r1, 16448} %t2
 ; RUN: grep {lwz r31, 20(r1)} %t2
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3 -f
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3
 ; RUN: not grep {std r31, 40(r1)} %t3
 ; RUN: grep {stdu r1, -16496(r1)} %t3
 ; RUN: grep {addi r1, r1, 16496} %t3
 ; RUN: not grep {ld r31, 40(r1)} %t3
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
-; RUN:   -o %t4 -f
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
+; RUN:   -o %t4
 ; RUN: grep {std r31, 40(r1)} %t4
 ; RUN: grep {stdu r1, -16496(r1)} %t4
 ; RUN: grep {addi r1, r1, 16496} %t4
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index 17053796bc79a..0f7acacbfac9d 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
 ; RUN:   grep {stw r3, 32751}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin | \
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
 ; RUN:   grep {stw r3, 32751}
-; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin | \
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
 ; RUN:   grep {std r2, 9024}
 
 define void @test() {
diff --git a/test/CodeGen/PowerPC/addc.ll b/test/CodeGen/PowerPC/addc.ll
index 406053bee27ff..09a7fbd7a69fe 100644
--- a/test/CodeGen/PowerPC/addc.ll
+++ b/test/CodeGen/PowerPC/addc.ll
@@ -1,5 +1,5 @@
 ; All of these should be codegen'd without loading immediates
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
 ; RUN: grep addc %t | count 1
 ; RUN: grep adde %t | count 1
 ; RUN: grep addze %t | count 1
diff --git a/test/CodeGen/PowerPC/addi-reassoc.ll b/test/CodeGen/PowerPC/addi-reassoc.ll
index bee8660a8a9ef..2b71ce65f6bca 100644
--- a/test/CodeGen/PowerPC/addi-reassoc.ll
+++ b/test/CodeGen/PowerPC/addi-reassoc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep addi
+; RUN: llc < %s -march=ppc32 | not grep addi
 
         %struct.X = type { [5 x i8] }
 
diff --git a/test/CodeGen/PowerPC/align.ll b/test/CodeGen/PowerPC/align.ll
index 7ffbe3676704c..e619faa75173a 100644
--- a/test/CodeGen/PowerPC/align.ll
+++ b/test/CodeGen/PowerPC/align.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep align.4 | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep align.2 | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep align.3 | count 1
 
 @A = global <4 x i32> < i32 10, i32 20, i32 30, i32 40 >                ; <<4 x i32>*> [#uses=0]
diff --git a/test/CodeGen/PowerPC/and-branch.ll b/test/CodeGen/PowerPC/and-branch.ll
index f0bb5ea40157e..0484f882ec72f 100644
--- a/test/CodeGen/PowerPC/and-branch.ll
+++ b/test/CodeGen/PowerPC/and-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mfcr
+; RUN: llc < %s -march=ppc32 | not grep mfcr
 
 define void @foo(i32 %X, i32 %Y, i32 %Z) {
 entry:
diff --git a/test/CodeGen/PowerPC/and-elim.ll b/test/CodeGen/PowerPC/and-elim.ll
index eef8f51f7a190..36853614c40ac 100644
--- a/test/CodeGen/PowerPC/and-elim.ll
+++ b/test/CodeGen/PowerPC/and-elim.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwin
+; RUN: llc < %s -march=ppc32 | not grep rlwin
 
 define void @test(i8* %P) {
 	%W = load i8* %P
diff --git a/test/CodeGen/PowerPC/and-imm.ll b/test/CodeGen/PowerPC/and-imm.ll
index 9c806494be3b0..64a45e50c0a94 100644
--- a/test/CodeGen/PowerPC/and-imm.ll
+++ b/test/CodeGen/PowerPC/and-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {ori\\|lis}
+; RUN: llc < %s -march=ppc32 | not grep {ori\\|lis}
 
 ; andi. r3, r3, 32769	
 define i32 @test(i32 %X) {
diff --git a/test/CodeGen/PowerPC/and_add.ll b/test/CodeGen/PowerPC/and_add.ll
index f103e7c0df063..517e775172c37 100644
--- a/test/CodeGen/PowerPC/and_add.ll
+++ b/test/CodeGen/PowerPC/and_add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
 ; RUN: grep slwi %t
 ; RUN: not grep addi %t
 ; RUN: not grep rlwinm %t
diff --git a/test/CodeGen/PowerPC/and_sext.ll b/test/CodeGen/PowerPC/and_sext.ll
index e0e498def057f..c6d234ea665fb 100644
--- a/test/CodeGen/PowerPC/and_sext.ll
+++ b/test/CodeGen/PowerPC/and_sext.ll
@@ -1,6 +1,6 @@
 ; These tests should not contain a sign extend.
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsh
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsb
+; RUN: llc < %s -march=ppc32 | not grep extsh
+; RUN: llc < %s -march=ppc32 | not grep extsb
 
 define i32 @test1(i32 %mode.0.i.0) {
         %tmp.79 = trunc i32 %mode.0.i.0 to i16
diff --git a/test/CodeGen/PowerPC/and_sra.ll b/test/CodeGen/PowerPC/and_sra.ll
index c780605c9753e..e6c02d80452d4 100644
--- a/test/CodeGen/PowerPC/and_sra.ll
+++ b/test/CodeGen/PowerPC/and_sra.ll
@@ -1,5 +1,5 @@
 ; Neither of these functions should contain algebraic right shifts
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep srawi 
+; RUN: llc < %s -march=ppc32 | not grep srawi 
 
 define i32 @test1(i32 %mode.0.i.0) {
         %tmp.79 = bitcast i32 %mode.0.i.0 to i32                ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index f6bb2983d5651..ec4e42defdcb0 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lwarx  | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | grep stwcx. | count 4
+; RUN: llc < %s -march=ppc32 | grep lwarx  | count 3
+; RUN: llc < %s -march=ppc32 | grep stwcx. | count 4
 
 define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind  {
 	%tmp = call i32 @llvm.atomic.load.add.i32( i32* %mem, i32 %val )
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 77b7b08d8c225..6d9daef9285c0 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep ldarx  | count 3
-; RUN: llvm-as < %s | llc -march=ppc64 | grep stdcx. | count 4
+; RUN: llc < %s -march=ppc64 | grep ldarx  | count 3
+; RUN: llc < %s -march=ppc64 | grep stdcx. | count 4
 
 define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind  {
 	%tmp = call i64 @llvm.atomic.load.add.i64( i64* %mem, i64 %val )
diff --git a/test/CodeGen/PowerPC/available-externally.ll b/test/CodeGen/PowerPC/available-externally.ll
index cfad6ea454ad1..fdead7dd8b342 100644
--- a/test/CodeGen/PowerPC/available-externally.ll
+++ b/test/CodeGen/PowerPC/available-externally.ll
@@ -1,69 +1,71 @@
-; RUN: llvm-as < %s | llc | grep {bl L_exact_log2.stub}
+; RUN: llc < %s -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
 ; PR4482
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "powerpc-apple-darwin8"
 
 define i32 @foo(i64 %x) nounwind {
 entry:
-	%x_addr = alloca i64		; <i64*> [#uses=2]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i64 %x, i64* %x_addr
-	%1 = load i64* %x_addr, align 8		; <i64> [#uses=1]
-	%2 = call i32 @exact_log2(i64 %1) nounwind		; <i32> [#uses=1]
-	store i32 %2, i32* %0, align 4
-	%3 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %3, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
-	ret i32 %retval1
+; STATIC: _foo:
+; STATIC: bl _exact_log2
+; STATIC: blr
+; STATIC: .subsections_via_symbols
+
+; PIC: _foo:
+; PIC: bl L_exact_log2$stub
+; PIC: blr
+
+; DYNAMIC: _foo:
+; DYNAMIC: bl L_exact_log2$stub
+; DYNAMIC: blr
+
+        %A = call i32 @exact_log2(i64 %x) nounwind
+	ret i32 %A
 }
 
 define available_externally i32 @exact_log2(i64 %x) nounwind {
 entry:
-	%x_addr = alloca i64		; <i64*> [#uses=6]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%iftmp.0 = alloca i32		; <i32*> [#uses=3]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i64 %x, i64* %x_addr
-	%1 = load i64* %x_addr, align 8		; <i64> [#uses=1]
-	%2 = sub i64 0, %1		; <i64> [#uses=1]
-	%3 = load i64* %x_addr, align 8		; <i64> [#uses=1]
-	%4 = and i64 %2, %3		; <i64> [#uses=1]
-	%5 = load i64* %x_addr, align 8		; <i64> [#uses=1]
-	%6 = icmp ne i64 %4, %5		; <i1> [#uses=1]
-	br i1 %6, label %bb2, label %bb
-
-bb:		; preds = %entry
-	%7 = load i64* %x_addr, align 8		; <i64> [#uses=1]
-	%8 = icmp eq i64 %7, 0		; <i1> [#uses=1]
-	br i1 %8, label %bb2, label %bb1
-
-bb1:		; preds = %bb
-	%9 = load i64* %x_addr, align 8		; <i64> [#uses=1]
-	%10 = call i64 @llvm.cttz.i64(i64 %9)		; <i64> [#uses=1]
-	%11 = trunc i64 %10 to i32		; <i32> [#uses=1]
-	store i32 %11, i32* %iftmp.0, align 4
-	br label %bb3
-
-bb2:		; preds = %bb, %entry
-	store i32 -1, i32* %iftmp.0, align 4
-	br label %bb3
-
-bb3:		; preds = %bb2, %bb1
-	%12 = load i32* %iftmp.0, align 4		; <i32> [#uses=1]
-	store i32 %12, i32* %0, align 4
-	%13 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %13, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %bb3
-	%retval4 = load i32* %retval		; <i32> [#uses=1]
-	ret i32 %retval4
+	ret i32 42
 }
 
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
+
+; PIC: .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+; PIC: L_exact_log2$stub:
+; PIC: .indirect_symbol _exact_log2
+; PIC: mflr r0
+; PIC: bcl 20,31,L_exact_log2$stub$tmp
+
+; PIC: L_exact_log2$stub$tmp:
+; PIC: mflr r11
+; PIC: addis r11,r11,ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
+; PIC: mtlr r0
+; PIC: lwzu r12,lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
+; PIC: mtctr r12
+; PIC: bctr
+
+; PIC: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; PIC: L_exact_log2$lazy_ptr:
+; PIC: .indirect_symbol _exact_log2
+; PIC: .long dyld_stub_binding_helper
+
+; PIC: .subsections_via_symbols
+
+
+; DYNAMIC: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
+; DYNAMIC: L_exact_log2$stub:
+; DYNAMIC: .indirect_symbol _exact_log2
+; DYNAMIC: lis r11,ha16(L_exact_log2$lazy_ptr)
+; DYNAMIC: lwzu r12,lo16(L_exact_log2$lazy_ptr)(r11)
+; DYNAMIC: mtctr r12
+; DYNAMIC: bctr
+
+; DYNAMIC: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; DYNAMIC: L_exact_log2$lazy_ptr:
+; DYNAMIC: .indirect_symbol _exact_log2
+; DYNAMIC: .long dyld_stub_binding_helper
+
+
+
+
+
diff --git a/test/CodeGen/PowerPC/big-endian-actual-args.ll b/test/CodeGen/PowerPC/big-endian-actual-args.ll
index d23935756dc22..009f46811e78c 100644
--- a/test/CodeGen/PowerPC/big-endian-actual-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-actual-args.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
 ; RUN:   grep {addc 4, 4, 6}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
 ; RUN:   grep {adde 3, 3, 5}
 
 define i64 @foo(i64 %x, i64 %y) {
diff --git a/test/CodeGen/PowerPC/big-endian-call-result.ll b/test/CodeGen/PowerPC/big-endian-call-result.ll
index ab136f65d2746..fe85404cb94f2 100644
--- a/test/CodeGen/PowerPC/big-endian-call-result.ll
+++ b/test/CodeGen/PowerPC/big-endian-call-result.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
 ; RUN:   grep {addic 4, 4, 1}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
 ; RUN:   grep {addze 3, 3}
 
 declare i64 @foo()
diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll
index 08589f4999650..e46e1ec8d775f 100644
--- a/test/CodeGen/PowerPC/big-endian-formal-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
 ; RUN:   grep {li 6, 3}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
 ; RUN:   grep {li 4, 2}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
 ; RUN:   grep {li 3, 0}
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
 ; RUN:   grep {mr 5, 3}
 
 declare void @bar(i64 %x, i64 %y)
diff --git a/test/CodeGen/PowerPC/branch-opt.ll b/test/CodeGen/PowerPC/branch-opt.ll
index 4aa55a39e5de6..cc02e406aa61b 100644
--- a/test/CodeGen/PowerPC/branch-opt.ll
+++ b/test/CodeGen/PowerPC/branch-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep {b LBB.*} | count 4
 
 target datalayout = "E-p:32:32"
diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll
index e450eb8c23789..7eb3bbb8d308c 100644
--- a/test/CodeGen/PowerPC/bswap-load-store.ll
+++ b/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep {stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | count 4
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwinm
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwimi
-; RUN: llvm-as < %s | llc -march=ppc64 | \
+; RUN: llc < %s -march=ppc32 | not grep rlwinm
+; RUN: llc < %s -march=ppc32 | not grep rlwimi
+; RUN: llc < %s -march=ppc64 | \
 ; RUN:   grep {stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | count 4
-; RUN: llvm-as < %s | llc -march=ppc64 | not grep rlwinm
-; RUN: llvm-as < %s | llc -march=ppc64 | not grep rlwimi
+; RUN: llc < %s -march=ppc64 | not grep rlwinm
+; RUN: llc < %s -march=ppc64 | not grep rlwimi
 
 define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
         %tmp1 = getelementptr i8* %ptr, i32 %off                ; <i8*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
index 20ff3dbc4f7b9..0454c584bcfe3 100644
--- a/test/CodeGen/PowerPC/buildvec_canonicalize.ll
+++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -1,11 +1,9 @@
 ; There should be exactly one vxor here.
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
 ; RUN:   grep vxor | count 1
 
 ; There should be exactly one vsplti here.
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
 ; RUN:   grep vsplti | count 1
 
 define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
diff --git a/test/CodeGen/PowerPC/calls.ll b/test/CodeGen/PowerPC/calls.ll
index 034c14147a29f..0db184f728550 100644
--- a/test/CodeGen/PowerPC/calls.ll
+++ b/test/CodeGen/PowerPC/calls.ll
@@ -1,10 +1,10 @@
 ; Test various forms of calls.
 
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep {bl } | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep {bctrl} | count 1
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep {bla } | count 1
 
 declare void @foo()
diff --git a/test/CodeGen/PowerPC/cmp-cmp.ll b/test/CodeGen/PowerPC/cmp-cmp.ll
index 07964d5aa315d..35a5e427853e4 100644
--- a/test/CodeGen/PowerPC/cmp-cmp.ll
+++ b/test/CodeGen/PowerPC/cmp-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mfcr
+; RUN: llc < %s -march=ppc32 | not grep mfcr
 
 define void @test(i64 %X) {
         %tmp1 = and i64 %X, 3           ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/compare-duplicate.ll b/test/CodeGen/PowerPC/compare-duplicate.ll
index df2dfdc17b643..f5108c37a8ad7 100644
--- a/test/CodeGen/PowerPC/compare-duplicate.ll
+++ b/test/CodeGen/PowerPC/compare-duplicate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8  | not grep slwi
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8  | not grep slwi
 
 define i32 @test(i32 %A, i32 %B) {
 	%C = sub i32 %B, %A
diff --git a/test/CodeGen/PowerPC/compare-simm.ll b/test/CodeGen/PowerPC/compare-simm.ll
index b0ef2d3f9464c..5ba050060fcb8 100644
--- a/test/CodeGen/PowerPC/compare-simm.ll
+++ b/test/CodeGen/PowerPC/compare-simm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
 ; RUN:   grep {cmpwi cr0, r3, -1}
 
 define i32 @test(i32 %x) {
diff --git a/test/CodeGen/PowerPC/constants.ll b/test/CodeGen/PowerPC/constants.ll
index b58f59a7eb554..8901e02d3b809 100644
--- a/test/CodeGen/PowerPC/constants.ll
+++ b/test/CodeGen/PowerPC/constants.ll
@@ -1,9 +1,9 @@
 ; All of these routines should be perform optimal load of constants.
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep lis | count 5
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep ori | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep {li } | count 4
 
 define i32 @f1() {
diff --git a/test/CodeGen/PowerPC/cr_spilling.ll b/test/CodeGen/PowerPC/cr_spilling.ll
index 4584c7118237c..b21586873612a 100644
--- a/test/CodeGen/PowerPC/cr_spilling.ll
+++ b/test/CodeGen/PowerPC/cr_spilling.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -regalloc=local -O0 -relocation-model=pic -o -
+; RUN: llc < %s -march=ppc32 -regalloc=local -O0 -relocation-model=pic -o -
 ; PR1638
 
 @.str242 = external constant [3 x i8]		; <[3 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index 2c51e8afa5580..ab493a068a32c 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -1,5 +1,5 @@
 ; Make sure this testcase does not use ctpop
-; RUN: llvm-as < %s | llc -march=ppc32 | grep -i cntlzw
+; RUN: llc < %s -march=ppc32 | grep -i cntlzw
 
 declare i32 @llvm.cttz.i32(i32)
 
diff --git a/test/CodeGen/PowerPC/darwin-labels.ll b/test/CodeGen/PowerPC/darwin-labels.ll
index ceebc7099e4ee..af233697403d5 100644
--- a/test/CodeGen/PowerPC/darwin-labels.ll
+++ b/test/CodeGen/PowerPC/darwin-labels.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {foo bar":}
+; RUN: llc < %s | grep {foo bar":}
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/delete-node.ll b/test/CodeGen/PowerPC/delete-node.ll
index 0b1d734f8a8c0..a26c21154824a 100644
--- a/test/CodeGen/PowerPC/delete-node.ll
+++ b/test/CodeGen/PowerPC/delete-node.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 ; The DAGCombiner leaves behind a dead node in this testcase. Currently
 ; ISel is ignoring dead nodes, though it would be preferable for
diff --git a/test/CodeGen/PowerPC/div-2.ll b/test/CodeGen/PowerPC/div-2.ll
index 26e6221784087..2fc916ff005f0 100644
--- a/test/CodeGen/PowerPC/div-2.ll
+++ b/test/CodeGen/PowerPC/div-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep srawi 
-; RUN: llvm-as < %s | llc -march=ppc32 | grep blr
+; RUN: llc < %s -march=ppc32 | not grep srawi 
+; RUN: llc < %s -march=ppc32 | grep blr
 
 define i32 @test1(i32 %X) {
         %Y = and i32 %X, 15             ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
index 7be8a34be7ef9..558fd1b3199b4 100644
--- a/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
+++ b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
@@ -1,12 +1,12 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep eqv | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
 ; RUN:   grep andc | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep orc | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
 ; RUN:   grep nor | count 3
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   grep nand | count 1
 
 define i32 @EQV1(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/PowerPC/extsh.ll b/test/CodeGen/PowerPC/extsh.ll
index 5eca8cea36064..506ff86051ff5 100644
--- a/test/CodeGen/PowerPC/extsh.ll
+++ b/test/CodeGen/PowerPC/extsh.ll
@@ -1,5 +1,5 @@
 ; This should turn into a single extsh
-; RUN: llvm-as < %s | llc -march=ppc32 | grep extsh | count 1
+; RUN: llc < %s -march=ppc32 | grep extsh | count 1
 define i32 @test(i32 %X) {
         %tmp.81 = shl i32 %X, 16                ; <i32> [#uses=1]
         %tmp.82 = ashr i32 %tmp.81, 16          ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/fabs.ll b/test/CodeGen/PowerPC/fabs.ll
index 54e49b009ace8..6ef740f835cbc 100644
--- a/test/CodeGen/PowerPC/fabs.ll
+++ b/test/CodeGen/PowerPC/fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin | grep {fabs f1, f1}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep {fabs f1, f1}
 
 define double @fabs(double %f) {
 entry:
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index 4a6fe70574f46..815c72c1f8a74 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 | \
 ; RUN:   egrep {fn?madd|fn?msub} | count 8
 
 define double @test_FMADD1(double %A, double %B, double %C) {
diff --git a/test/CodeGen/PowerPC/fnabs.ll b/test/CodeGen/PowerPC/fnabs.ll
index 6c10dfbd44b0c..bbd5c7159edc4 100644
--- a/test/CodeGen/PowerPC/fnabs.ll
+++ b/test/CodeGen/PowerPC/fnabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fnabs
+; RUN: llc < %s -march=ppc32 | grep fnabs
 
 declare double @fabs(double)
 
diff --git a/test/CodeGen/PowerPC/fneg.ll b/test/CodeGen/PowerPC/fneg.ll
index 9579a748e98ef..0bd31bb082cd4 100644
--- a/test/CodeGen/PowerPC/fneg.ll
+++ b/test/CodeGen/PowerPC/fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg
+; RUN: llc < %s -march=ppc32 | not grep fneg
 
 define double @test1(double %a, double %b, double %c, double %d) {
 entry:
diff --git a/test/CodeGen/PowerPC/fold-li.ll b/test/CodeGen/PowerPC/fold-li.ll
index 2ac79f149131a..92d8da500e840 100644
--- a/test/CodeGen/PowerPC/fold-li.ll
+++ b/test/CodeGen/PowerPC/fold-li.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32  | \
+; RUN: llc < %s -march=ppc32  | \
 ; RUN:   grep -v align | not grep li
 
 ;; Test that immediates are folded into these instructions correctly.
diff --git a/test/CodeGen/PowerPC/fp-branch.ll b/test/CodeGen/PowerPC/fp-branch.ll
index 3db6ced572fe0..673da027e229b 100644
--- a/test/CodeGen/PowerPC/fp-branch.ll
+++ b/test/CodeGen/PowerPC/fp-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fcmp | count 1
+; RUN: llc < %s -march=ppc32 | grep fcmp | count 1
 
 declare i1 @llvm.isunordered.f64(double, double)
 
diff --git a/test/CodeGen/PowerPC/fp-int-fp.ll b/test/CodeGen/PowerPC/fp-int-fp.ll
index 1b78b01e6c936..18f7f83852a26 100644
--- a/test/CodeGen/PowerPC/fp-int-fp.ll
+++ b/test/CodeGen/PowerPC/fp-int-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep r1
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep r1
 
 define double @test1(double %X) {
         %Y = fptosi double %X to i64            ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/fp_to_uint.ll b/test/CodeGen/PowerPC/fp_to_uint.ll
index 43502bbb3ef05..1360b62d273b8 100644
--- a/test/CodeGen/PowerPC/fp_to_uint.ll
+++ b/test/CodeGen/PowerPC/fp_to_uint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fctiwz | count 1
+; RUN: llc < %s -march=ppc32 | grep fctiwz | count 1
 
 define i16 @foo(float %a) {
 entry:
diff --git a/test/CodeGen/PowerPC/fpcopy.ll b/test/CodeGen/PowerPC/fpcopy.ll
index 7d8059645ad22..7b9446baac071 100644
--- a/test/CodeGen/PowerPC/fpcopy.ll
+++ b/test/CodeGen/PowerPC/fpcopy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep fmr
+; RUN: llc < %s -march=ppc32 | not grep fmr
 
 define double @test(float %F) {
         %F.upgrd.1 = fpext float %F to double           ; <double> [#uses=1]
diff --git a/test/CodeGen/PowerPC/frounds.ll b/test/CodeGen/PowerPC/frounds.ll
index 0d8e621f354f3..8eeadc3a34696 100644
--- a/test/CodeGen/PowerPC/frounds.ll
+++ b/test/CodeGen/PowerPC/frounds.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 define i32 @foo() {
 entry:
diff --git a/test/CodeGen/PowerPC/fsqrt.ll b/test/CodeGen/PowerPC/fsqrt.ll
index 1260c602f9d9c..74a8725eb12e3 100644
--- a/test/CodeGen/PowerPC/fsqrt.ll
+++ b/test/CodeGen/PowerPC/fsqrt.ll
@@ -1,17 +1,13 @@
 ; fsqrt should be generated when the fsqrt feature is enabled, but not 
 ; otherwise.
 
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
 ; RUN:   grep {fsqrt f1, f1}
-; RUN: llvm-as < %s | \
-; RUN:  llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
-; RUN:  grep {fsqrt f1, f1}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
+; RUN:   grep {fsqrt f1, f1}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
 ; RUN:   not grep {fsqrt f1, f1}
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
 ; RUN:   not grep {fsqrt f1, f1}
 
 declare double @llvm.sqrt.f64(double)
diff --git a/test/CodeGen/PowerPC/hello.ll b/test/CodeGen/PowerPC/hello.ll
index 1d7275f238bb7..ea27e9257a651 100644
--- a/test/CodeGen/PowerPC/hello.ll
+++ b/test/CodeGen/PowerPC/hello.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
 ; PR1399
 
 @.str = internal constant [13 x i8] c"Hello World!\00"
diff --git a/test/CodeGen/PowerPC/hidden-vis-2.ll b/test/CodeGen/PowerPC/hidden-vis-2.ll
index 4c9ae552f7c7e..e9e2c0a93a0d4 100644
--- a/test/CodeGen/PowerPC/hidden-vis-2.ll
+++ b/test/CodeGen/PowerPC/hidden-vis-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin9 | grep non_lazy_ptr | count 6
+; RUN: llc < %s -mtriple=powerpc-apple-darwin9 | grep non_lazy_ptr | count 6
 
 @x = external hidden global i32		; <i32*> [#uses=1]
 @y = extern_weak hidden global i32	; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/hidden-vis.ll b/test/CodeGen/PowerPC/hidden-vis.ll
index e04c89aebcc23..b2cc1431ebde5 100644
--- a/test/CodeGen/PowerPC/hidden-vis.ll
+++ b/test/CodeGen/PowerPC/hidden-vis.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin9 | not grep non_lazy_ptr
+; RUN: llc < %s -mtriple=powerpc-apple-darwin9 | not grep non_lazy_ptr
 
 @x = weak hidden global i32 0		; <i32*> [#uses=1]
 
diff --git a/test/CodeGen/PowerPC/i128-and-beyond.ll b/test/CodeGen/PowerPC/i128-and-beyond.ll
index 9e0d6c30b8c7b..51bcab2441147 100644
--- a/test/CodeGen/PowerPC/i128-and-beyond.ll
+++ b/test/CodeGen/PowerPC/i128-and-beyond.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep 4294967295 | count 28
+; RUN: llc < %s -march=ppc32 | grep 4294967295 | count 28
 
 ; These static initializers are too big to hand off to assemblers
 ; as monolithic blobs.
diff --git a/test/CodeGen/PowerPC/i64_fp.ll b/test/CodeGen/PowerPC/i64_fp.ll
index 5ff2684d7b000..d53c94878409f 100644
--- a/test/CodeGen/PowerPC/i64_fp.ll
+++ b/test/CodeGen/PowerPC/i64_fp.ll
@@ -1,21 +1,21 @@
 ; fcfid and fctid should be generated when the 64bit feature is enabled, but not
 ; otherwise.
 
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=+64bit | \
 ; RUN:   grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=+64bit | \
 ; RUN:   grep fctidz
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
 ; RUN:   grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
 ; RUN:   grep fctidz
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=-64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
 ; RUN:   not grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mattr=-64bit | \
+; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
 ; RUN:   not grep fctidz
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g4 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g4 | \
 ; RUN:   not grep fcfid
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g4 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g4 | \
 ; RUN:   not grep fctidz
 
 define double @X(double %Y) {
diff --git a/test/CodeGen/PowerPC/iabs.ll b/test/CodeGen/PowerPC/iabs.ll
index 677b41bb12e16..a43f09c7d5618 100644
--- a/test/CodeGen/PowerPC/iabs.ll
+++ b/test/CodeGen/PowerPC/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -stats |& \
+; RUN: llc < %s -march=ppc32 -stats |& \
 ; RUN:   grep {4 .*Number of machine instrs printed}
 
 ;; Integer absolute value, should produce something as good as:
diff --git a/test/CodeGen/PowerPC/illegal-element-type.ll b/test/CodeGen/PowerPC/illegal-element-type.ll
index 54a06656b1b90..58bd0558e2bae 100644
--- a/test/CodeGen/PowerPC/illegal-element-type.ll
+++ b/test/CodeGen/PowerPC/illegal-element-type.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3
+; RUN: llc < %s -march=ppc32 -mcpu=g3
 
 define void @foo() {
 entry:
diff --git a/test/CodeGen/PowerPC/inlineasm-copy.ll b/test/CodeGen/PowerPC/inlineasm-copy.ll
index c0a397982adf9..e1ff82d5f9b7f 100644
--- a/test/CodeGen/PowerPC/inlineasm-copy.ll
+++ b/test/CodeGen/PowerPC/inlineasm-copy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mr
+; RUN: llc < %s -march=ppc32 | not grep mr
 
 define i32 @test(i32 %Y, i32 %X) {
 entry:
diff --git a/test/CodeGen/PowerPC/int-fp-conv-0.ll b/test/CodeGen/PowerPC/int-fp-conv-0.ll
index 82a182685143e..983d2b823f100 100644
--- a/test/CodeGen/PowerPC/int-fp-conv-0.ll
+++ b/test/CodeGen/PowerPC/int-fp-conv-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 > %t
+; RUN: llc < %s -march=ppc64 > %t
 ; RUN: grep  __floattitf %t
 ; RUN: grep  __fixunstfti %t
 
diff --git a/test/CodeGen/PowerPC/int-fp-conv-1.ll b/test/CodeGen/PowerPC/int-fp-conv-1.ll
index 583408c0eae27..6c8272351924c 100644
--- a/test/CodeGen/PowerPC/int-fp-conv-1.ll
+++ b/test/CodeGen/PowerPC/int-fp-conv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep __floatditf
+; RUN: llc < %s -march=ppc64 | grep __floatditf
 
 define i64 @__fixunstfdi(ppc_fp128 %a) nounwind  {
 entry:
diff --git a/test/CodeGen/PowerPC/invalid-memcpy.ll b/test/CodeGen/PowerPC/invalid-memcpy.ll
index 6df968dddae56..3b1f3060a1c0b 100644
--- a/test/CodeGen/PowerPC/invalid-memcpy.ll
+++ b/test/CodeGen/PowerPC/invalid-memcpy.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
 
 ; This testcase is invalid (the alignment specified for memcpy is 
 ; greater than the alignment guaranteed for Qux or C.0.1173, but it
diff --git a/test/CodeGen/PowerPC/inverted-bool-compares.ll b/test/CodeGen/PowerPC/inverted-bool-compares.ll
index f8c5f11180ca1..aa7e4d6860246 100644
--- a/test/CodeGen/PowerPC/inverted-bool-compares.ll
+++ b/test/CodeGen/PowerPC/inverted-bool-compares.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep xori
+; RUN: llc < %s -march=ppc32 | not grep xori
 
 define i32 @test(i1 %B, i32* %P) {
         br i1 %B, label %T, label %F
diff --git a/test/CodeGen/PowerPC/ispositive.ll b/test/CodeGen/PowerPC/ispositive.ll
index 192d7384e9532..4161e3438a4ba 100644
--- a/test/CodeGen/PowerPC/ispositive.ll
+++ b/test/CodeGen/PowerPC/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
 ; RUN:   grep {srwi r3, r3, 31}
 
 define i32 @test1(i32 %X) {
diff --git a/test/CodeGen/PowerPC/itofp128.ll b/test/CodeGen/PowerPC/itofp128.ll
index 4d745111b04ba..6d9ef9590399b 100644
--- a/test/CodeGen/PowerPC/itofp128.ll
+++ b/test/CodeGen/PowerPC/itofp128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc64-apple-darwin9.2.0"
diff --git a/test/CodeGen/PowerPC/lha.ll b/test/CodeGen/PowerPC/lha.ll
index e8f73eea2e2e8..3a100c1aae6d6 100644
--- a/test/CodeGen/PowerPC/lha.ll
+++ b/test/CodeGen/PowerPC/lha.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lha
+; RUN: llc < %s -march=ppc32 | grep lha
 
 define i32 @test(i16* %a) {
         %tmp.1 = load i16* %a           ; <i16> [#uses=1]
diff --git a/test/CodeGen/PowerPC/load-constant-addr.ll b/test/CodeGen/PowerPC/load-constant-addr.ll
index d2be04efd0366..f1d061c1ad5a2 100644
--- a/test/CodeGen/PowerPC/load-constant-addr.ll
+++ b/test/CodeGen/PowerPC/load-constant-addr.ll
@@ -1,6 +1,6 @@
 ; Should fold the ori into the lfs.
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lfs
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep ori
+; RUN: llc < %s -march=ppc32 | grep lfs
+; RUN: llc < %s -march=ppc32 | not grep ori
 
 define float @test() {
         %tmp.i = load float* inttoptr (i32 186018016 to float*)         ; <float> [#uses=1]
diff --git a/test/CodeGen/PowerPC/long-compare.ll b/test/CodeGen/PowerPC/long-compare.ll
index 7b907250e1fc2..94c2526cf5b9f 100644
--- a/test/CodeGen/PowerPC/long-compare.ll
+++ b/test/CodeGen/PowerPC/long-compare.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep cntlzw 
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep xori 
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {li }
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {mr }
+; RUN: llc < %s -march=ppc32 | grep cntlzw 
+; RUN: llc < %s -march=ppc32 | not grep xori 
+; RUN: llc < %s -march=ppc32 | not grep {li }
+; RUN: llc < %s -march=ppc32 | not grep {mr }
 
 define i1 @test(i64 %x) {
   %tmp = icmp ult i64 %x, 4294967296
diff --git a/test/CodeGen/PowerPC/longdbl-truncate.ll b/test/CodeGen/PowerPC/longdbl-truncate.ll
index a87382405a5d3..e5f63c6441855 100644
--- a/test/CodeGen/PowerPC/longdbl-truncate.ll
+++ b/test/CodeGen/PowerPC/longdbl-truncate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin8"
 
diff --git a/test/CodeGen/PowerPC/mask64.ll b/test/CodeGen/PowerPC/mask64.ll
index 69d2200212f9e..139621af1f22b 100644
--- a/test/CodeGen/PowerPC/mask64.ll
+++ b/test/CodeGen/PowerPC/mask64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc64-apple-darwin9.2.0"
diff --git a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
index fd0e1d4a2ea87..5661ef9768d13 100644
--- a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
+++ b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep li.*16
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep addi
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep li.*16
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep addi
 
 ; Codegen lvx (R+16) as t = li 16,  lvx t,R
 ; This shares the 16 between the two loads.
diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll
index a1527629980b3..b267719421a38 100644
--- a/test/CodeGen/PowerPC/mem_update.ll
+++ b/test/CodeGen/PowerPC/mem_update.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-ppc-preinc | \
+; RUN: llc < %s -march=ppc32 -enable-ppc-preinc | \
 ; RUN:   not grep addi
-; RUN: llvm-as < %s | llc -march=ppc64 -enable-ppc-preinc | \
+; RUN: llc < %s -march=ppc64 -enable-ppc-preinc | \
 ; RUN:   not grep addi
 
 @Glob = global i64 4		; <i64*> [#uses=2]
diff --git a/test/CodeGen/PowerPC/mul-neg-power-2.ll b/test/CodeGen/PowerPC/mul-neg-power-2.ll
index 90446d707db7f..9688d6e3d5199 100644
--- a/test/CodeGen/PowerPC/mul-neg-power-2.ll
+++ b/test/CodeGen/PowerPC/mul-neg-power-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep mul
+; RUN: llc < %s -march=ppc32 | not grep mul
 
 define i32 @test1(i32 %a) {
         %tmp.1 = mul i32 %a, -2         ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/mul-with-overflow.ll b/test/CodeGen/PowerPC/mul-with-overflow.ll
index 0276846d7cbba..f03e3cb5cd47c 100644
--- a/test/CodeGen/PowerPC/mul-with-overflow.ll
+++ b/test/CodeGen/PowerPC/mul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
 define i1 @a(i32 %x) zeroext nounwind {
diff --git a/test/CodeGen/PowerPC/mulhs.ll b/test/CodeGen/PowerPC/mulhs.ll
index 3b0daad227e92..9ab8d997c0d00 100644
--- a/test/CodeGen/PowerPC/mulhs.ll
+++ b/test/CodeGen/PowerPC/mulhs.ll
@@ -1,5 +1,5 @@
 ; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
 ; RUN: not grep mulhwu %t
 ; RUN: not grep srawi %t 
 ; RUN: not grep add %t 
diff --git a/test/CodeGen/PowerPC/multiple-return-values.ll b/test/CodeGen/PowerPC/multiple-return-values.ll
index 3f75f7d28ed66..b9317f90c1da8 100644
--- a/test/CodeGen/PowerPC/multiple-return-values.ll
+++ b/test/CodeGen/PowerPC/multiple-return-values.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc64
 
 define {i64, float} @bar(i64 %a, float %b) {
         %y = add i64 %a, 7
diff --git a/test/CodeGen/PowerPC/neg.ll b/test/CodeGen/PowerPC/neg.ll
index c135599039014..c673912d2ef1e 100644
--- a/test/CodeGen/PowerPC/neg.ll
+++ b/test/CodeGen/PowerPC/neg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep neg
+; RUN: llc < %s -march=ppc32 | grep neg
 
 define i32 @test(i32 %X) {
         %Y = sub i32 0, %X              ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/no-dead-strip.ll b/test/CodeGen/PowerPC/no-dead-strip.ll
index e7ceaaeab6781..34594132530db 100644
--- a/test/CodeGen/PowerPC/no-dead-strip.ll
+++ b/test/CodeGen/PowerPC/no-dead-strip.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {no_dead_strip.*_X}
+; RUN: llc < %s | grep {no_dead_strip.*_X}
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "powerpc-apple-darwin8.8.0"
diff --git a/test/CodeGen/PowerPC/or-addressing-mode.ll b/test/CodeGen/PowerPC/or-addressing-mode.ll
index 9b6e9551bf047..e50374e306961 100644
--- a/test/CodeGen/PowerPC/or-addressing-mode.ll
+++ b/test/CodeGen/PowerPC/or-addressing-mode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep ori
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep rlwimi
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | not grep ori
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | not grep rlwimi
 
 define i32 @test1(i8* %P) {
         %tmp.2.i = ptrtoint i8* %P to i32               ; <i32> [#uses=2]
diff --git a/test/CodeGen/PowerPC/ppcf128-1-opt.ll b/test/CodeGen/PowerPC/ppcf128-1-opt.ll
index e3c5ab1225457..2fc17209ccfd2 100644
--- a/test/CodeGen/PowerPC/ppcf128-1-opt.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc > %t
+; RUN: llc < %s > %t
 ; ModuleID = '<stdin>'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/ppcf128-1.ll b/test/CodeGen/PowerPC/ppcf128-1.ll
index a487de7fd5772..1047fe5d3ba9e 100644
--- a/test/CodeGen/PowerPC/ppcf128-1.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llc > %t
+; RUN: opt < %s -std-compile-opts | llc > %t
 ; ModuleID = 'ld3.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/ppcf128-2.ll b/test/CodeGen/PowerPC/ppcf128-2.ll
index 43182266e7313..7eee3542d3bc5 100644
--- a/test/CodeGen/PowerPC/ppcf128-2.ll
+++ b/test/CodeGen/PowerPC/ppcf128-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
 
 define i64 @__fixtfdi(ppc_fp128 %a) nounwind  {
 entry:
diff --git a/test/CodeGen/PowerPC/ppcf128-3.ll b/test/CodeGen/PowerPC/ppcf128-3.ll
index 3a51f4d3dd60e..5043b622584b8 100644
--- a/test/CodeGen/PowerPC/ppcf128-3.ll
+++ b/test/CodeGen/PowerPC/ppcf128-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 	%struct.stp_sequence = type { double, double }
 
 define i32 @stp_sequence_set_short_data(%struct.stp_sequence* %sequence, i32 %count, i16* %data) {
diff --git a/test/CodeGen/PowerPC/ppcf128-4.ll b/test/CodeGen/PowerPC/ppcf128-4.ll
index 16d61780a46c0..104a25eb43f28 100644
--- a/test/CodeGen/PowerPC/ppcf128-4.ll
+++ b/test/CodeGen/PowerPC/ppcf128-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 
 define ppc_fp128 @__floatditf(i64 %u) nounwind  {
 entry:
diff --git a/test/CodeGen/PowerPC/pr3711_widen_bit.ll b/test/CodeGen/PowerPC/pr3711_widen_bit.ll
index e601e968341ff..7abdedad98055 100644
--- a/test/CodeGen/PowerPC/pr3711_widen_bit.ll
+++ b/test/CodeGen/PowerPC/pr3711_widen_bit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
 
 ; Test that causes a abort in expanding a bit convert due to a missing support
 ; for widening.
diff --git a/test/CodeGen/PowerPC/private.ll b/test/CodeGen/PowerPC/private.ll
index 0f0d13492a088..d6e67708ac25c 100644
--- a/test/CodeGen/PowerPC/private.ll
+++ b/test/CodeGen/PowerPC/private.ll
@@ -1,25 +1,23 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llvm-as < %s | llc -mtriple=powerpc-unknown-linux-gnu > %t
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu > %t
 ; RUN: grep .Lfoo: %t
 ; RUN: grep bl.*\.Lfoo %t
 ; RUN: grep .Lbaz: %t
 ; RUN: grep lis.*\.Lbaz %t
-; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin > %t
+; RUN: llc < %s -mtriple=powerpc-apple-darwin > %t
 ; RUN: grep L_foo: %t
 ; RUN: grep bl.*\L_foo %t
 ; RUN: grep L_baz: %t
 ; RUN: grep lis.*\L_baz %t
 
-declare void @foo()
-
-define private void @foo() {
+define private void @foo() nounwind {
         ret void
 }
 
 @baz = private global i32 4;
 
-define i32 @bar() {
+define i32 @bar() nounwind {
         call void @foo()
 	%1 = load i32* @baz, align 4
         ret i32 %1
diff --git a/test/CodeGen/PowerPC/reg-coalesce-simple.ll b/test/CodeGen/PowerPC/reg-coalesce-simple.ll
index b86ed1a6a76ed..e0ddb4250fd2a 100644
--- a/test/CodeGen/PowerPC/reg-coalesce-simple.ll
+++ b/test/CodeGen/PowerPC/reg-coalesce-simple.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32  | not grep or
+; RUN: llc < %s -march=ppc32  | not grep or
 
 %struct.foo = type { i32, i32, [0 x i8] }
 
diff --git a/test/CodeGen/PowerPC/retaddr.ll b/test/CodeGen/PowerPC/retaddr.ll
index f4cad34addad6..9f8647d087620 100644
--- a/test/CodeGen/PowerPC/retaddr.ll
+++ b/test/CodeGen/PowerPC/retaddr.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep mflr
-; RUN: llvm-as < %s | llc -march=ppc32 | grep lwz
-; RUN: llvm-as < %s | llc -march=ppc64 | grep {ld r., 16(r1)}
+; RUN: llc < %s -march=ppc32 | grep mflr
+; RUN: llc < %s -march=ppc32 | grep lwz
+; RUN: llc < %s -march=ppc64 | grep {ld r., 16(r1)}
 
 target triple = "powerpc-apple-darwin8"
 
diff --git a/test/CodeGen/PowerPC/return-val-i128.ll b/test/CodeGen/PowerPC/return-val-i128.ll
index 27a5004bd12a8..e14a43809a7b4 100644
--- a/test/CodeGen/PowerPC/return-val-i128.ll
+++ b/test/CodeGen/PowerPC/return-val-i128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64
+; RUN: llc < %s -march=ppc64
 
 define i128 @__fixsfdi(float %a) {
 entry:
diff --git a/test/CodeGen/PowerPC/rlwimi-commute.ll b/test/CodeGen/PowerPC/rlwimi-commute.ll
index f8a42b5142035..6410c63234d2e 100644
--- a/test/CodeGen/PowerPC/rlwimi-commute.ll
+++ b/test/CodeGen/PowerPC/rlwimi-commute.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwimi
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {or }
+; RUN: llc < %s -march=ppc32 | grep rlwimi
+; RUN: llc < %s -march=ppc32 | not grep {or }
 
 ; Make sure there is no register-register copies here.
 
diff --git a/test/CodeGen/PowerPC/rlwimi.ll b/test/CodeGen/PowerPC/rlwimi.ll
index 5e310bb6a6ef6..556ca3d4a8c0d 100644
--- a/test/CodeGen/PowerPC/rlwimi.ll
+++ b/test/CodeGen/PowerPC/rlwimi.ll
@@ -1,6 +1,6 @@
 ; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep and
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwimi | count 8
+; RUN: llc < %s -march=ppc32 | not grep and
+; RUN: llc < %s -march=ppc32 | grep rlwimi | count 8
 
 define i32 @test1(i32 %x, i32 %y) {
 entry:
diff --git a/test/CodeGen/PowerPC/rlwimi2.ll b/test/CodeGen/PowerPC/rlwimi2.ll
index 33eaacf8b4fac..59a36555bf867 100644
--- a/test/CodeGen/PowerPC/rlwimi2.ll
+++ b/test/CodeGen/PowerPC/rlwimi2.ll
@@ -1,5 +1,5 @@
 ; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
 ; RUN: grep rlwimi %t | count 3
 ; RUN: grep srwi   %t | count 1
 ; RUN: not grep slwi %t
diff --git a/test/CodeGen/PowerPC/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll
index fedcfbfdb2c5f..05d37bf1625fb 100644
--- a/test/CodeGen/PowerPC/rlwimi3.ll
+++ b/test/CodeGen/PowerPC/rlwimi3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -stats |& \
+; RUN: llc < %s -march=ppc32 -stats |& \
 ; RUN:   grep {Number of machine instrs printed} | grep 12
 
 define i16 @Trans16Bit(i32 %srcA, i32 %srcB, i32 %alpha) {
diff --git a/test/CodeGen/PowerPC/rlwinm.ll b/test/CodeGen/PowerPC/rlwinm.ll
index 9d34865be5a2e..699f6e78356e7 100644
--- a/test/CodeGen/PowerPC/rlwinm.ll
+++ b/test/CodeGen/PowerPC/rlwinm.ll
@@ -1,5 +1,5 @@
 ; All of these ands and shifts should be folded into rlwimi's
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
 ; RUN: not grep and %t
 ; RUN: not grep srawi %t
 ; RUN: not grep srwi %t
diff --git a/test/CodeGen/PowerPC/rlwinm2.ll b/test/CodeGen/PowerPC/rlwinm2.ll
index 06ceaa2a9cdc3..46542d8e09bd0 100644
--- a/test/CodeGen/PowerPC/rlwinm2.ll
+++ b/test/CodeGen/PowerPC/rlwinm2.ll
@@ -1,5 +1,5 @@
 ; All of these ands and shifts should be folded into rlw[i]nm instructions
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
 ; RUN: not grep and %t
 ; RUN: not grep srawi %t 
 ; RUN: not grep srwi %t 
diff --git a/test/CodeGen/PowerPC/rotl-2.ll b/test/CodeGen/PowerPC/rotl-2.ll
index df104599fe3e5..d32ef59be6c41 100644
--- a/test/CodeGen/PowerPC/rotl-2.ll
+++ b/test/CodeGen/PowerPC/rotl-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=ppc32  | grep rlwinm | count 4
-; RUN: llvm-as < %s | llc -march=ppc32  | grep rlwnm | count 2
-; RUN: llvm-as < %s | llc -march=ppc32  | not grep or
+; RUN: llc < %s -march=ppc32  | grep rlwinm | count 4
+; RUN: llc < %s -march=ppc32  | grep rlwnm | count 2
+; RUN: llc < %s -march=ppc32  | not grep or
 
 define i32 @rotl32(i32 %A, i8 %Amt) nounwind {
 	%shift.upgrd.1 = zext i8 %Amt to i32		; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/rotl-64.ll b/test/CodeGen/PowerPC/rotl-64.ll
index 3963d9a9d71a0..674c9e4cc951b 100644
--- a/test/CodeGen/PowerPC/rotl-64.ll
+++ b/test/CodeGen/PowerPC/rotl-64.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep rldicl
-; RUN: llvm-as < %s | llc -march=ppc64 | grep rldcl
+; RUN: llc < %s -march=ppc64 | grep rldicl
+; RUN: llc < %s -march=ppc64 | grep rldcl
 ; PR1613
 
 define i64 @t1(i64 %A) {
diff --git a/test/CodeGen/PowerPC/rotl.ll b/test/CodeGen/PowerPC/rotl.ll
index aab5c8316a3d7..56fc4a8c911fe 100644
--- a/test/CodeGen/PowerPC/rotl.ll
+++ b/test/CodeGen/PowerPC/rotl.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwnm | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwinm | count 2
+; RUN: llc < %s -march=ppc32 | grep rlwnm | count 2
+; RUN: llc < %s -march=ppc32 | grep rlwinm | count 2
 
 define i32 @rotlw(i32 %x, i32 %sh) {
 entry:
diff --git a/test/CodeGen/PowerPC/sections.ll b/test/CodeGen/PowerPC/sections.ll
new file mode 100644
index 0000000000000..1af370935e231
--- /dev/null
+++ b/test/CodeGen/PowerPC/sections.ll
@@ -0,0 +1,8 @@
+; Test to make sure that bss sections are printed with '.section' directive.
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
+
+@A = global i32 0
+
+; CHECK:  .section  .bss,"aw",@nobits
+; CHECK:  .global A
+
diff --git a/test/CodeGen/PowerPC/select-cc.ll b/test/CodeGen/PowerPC/select-cc.ll
index f9464c4b05168..ccc64898a34fc 100644
--- a/test/CodeGen/PowerPC/select-cc.ll
+++ b/test/CodeGen/PowerPC/select-cc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llc < %s -march=ppc32
 ; PR3011
 
 define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind  {
diff --git a/test/CodeGen/PowerPC/select_lt0.ll b/test/CodeGen/PowerPC/select_lt0.ll
index 86eb201c57964..95ba84ac6e245 100644
--- a/test/CodeGen/PowerPC/select_lt0.ll
+++ b/test/CodeGen/PowerPC/select_lt0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep cmp
+; RUN: llc < %s -march=ppc32 | not grep cmp
 
 define i32 @seli32_1(i32 %a) {
 entry:
diff --git a/test/CodeGen/PowerPC/setcc_no_zext.ll b/test/CodeGen/PowerPC/setcc_no_zext.ll
index c31f35ce9af34..9b2036e1dc52c 100644
--- a/test/CodeGen/PowerPC/setcc_no_zext.ll
+++ b/test/CodeGen/PowerPC/setcc_no_zext.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwinm
+; RUN: llc < %s -march=ppc32 | not grep rlwinm
 
 define i32 @setcc_one_or_zero(i32* %a) {
 entry:
diff --git a/test/CodeGen/PowerPC/seteq-0.ll b/test/CodeGen/PowerPC/seteq-0.ll
index 0f0afe9e665a0..688b29aa124f4 100644
--- a/test/CodeGen/PowerPC/seteq-0.ll
+++ b/test/CodeGen/PowerPC/seteq-0.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
 ; RUN:   grep {srwi r., r., 5}
 
 define i32 @eq0(i32 %a) {
diff --git a/test/CodeGen/PowerPC/shift128.ll b/test/CodeGen/PowerPC/shift128.ll
index cf5b3fc6ff0c4..8e518c12795ec 100644
--- a/test/CodeGen/PowerPC/shift128.ll
+++ b/test/CodeGen/PowerPC/shift128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 | grep sld | count 5
+; RUN: llc < %s -march=ppc64 | grep sld | count 5
 
 define i128 @foo_lshr(i128 %x, i128 %y) {
   %r = lshr i128 %x, %y
diff --git a/test/CodeGen/PowerPC/shl_elim.ll b/test/CodeGen/PowerPC/shl_elim.ll
index 3dc47729860b1..f177c4a3f4824 100644
--- a/test/CodeGen/PowerPC/shl_elim.ll
+++ b/test/CodeGen/PowerPC/shl_elim.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep slwi
+; RUN: llc < %s -march=ppc32 | not grep slwi
 
 define i32 @test1(i64 %a) {
         %tmp29 = lshr i64 %a, 24                ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/shl_sext.ll b/test/CodeGen/PowerPC/shl_sext.ll
index 61e5cdb11a42c..1f35eb4c55a6e 100644
--- a/test/CodeGen/PowerPC/shl_sext.ll
+++ b/test/CodeGen/PowerPC/shl_sext.ll
@@ -1,5 +1,5 @@
 ; This test should not contain a sign extend
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsb 
+; RUN: llc < %s -march=ppc32 | not grep extsb 
 
 define i32 @test(i32 %mode.0.i.0) {
         %tmp.79 = trunc i32 %mode.0.i.0 to i8           ; <i8> [#uses=1]
diff --git a/test/CodeGen/PowerPC/sign_ext_inreg1.ll b/test/CodeGen/PowerPC/sign_ext_inreg1.ll
index 0e67f77038821..2679c8e6ae8ef 100644
--- a/test/CodeGen/PowerPC/sign_ext_inreg1.ll
+++ b/test/CodeGen/PowerPC/sign_ext_inreg1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep srwi
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwimi
+; RUN: llc < %s -march=ppc32 | grep srwi
+; RUN: llc < %s -march=ppc32 | not grep rlwimi
 
 define i32 @baz(i64 %a) {
         %tmp29 = lshr i64 %a, 24                ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/small-arguments.ll b/test/CodeGen/PowerPC/small-arguments.ll
index e211e86875a20..31bcee6bc811f 100644
--- a/test/CodeGen/PowerPC/small-arguments.ll
+++ b/test/CodeGen/PowerPC/small-arguments.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep {extsh\\|rlwinm}
+; RUN: llc < %s -march=ppc32 | not grep {extsh\\|rlwinm}
 
 declare i16 @foo() signext 
 
diff --git a/test/CodeGen/PowerPC/stfiwx-2.ll b/test/CodeGen/PowerPC/stfiwx-2.ll
index 5c4a834be4457..c49b25cc23039 100644
--- a/test/CodeGen/PowerPC/stfiwx-2.ll
+++ b/test/CodeGen/PowerPC/stfiwx-2.ll
@@ -1,6 +1,6 @@
 ; This cannot be a stfiwx
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep stb
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep stfiwx
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep stb
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep stfiwx
 
 define void @test(float %F, i8* %P) {
 	%I = fptosi float %F to i32
diff --git a/test/CodeGen/PowerPC/stfiwx.ll b/test/CodeGen/PowerPC/stfiwx.ll
index c4afb63531b1f..d1c3f5234a261 100644
--- a/test/CodeGen/PowerPC/stfiwx.ll
+++ b/test/CodeGen/PowerPC/stfiwx.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx -o %t1 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx -o %t1
 ; RUN: grep stfiwx %t1
 ; RUN: not grep r1 %t1
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx \
-; RUN:   -o %t2 -f
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx \
+; RUN:   -o %t2
 ; RUN: not grep stfiwx %t2
 ; RUN: grep r1 %t2
 
diff --git a/test/CodeGen/PowerPC/store-load-fwd.ll b/test/CodeGen/PowerPC/store-load-fwd.ll
index 5cc478448ff78..25663c1ac68ec 100644
--- a/test/CodeGen/PowerPC/store-load-fwd.ll
+++ b/test/CodeGen/PowerPC/store-load-fwd.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep lwz
+; RUN: llc < %s -march=ppc32 | not grep lwz
 
 define i32 @test(i32* %P) {
         store i32 1, i32* %P
diff --git a/test/CodeGen/PowerPC/subc.ll b/test/CodeGen/PowerPC/subc.ll
index 4ac95961f079b..5914dcad94bc0 100644
--- a/test/CodeGen/PowerPC/subc.ll
+++ b/test/CodeGen/PowerPC/subc.ll
@@ -1,5 +1,5 @@
 ; All of these should be codegen'd without loading immediates
-; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: llc < %s -march=ppc32 -o %t
 ; RUN: grep subfc %t | count 1
 ; RUN: grep subfe %t | count 1
 ; RUN: grep subfze %t | count 1
diff --git a/test/CodeGen/PowerPC/tailcall1-64.ll b/test/CodeGen/PowerPC/tailcall1-64.ll
index f39b40bdab811..e9c83a548807f 100644
--- a/test/CodeGen/PowerPC/tailcall1-64.ll
+++ b/test/CodeGen/PowerPC/tailcall1-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -tailcallopt | grep TC_RETURNd8
+; RUN: llc < %s -march=ppc64 -tailcallopt | grep TC_RETURNd8
 define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 entry:
 	ret i32 %a3
diff --git a/test/CodeGen/PowerPC/tailcall1.ll b/test/CodeGen/PowerPC/tailcall1.ll
index 1fc4b94ddcf95..08f3392c9d77d 100644
--- a/test/CodeGen/PowerPC/tailcall1.ll
+++ b/test/CodeGen/PowerPC/tailcall1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -tailcallopt | grep TC_RETURN
+; RUN: llc < %s -march=ppc32 -tailcallopt | grep TC_RETURN
 define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 entry:
 	ret i32 %a3
diff --git a/test/CodeGen/PowerPC/tailcallpic1.ll b/test/CodeGen/PowerPC/tailcallpic1.ll
index 678d366fb6a6e..f3f5028cf4a92 100644
--- a/test/CodeGen/PowerPC/tailcallpic1.ll
+++ b/test/CodeGen/PowerPC/tailcallpic1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc  -tailcallopt -mtriple=powerpc-apple-darwin -relocation-model=pic | grep TC_RETURN
+; RUN: llc < %s  -tailcallopt -mtriple=powerpc-apple-darwin -relocation-model=pic | grep TC_RETURN
 
 
 
diff --git a/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll b/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll
new file mode 100644
index 0000000000000..8a1288afa40c2
--- /dev/null
+++ b/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll
@@ -0,0 +1,583 @@
+; RUN: llc < %s
+; PR4534
+
+; ModuleID = 'tango.net.ftp.FtpClient.bc'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
+target triple = "powerpc-apple-darwin9.6.0"
+	%"byte[]" = type { i32, i8* }
+@.str167 = external constant [11 x i8]		; <[11 x i8]*> [#uses=1]
+@.str170 = external constant [11 x i8]		; <[11 x i8]*> [#uses=2]
+@.str171 = external constant [5 x i8]		; <[5 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%"byte[]")* @foo to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define fastcc void @foo(%"byte[]" %line_arg) {
+entry:
+	%line_arg830 = extractvalue %"byte[]" %line_arg, 0		; <i32> [#uses=12]
+	%line_arg831 = extractvalue %"byte[]" %line_arg, 1		; <i8*> [#uses=17]
+	%t5 = load i8* %line_arg831		; <i8> [#uses=1]
+	br label %forcondi
+
+forcondi:		; preds = %forbodyi, %entry
+	%l.0i = phi i32 [ 10, %entry ], [ %t4i, %forbodyi ]		; <i32> [#uses=2]
+	%p.0i = phi i8* [ getelementptr ([11 x i8]* @.str167, i32 0, i32 -1), %entry ], [ %t7i, %forbodyi ]		; <i8*> [#uses=1]
+	%t4i = add i32 %l.0i, -1		; <i32> [#uses=1]
+	%t5i = icmp eq i32 %l.0i, 0		; <i1> [#uses=1]
+	br i1 %t5i, label %forcond.i, label %forbodyi
+
+forbodyi:		; preds = %forcondi
+	%t7i = getelementptr i8* %p.0i, i32 1		; <i8*> [#uses=2]
+	%t8i = load i8* %t7i		; <i8> [#uses=1]
+	%t12i = icmp eq i8 %t8i, %t5		; <i1> [#uses=1]
+	br i1 %t12i, label %forcond.i, label %forcondi
+
+forcond.i:		; preds = %forbody.i, %forbodyi, %forcondi
+	%storemerge.i = phi i32 [ %t106.i, %forbody.i ], [ 1, %forcondi ], [ 1, %forbodyi ]		; <i32> [#uses=1]
+	%t77.i286 = phi i1 [ %phit3, %forbody.i ], [ false, %forcondi ], [ false, %forbodyi ]		; <i1> [#uses=1]
+	br i1 %t77.i286, label %forcond.i295, label %forbody.i
+
+forbody.i:		; preds = %forcond.i
+	%t106.i = add i32 %storemerge.i, 1		; <i32> [#uses=2]
+	%phit3 = icmp ugt i32 %t106.i, 3		; <i1> [#uses=1]
+	br label %forcond.i
+
+forcond.i295:		; preds = %forbody.i301, %forcond.i
+	%storemerge.i292 = phi i32 [ %t106.i325, %forbody.i301 ], [ 4, %forcond.i ]		; <i32> [#uses=1]
+	%t77.i293 = phi i1 [ %phit2, %forbody.i301 ], [ false, %forcond.i ]		; <i1> [#uses=1]
+	br i1 %t77.i293, label %forcond.i332, label %forbody.i301
+
+forbody.i301:		; preds = %forcond.i295
+	%t106.i325 = add i32 %storemerge.i292, 1		; <i32> [#uses=2]
+	%phit2 = icmp ugt i32 %t106.i325, 6		; <i1> [#uses=1]
+	br label %forcond.i295
+
+forcond.i332:		; preds = %forbody.i338, %forcond.i295
+	%storemerge.i329 = phi i32 [ %t106.i362, %forbody.i338 ], [ 7, %forcond.i295 ]		; <i32> [#uses=3]
+	%t77.i330 = phi i1 [ %phit1, %forbody.i338 ], [ false, %forcond.i295 ]		; <i1> [#uses=1]
+	br i1 %t77.i330, label %wcond.i370, label %forbody.i338
+
+forbody.i338:		; preds = %forcond.i332
+	%t106.i362 = add i32 %storemerge.i329, 1		; <i32> [#uses=2]
+	%phit1 = icmp ugt i32 %t106.i362, 9		; <i1> [#uses=1]
+	br label %forcond.i332
+
+wcond.i370:		; preds = %wbody.i372, %forcond.i332
+	%.frame.0.11 = phi i32 [ %t18.i371.c, %wbody.i372 ], [ %storemerge.i329, %forcond.i332 ]		; <i32> [#uses=2]
+	%t3.i368 = phi i32 [ %t18.i371.c, %wbody.i372 ], [ %storemerge.i329, %forcond.i332 ]		; <i32> [#uses=5]
+	%t4.i369 = icmp ult i32 %t3.i368, %line_arg830		; <i1> [#uses=1]
+	br i1 %t4.i369, label %andand.i378, label %wcond22.i383
+
+wbody.i372:		; preds = %andand.i378
+	%t18.i371.c = add i32 %t3.i368, 1		; <i32> [#uses=2]
+	br label %wcond.i370
+
+andand.i378:		; preds = %wcond.i370
+	%t11.i375 = getelementptr i8* %line_arg831, i32 %t3.i368		; <i8*> [#uses=1]
+	%t12.i376 = load i8* %t11.i375		; <i8> [#uses=1]
+	%t14.i377 = icmp eq i8 %t12.i376, 32		; <i1> [#uses=1]
+	br i1 %t14.i377, label %wbody.i372, label %wcond22.i383
+
+wcond22.i383:		; preds = %wbody23.i385, %andand.i378, %wcond.i370
+	%.frame.0.10 = phi i32 [ %t50.i384, %wbody23.i385 ], [ %.frame.0.11, %wcond.i370 ], [ %.frame.0.11, %andand.i378 ]		; <i32> [#uses=2]
+	%t49.i381 = phi i32 [ %t50.i384, %wbody23.i385 ], [ %t3.i368, %wcond.i370 ], [ %t3.i368, %andand.i378 ]		; <i32> [#uses=5]
+	%t32.i382 = icmp ult i32 %t49.i381, %line_arg830		; <i1> [#uses=1]
+	br i1 %t32.i382, label %andand33.i391, label %wcond54.i396
+
+wbody23.i385:		; preds = %andand33.i391
+	%t50.i384 = add i32 %t49.i381, 1		; <i32> [#uses=2]
+	br label %wcond22.i383
+
+andand33.i391:		; preds = %wcond22.i383
+	%t42.i388 = getelementptr i8* %line_arg831, i32 %t49.i381		; <i8*> [#uses=1]
+	%t43.i389 = load i8* %t42.i388		; <i8> [#uses=1]
+	%t45.i390 = icmp eq i8 %t43.i389, 32		; <i1> [#uses=1]
+	br i1 %t45.i390, label %wcond54.i396, label %wbody23.i385
+
+wcond54.i396:		; preds = %wbody55.i401, %andand33.i391, %wcond22.i383
+	%.frame.0.9 = phi i32 [ %t82.i400, %wbody55.i401 ], [ %.frame.0.10, %wcond22.i383 ], [ %.frame.0.10, %andand33.i391 ]		; <i32> [#uses=2]
+	%t81.i394 = phi i32 [ %t82.i400, %wbody55.i401 ], [ %t49.i381, %wcond22.i383 ], [ %t49.i381, %andand33.i391 ]		; <i32> [#uses=3]
+	%t64.i395 = icmp ult i32 %t81.i394, %line_arg830		; <i1> [#uses=1]
+	br i1 %t64.i395, label %andand65.i407, label %wcond.i716
+
+wbody55.i401:		; preds = %andand65.i407
+	%t82.i400 = add i32 %t81.i394, 1		; <i32> [#uses=2]
+	br label %wcond54.i396
+
+andand65.i407:		; preds = %wcond54.i396
+	%t74.i404 = getelementptr i8* %line_arg831, i32 %t81.i394		; <i8*> [#uses=1]
+	%t75.i405 = load i8* %t74.i404		; <i8> [#uses=1]
+	%t77.i406 = icmp eq i8 %t75.i405, 32		; <i1> [#uses=1]
+	br i1 %t77.i406, label %wbody55.i401, label %wcond.i716
+
+wcond.i716:		; preds = %wbody.i717, %andand65.i407, %wcond54.i396
+	%.frame.0.0 = phi i32 [ %t18.i.c829, %wbody.i717 ], [ %.frame.0.9, %wcond54.i396 ], [ %.frame.0.9, %andand65.i407 ]		; <i32> [#uses=7]
+	%t4.i715 = icmp ult i32 %.frame.0.0, %line_arg830		; <i1> [#uses=1]
+	br i1 %t4.i715, label %andand.i721, label %wcond22.i724
+
+wbody.i717:		; preds = %andand.i721
+	%t18.i.c829 = add i32 %.frame.0.0, 1		; <i32> [#uses=1]
+	br label %wcond.i716
+
+andand.i721:		; preds = %wcond.i716
+	%t11.i718 = getelementptr i8* %line_arg831, i32 %.frame.0.0		; <i8*> [#uses=1]
+	%t12.i719 = load i8* %t11.i718		; <i8> [#uses=1]
+	%t14.i720 = icmp eq i8 %t12.i719, 32		; <i1> [#uses=1]
+	br i1 %t14.i720, label %wbody.i717, label %wcond22.i724
+
+wcond22.i724:		; preds = %wbody23.i726, %andand.i721, %wcond.i716
+	%.frame.0.1 = phi i32 [ %t50.i725, %wbody23.i726 ], [ %.frame.0.0, %wcond.i716 ], [ %.frame.0.0, %andand.i721 ]		; <i32> [#uses=2]
+	%t49.i722 = phi i32 [ %t50.i725, %wbody23.i726 ], [ %.frame.0.0, %wcond.i716 ], [ %.frame.0.0, %andand.i721 ]		; <i32> [#uses=5]
+	%t32.i723 = icmp ult i32 %t49.i722, %line_arg830		; <i1> [#uses=1]
+	br i1 %t32.i723, label %andand33.i731, label %wcond54.i734
+
+wbody23.i726:		; preds = %andand33.i731
+	%t50.i725 = add i32 %t49.i722, 1		; <i32> [#uses=2]
+	br label %wcond22.i724
+
+andand33.i731:		; preds = %wcond22.i724
+	%t42.i728 = getelementptr i8* %line_arg831, i32 %t49.i722		; <i8*> [#uses=1]
+	%t43.i729 = load i8* %t42.i728		; <i8> [#uses=1]
+	%t45.i730 = icmp eq i8 %t43.i729, 32		; <i1> [#uses=1]
+	br i1 %t45.i730, label %wcond54.i734, label %wbody23.i726
+
+wcond54.i734:		; preds = %wbody55.i736, %andand33.i731, %wcond22.i724
+	%.frame.0.2 = phi i32 [ %t82.i735, %wbody55.i736 ], [ %.frame.0.1, %wcond22.i724 ], [ %.frame.0.1, %andand33.i731 ]		; <i32> [#uses=2]
+	%t81.i732 = phi i32 [ %t82.i735, %wbody55.i736 ], [ %t49.i722, %wcond22.i724 ], [ %t49.i722, %andand33.i731 ]		; <i32> [#uses=3]
+	%t64.i733 = icmp ult i32 %t81.i732, %line_arg830		; <i1> [#uses=1]
+	br i1 %t64.i733, label %andand65.i740, label %wcond.i750
+
+wbody55.i736:		; preds = %andand65.i740
+	%t82.i735 = add i32 %t81.i732, 1		; <i32> [#uses=2]
+	br label %wcond54.i734
+
+andand65.i740:		; preds = %wcond54.i734
+	%t74.i737 = getelementptr i8* %line_arg831, i32 %t81.i732		; <i8*> [#uses=1]
+	%t75.i738 = load i8* %t74.i737		; <i8> [#uses=1]
+	%t77.i739 = icmp eq i8 %t75.i738, 32		; <i1> [#uses=1]
+	br i1 %t77.i739, label %wbody55.i736, label %wcond.i750
+
+wcond.i750:		; preds = %wbody.i752, %andand65.i740, %wcond54.i734
+	%.frame.0.3 = phi i32 [ %t18.i751.c, %wbody.i752 ], [ %.frame.0.2, %wcond54.i734 ], [ %.frame.0.2, %andand65.i740 ]		; <i32> [#uses=11]
+	%t4.i749 = icmp ult i32 %.frame.0.3, %line_arg830		; <i1> [#uses=1]
+	br i1 %t4.i749, label %andand.i758, label %wcond22.i761
+
+wbody.i752:		; preds = %andand.i758
+	%t18.i751.c = add i32 %.frame.0.3, 1		; <i32> [#uses=1]
+	br label %wcond.i750
+
+andand.i758:		; preds = %wcond.i750
+	%t11.i755 = getelementptr i8* %line_arg831, i32 %.frame.0.3		; <i8*> [#uses=1]
+	%t12.i756 = load i8* %t11.i755		; <i8> [#uses=1]
+	%t14.i757 = icmp eq i8 %t12.i756, 32		; <i1> [#uses=1]
+	br i1 %t14.i757, label %wbody.i752, label %wcond22.i761
+
+wcond22.i761:		; preds = %wbody23.i763, %andand.i758, %wcond.i750
+	%.frame.0.4 = phi i32 [ %t50.i762, %wbody23.i763 ], [ %.frame.0.3, %wcond.i750 ], [ %.frame.0.3, %andand.i758 ]		; <i32> [#uses=2]
+	%t49.i759 = phi i32 [ %t50.i762, %wbody23.i763 ], [ %.frame.0.3, %wcond.i750 ], [ %.frame.0.3, %andand.i758 ]		; <i32> [#uses=7]
+	%t32.i760 = icmp ult i32 %t49.i759, %line_arg830		; <i1> [#uses=1]
+	br i1 %t32.i760, label %andand33.i769, label %wcond54.i773
+
+wbody23.i763:		; preds = %andand33.i769
+	%t50.i762 = add i32 %t49.i759, 1		; <i32> [#uses=2]
+	br label %wcond22.i761
+
+andand33.i769:		; preds = %wcond22.i761
+	%t42.i766 = getelementptr i8* %line_arg831, i32 %t49.i759		; <i8*> [#uses=1]
+	%t43.i767 = load i8* %t42.i766		; <i8> [#uses=1]
+	%t45.i768 = icmp eq i8 %t43.i767, 32		; <i1> [#uses=1]
+	br i1 %t45.i768, label %wcond54.i773, label %wbody23.i763
+
+wcond54.i773:		; preds = %wbody55.i775, %andand33.i769, %wcond22.i761
+	%.frame.0.5 = phi i32 [ %t82.i774, %wbody55.i775 ], [ %.frame.0.4, %wcond22.i761 ], [ %.frame.0.4, %andand33.i769 ]		; <i32> [#uses=1]
+	%t81.i770 = phi i32 [ %t82.i774, %wbody55.i775 ], [ %t49.i759, %wcond22.i761 ], [ %t49.i759, %andand33.i769 ]		; <i32> [#uses=3]
+	%t64.i771 = icmp ult i32 %t81.i770, %line_arg830		; <i1> [#uses=1]
+	br i1 %t64.i771, label %andand65.i780, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
+
+wbody55.i775:		; preds = %andand65.i780
+	%t82.i774 = add i32 %t81.i770, 1		; <i32> [#uses=2]
+	br label %wcond54.i773
+
+andand65.i780:		; preds = %wcond54.i773
+	%t74.i777 = getelementptr i8* %line_arg831, i32 %t81.i770		; <i8*> [#uses=1]
+	%t75.i778 = load i8* %t74.i777		; <i8> [#uses=1]
+	%t77.i779 = icmp eq i8 %t75.i778, 32		; <i1> [#uses=1]
+	br i1 %t77.i779, label %wbody55.i775, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
+
+Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786:		; preds = %andand65.i780, %wcond54.i773
+	%t89.i782 = getelementptr i8* %line_arg831, i32 %.frame.0.3		; <i8*> [#uses=4]
+	%t90.i783 = sub i32 %t49.i759, %.frame.0.3		; <i32> [#uses=2]
+	br label %wcond.i792
+
+wcond.i792:		; preds = %wbody.i794, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
+	%.frame.0.6 = phi i32 [ %.frame.0.5, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786 ], [ %t18.i793.c, %wbody.i794 ]		; <i32> [#uses=9]
+	%t4.i791 = icmp ult i32 %.frame.0.6, %line_arg830		; <i1> [#uses=1]
+	br i1 %t4.i791, label %andand.i800, label %wcond22.i803
+
+wbody.i794:		; preds = %andand.i800
+	%t18.i793.c = add i32 %.frame.0.6, 1		; <i32> [#uses=1]
+	br label %wcond.i792
+
+andand.i800:		; preds = %wcond.i792
+	%t11.i797 = getelementptr i8* %line_arg831, i32 %.frame.0.6		; <i8*> [#uses=1]
+	%t12.i798 = load i8* %t11.i797		; <i8> [#uses=1]
+	%t14.i799 = icmp eq i8 %t12.i798, 32		; <i1> [#uses=1]
+	br i1 %t14.i799, label %wbody.i794, label %wcond22.i803
+
+wcond22.i803:		; preds = %wbody23.i805, %andand.i800, %wcond.i792
+	%t49.i801 = phi i32 [ %t50.i804, %wbody23.i805 ], [ %.frame.0.6, %wcond.i792 ], [ %.frame.0.6, %andand.i800 ]		; <i32> [#uses=7]
+	%t32.i802 = icmp ult i32 %t49.i801, %line_arg830		; <i1> [#uses=1]
+	br i1 %t32.i802, label %andand33.i811, label %wcond54.i815
+
+wbody23.i805:		; preds = %andand33.i811
+	%t50.i804 = add i32 %t49.i801, 1		; <i32> [#uses=1]
+	br label %wcond22.i803
+
+andand33.i811:		; preds = %wcond22.i803
+	%t42.i808 = getelementptr i8* %line_arg831, i32 %t49.i801		; <i8*> [#uses=1]
+	%t43.i809 = load i8* %t42.i808		; <i8> [#uses=1]
+	%t45.i810 = icmp eq i8 %t43.i809, 32		; <i1> [#uses=1]
+	br i1 %t45.i810, label %wcond54.i815, label %wbody23.i805
+
+wcond54.i815:		; preds = %wbody55.i817, %andand33.i811, %wcond22.i803
+	%t81.i812 = phi i32 [ %t82.i816, %wbody55.i817 ], [ %t49.i801, %wcond22.i803 ], [ %t49.i801, %andand33.i811 ]		; <i32> [#uses=3]
+	%t64.i813 = icmp ult i32 %t81.i812, %line_arg830		; <i1> [#uses=1]
+	br i1 %t64.i813, label %andand65.i822, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
+
+wbody55.i817:		; preds = %andand65.i822
+	%t82.i816 = add i32 %t81.i812, 1		; <i32> [#uses=1]
+	br label %wcond54.i815
+
+andand65.i822:		; preds = %wcond54.i815
+	%t74.i819 = getelementptr i8* %line_arg831, i32 %t81.i812		; <i8*> [#uses=1]
+	%t75.i820 = load i8* %t74.i819		; <i8> [#uses=1]
+	%t77.i821 = icmp eq i8 %t75.i820, 32		; <i1> [#uses=1]
+	br i1 %t77.i821, label %wbody55.i817, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
+
+Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828:		; preds = %andand65.i822, %wcond54.i815
+	%t89.i824 = getelementptr i8* %line_arg831, i32 %.frame.0.6		; <i8*> [#uses=4]
+	%t90.i825 = sub i32 %t49.i801, %.frame.0.6		; <i32> [#uses=2]
+	%t63 = load i8* %t89.i824		; <i8> [#uses=2]
+	br label %forcondi622
+
+forcondi622:		; preds = %forbodyi626, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
+	%l.0i618 = phi i32 [ 10, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828 ], [ %t4i620, %forbodyi626 ]		; <i32> [#uses=2]
+	%p.0i619 = phi i8* [ getelementptr ([11 x i8]* @.str170, i32 0, i32 -1), %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828 ], [ %t7i623, %forbodyi626 ]		; <i8*> [#uses=1]
+	%t4i620 = add i32 %l.0i618, -1		; <i32> [#uses=1]
+	%t5i621 = icmp eq i32 %l.0i618, 0		; <i1> [#uses=1]
+	br i1 %t5i621, label %if65, label %forbodyi626
+
+forbodyi626:		; preds = %forcondi622
+	%t7i623 = getelementptr i8* %p.0i619, i32 1		; <i8*> [#uses=3]
+	%t8i624 = load i8* %t7i623		; <i8> [#uses=1]
+	%t12i625 = icmp eq i8 %t8i624, %t63		; <i1> [#uses=1]
+	br i1 %t12i625, label %ifi630, label %forcondi622
+
+ifi630:		; preds = %forbodyi626
+	%t15i627 = ptrtoint i8* %t7i623 to i32		; <i32> [#uses=1]
+	%t17i629 = sub i32 %t15i627, ptrtoint ([11 x i8]* @.str170 to i32)		; <i32> [#uses=1]
+	%phit636 = icmp eq i32 %t17i629, 10		; <i1> [#uses=1]
+	br i1 %phit636, label %if65, label %e67
+
+if65:		; preds = %ifi630, %forcondi622
+	%t4i532 = icmp eq i32 %t49.i759, %.frame.0.3		; <i1> [#uses=1]
+	br i1 %t4i532, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576, label %forcondi539
+
+forcondi539:		; preds = %zi546, %if65
+	%sign.1.i533 = phi i1 [ %sign.0.i543, %zi546 ], [ false, %if65 ]		; <i1> [#uses=2]
+	%l.0i534 = phi i32 [ %t33i545, %zi546 ], [ %t90.i783, %if65 ]		; <i32> [#uses=3]
+	%p.0i535 = phi i8* [ %t30i544, %zi546 ], [ %t89.i782, %if65 ]		; <i8*> [#uses=6]
+	%c.0.ini536 = phi i8* [ %t30i544, %zi546 ], [ %t89.i782, %if65 ]		; <i8*> [#uses=1]
+	%c.0i537 = load i8* %c.0.ini536		; <i8> [#uses=2]
+	%t8i538 = icmp eq i32 %l.0i534, 0		; <i1> [#uses=1]
+	br i1 %t8i538, label %endfori550, label %forbodyi540
+
+forbodyi540:		; preds = %forcondi539
+	switch i8 %c.0i537, label %endfori550 [
+		i8 32, label %zi546
+		i8 9, label %zi546
+		i8 45, label %if20i541
+		i8 43, label %if26i542
+	]
+
+if20i541:		; preds = %forbodyi540
+	br label %zi546
+
+if26i542:		; preds = %forbodyi540
+	br label %zi546
+
+zi546:		; preds = %if26i542, %if20i541, %forbodyi540, %forbodyi540
+	%sign.0.i543 = phi i1 [ false, %if26i542 ], [ true, %if20i541 ], [ %sign.1.i533, %forbodyi540 ], [ %sign.1.i533, %forbodyi540 ]		; <i1> [#uses=1]
+	%t30i544 = getelementptr i8* %p.0i535, i32 1		; <i8*> [#uses=2]
+	%t33i545 = add i32 %l.0i534, -1		; <i32> [#uses=1]
+	br label %forcondi539
+
+endfori550:		; preds = %forbodyi540, %forcondi539
+	%t37i547 = icmp eq i8 %c.0i537, 48		; <i1> [#uses=1]
+	%t39i548 = icmp sgt i32 %l.0i534, 1		; <i1> [#uses=1]
+	%or.condi549 = and i1 %t37i547, %t39i548		; <i1> [#uses=1]
+	br i1 %or.condi549, label %if40i554, label %endif41i564
+
+if40i554:		; preds = %endfori550
+	%t43i551 = getelementptr i8* %p.0i535, i32 1		; <i8*> [#uses=2]
+	%t44i552 = load i8* %t43i551		; <i8> [#uses=1]
+	%t45i553 = zext i8 %t44i552 to i32		; <i32> [#uses=1]
+	switch i32 %t45i553, label %endif41i564 [
+		i32 120, label %case46i556
+		i32 88, label %case46i556
+		i32 98, label %case51i558
+		i32 66, label %case51i558
+		i32 111, label %case56i560
+		i32 79, label %case56i560
+	]
+
+case46i556:		; preds = %if40i554, %if40i554
+	%t48i555 = getelementptr i8* %p.0i535, i32 2		; <i8*> [#uses=1]
+	br label %endif41i564
+
+case51i558:		; preds = %if40i554, %if40i554
+	%t53i557 = getelementptr i8* %p.0i535, i32 2		; <i8*> [#uses=1]
+	br label %endif41i564
+
+case56i560:		; preds = %if40i554, %if40i554
+	%t58i559 = getelementptr i8* %p.0i535, i32 2		; <i8*> [#uses=1]
+	br label %endif41i564
+
+endif41i564:		; preds = %case56i560, %case51i558, %case46i556, %if40i554, %endfori550
+	%r.0i561 = phi i32 [ 0, %if40i554 ], [ 8, %case56i560 ], [ 2, %case51i558 ], [ 16, %case46i556 ], [ 0, %endfori550 ]		; <i32> [#uses=2]
+	%p.2i562 = phi i8* [ %t43i551, %if40i554 ], [ %t58i559, %case56i560 ], [ %t53i557, %case51i558 ], [ %t48i555, %case46i556 ], [ %p.0i535, %endfori550 ]		; <i8*> [#uses=2]
+	%t63i563 = icmp eq i32 %r.0i561, 0		; <i1> [#uses=1]
+	br i1 %t63i563, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576, label %if70i568
+
+if70i568:		; preds = %endif41i564
+	br label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576
+
+Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576:		; preds = %if70i568, %endif41i564, %if65
+	%radix.0.i570 = phi i32 [ 0, %if65 ], [ %r.0i561, %if70i568 ], [ 10, %endif41i564 ]		; <i32> [#uses=2]
+	%p.1i571 = phi i8* [ %p.2i562, %if70i568 ], [ %t89.i782, %if65 ], [ %p.2i562, %endif41i564 ]		; <i8*> [#uses=1]
+	%t84i572 = ptrtoint i8* %p.1i571 to i32		; <i32> [#uses=1]
+	%t85i573 = ptrtoint i8* %t89.i782 to i32		; <i32> [#uses=1]
+	%t86i574 = sub i32 %t84i572, %t85i573		; <i32> [#uses=2]
+	%t6.i575 = sub i32 %t90.i783, %t86i574		; <i32> [#uses=1]
+	%t59i604 = zext i32 %radix.0.i570 to i64		; <i64> [#uses=1]
+	br label %fcondi581
+
+fcondi581:		; preds = %if55i610, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576
+	%value.0i577 = phi i64 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576 ], [ %t65i607, %if55i610 ]		; <i64> [#uses=1]
+	%fkey.0i579 = phi i32 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576 ], [ %t70i609, %if55i610 ]		; <i32> [#uses=3]
+	%t3i580 = icmp ult i32 %fkey.0i579, %t6.i575		; <i1> [#uses=1]
+	br i1 %t3i580, label %fbodyi587, label %wcond.i422
+
+fbodyi587:		; preds = %fcondi581
+	%t5.s.i582 = add i32 %t86i574, %fkey.0i579		; <i32> [#uses=1]
+	%t89.i782.s = add i32 %.frame.0.3, %t5.s.i582		; <i32> [#uses=1]
+	%t5i583 = getelementptr i8* %line_arg831, i32 %t89.i782.s		; <i8*> [#uses=1]
+	%t6i584 = load i8* %t5i583		; <i8> [#uses=6]
+	%t6.off84i585 = add i8 %t6i584, -48		; <i8> [#uses=1]
+	%or.cond.i28.i586 = icmp ugt i8 %t6.off84i585, 9		; <i1> [#uses=1]
+	br i1 %or.cond.i28.i586, label %ei590, label %endifi603
+
+ei590:		; preds = %fbodyi587
+	%t6.off83i588 = add i8 %t6i584, -97		; <i8> [#uses=1]
+	%or.cond81i589 = icmp ugt i8 %t6.off83i588, 25		; <i1> [#uses=1]
+	br i1 %or.cond81i589, label %e24i595, label %if22i592
+
+if22i592:		; preds = %ei590
+	%t27i591 = add i8 %t6i584, -39		; <i8> [#uses=1]
+	br label %endifi603
+
+e24i595:		; preds = %ei590
+	%t6.offi593 = add i8 %t6i584, -65		; <i8> [#uses=1]
+	%or.cond82i594 = icmp ugt i8 %t6.offi593, 25		; <i1> [#uses=1]
+	br i1 %or.cond82i594, label %wcond.i422, label %if39i597
+
+if39i597:		; preds = %e24i595
+	%t44.i29.i596 = add i8 %t6i584, -7		; <i8> [#uses=1]
+	br label %endifi603
+
+endifi603:		; preds = %if39i597, %if22i592, %fbodyi587
+	%c.0.i30.i598 = phi i8 [ %t27i591, %if22i592 ], [ %t44.i29.i596, %if39i597 ], [ %t6i584, %fbodyi587 ]		; <i8> [#uses=1]
+	%t48.i31.i599 = zext i8 %c.0.i30.i598 to i32		; <i32> [#uses=1]
+	%t49i600 = add i32 %t48.i31.i599, 208		; <i32> [#uses=1]
+	%t52i601 = and i32 %t49i600, 255		; <i32> [#uses=2]
+	%t54i602 = icmp ult i32 %t52i601, %radix.0.i570		; <i1> [#uses=1]
+	br i1 %t54i602, label %if55i610, label %wcond.i422
+
+if55i610:		; preds = %endifi603
+	%t61i605 = mul i64 %value.0i577, %t59i604		; <i64> [#uses=1]
+	%t64i606 = zext i32 %t52i601 to i64		; <i64> [#uses=1]
+	%t65i607 = add i64 %t61i605, %t64i606		; <i64> [#uses=1]
+	%t70i609 = add i32 %fkey.0i579, 1		; <i32> [#uses=1]
+	br label %fcondi581
+
+e67:		; preds = %ifi630
+	%t4i447 = icmp eq i32 %t49.i801, %.frame.0.6		; <i1> [#uses=1]
+	br i1 %t4i447, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491, label %forcondi454
+
+forcondi454:		; preds = %zi461, %e67
+	%c.0i452 = phi i8 [ %c.0i452.pre, %zi461 ], [ %t63, %e67 ]		; <i8> [#uses=2]
+	%sign.1.i448 = phi i1 [ %sign.0.i458, %zi461 ], [ false, %e67 ]		; <i1> [#uses=2]
+	%l.0i449 = phi i32 [ %t33i460, %zi461 ], [ %t90.i825, %e67 ]		; <i32> [#uses=3]
+	%p.0i450 = phi i8* [ %t30i459, %zi461 ], [ %t89.i824, %e67 ]		; <i8*> [#uses=5]
+	%t8i453 = icmp eq i32 %l.0i449, 0		; <i1> [#uses=1]
+	br i1 %t8i453, label %endfori465, label %forbodyi455
+
+forbodyi455:		; preds = %forcondi454
+	switch i8 %c.0i452, label %endfori465 [
+		i8 32, label %zi461
+		i8 9, label %zi461
+		i8 45, label %if20i456
+		i8 43, label %if26i457
+	]
+
+if20i456:		; preds = %forbodyi455
+	br label %zi461
+
+if26i457:		; preds = %forbodyi455
+	br label %zi461
+
+zi461:		; preds = %if26i457, %if20i456, %forbodyi455, %forbodyi455
+	%sign.0.i458 = phi i1 [ false, %if26i457 ], [ true, %if20i456 ], [ %sign.1.i448, %forbodyi455 ], [ %sign.1.i448, %forbodyi455 ]		; <i1> [#uses=1]
+	%t30i459 = getelementptr i8* %p.0i450, i32 1		; <i8*> [#uses=2]
+	%t33i460 = add i32 %l.0i449, -1		; <i32> [#uses=1]
+	%c.0i452.pre = load i8* %t30i459		; <i8> [#uses=1]
+	br label %forcondi454
+
+endfori465:		; preds = %forbodyi455, %forcondi454
+	%t37i462 = icmp eq i8 %c.0i452, 48		; <i1> [#uses=1]
+	%t39i463 = icmp sgt i32 %l.0i449, 1		; <i1> [#uses=1]
+	%or.condi464 = and i1 %t37i462, %t39i463		; <i1> [#uses=1]
+	br i1 %or.condi464, label %if40i469, label %endif41i479
+
+if40i469:		; preds = %endfori465
+	%t43i466 = getelementptr i8* %p.0i450, i32 1		; <i8*> [#uses=2]
+	%t44i467 = load i8* %t43i466		; <i8> [#uses=1]
+	%t45i468 = zext i8 %t44i467 to i32		; <i32> [#uses=1]
+	switch i32 %t45i468, label %endif41i479 [
+		i32 120, label %case46i471
+		i32 111, label %case56i475
+	]
+
+case46i471:		; preds = %if40i469
+	%t48i470 = getelementptr i8* %p.0i450, i32 2		; <i8*> [#uses=1]
+	br label %endif41i479
+
+case56i475:		; preds = %if40i469
+	%t58i474 = getelementptr i8* %p.0i450, i32 2		; <i8*> [#uses=1]
+	br label %endif41i479
+
+endif41i479:		; preds = %case56i475, %case46i471, %if40i469, %endfori465
+	%r.0i476 = phi i32 [ 0, %if40i469 ], [ 8, %case56i475 ], [ 16, %case46i471 ], [ 0, %endfori465 ]		; <i32> [#uses=2]
+	%p.2i477 = phi i8* [ %t43i466, %if40i469 ], [ %t58i474, %case56i475 ], [ %t48i470, %case46i471 ], [ %p.0i450, %endfori465 ]		; <i8*> [#uses=2]
+	%t63i478 = icmp eq i32 %r.0i476, 0		; <i1> [#uses=1]
+	br i1 %t63i478, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491, label %if70i483
+
+if70i483:		; preds = %endif41i479
+	br label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491
+
+Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491:		; preds = %if70i483, %endif41i479, %e67
+	%radix.0.i485 = phi i32 [ 0, %e67 ], [ %r.0i476, %if70i483 ], [ 10, %endif41i479 ]		; <i32> [#uses=2]
+	%p.1i486 = phi i8* [ %p.2i477, %if70i483 ], [ %t89.i824, %e67 ], [ %p.2i477, %endif41i479 ]		; <i8*> [#uses=1]
+	%t84i487 = ptrtoint i8* %p.1i486 to i32		; <i32> [#uses=1]
+	%t85i488 = ptrtoint i8* %t89.i824 to i32		; <i32> [#uses=1]
+	%t86i489 = sub i32 %t84i487, %t85i488		; <i32> [#uses=2]
+	%ttt = sub i32 %t90.i825, %t86i489		; <i32> [#uses=1]
+	%t59i519 = zext i32 %radix.0.i485 to i64		; <i64> [#uses=1]
+	br label %fcondi496
+
+fcondi496:		; preds = %if55i525, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491
+	%value.0i492 = phi i64 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491 ], [ %t65i522, %if55i525 ]		; <i64> [#uses=1]
+	%fkey.0i494 = phi i32 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491 ], [ %t70i524, %if55i525 ]		; <i32> [#uses=3]
+	%t3i495 = icmp ult i32 %fkey.0i494, %ttt		; <i1> [#uses=1]
+	br i1 %t3i495, label %fbodyi502, label %wcond.i422
+
+fbodyi502:		; preds = %fcondi496
+	%t5.s.i497 = add i32 %t86i489, %fkey.0i494		; <i32> [#uses=1]
+	%t89.i824.s = add i32 %.frame.0.6, %t5.s.i497		; <i32> [#uses=1]
+	%t5i498 = getelementptr i8* %line_arg831, i32 %t89.i824.s		; <i8*> [#uses=1]
+	%t6i499 = load i8* %t5i498		; <i8> [#uses=6]
+	%t6.off84i500 = add i8 %t6i499, -48		; <i8> [#uses=1]
+	%or.cond.i28.i501 = icmp ugt i8 %t6.off84i500, 9		; <i1> [#uses=1]
+	br i1 %or.cond.i28.i501, label %ei505, label %endifi518
+
+ei505:		; preds = %fbodyi502
+	%t6.off83i503 = add i8 %t6i499, -97		; <i8> [#uses=1]
+	%or.cond81i504 = icmp ugt i8 %t6.off83i503, 25		; <i1> [#uses=1]
+	br i1 %or.cond81i504, label %e24i510, label %if22i507
+
+if22i507:		; preds = %ei505
+	%t27i506 = add i8 %t6i499, -39		; <i8> [#uses=1]
+	br label %endifi518
+
+e24i510:		; preds = %ei505
+	%t6.offi508 = add i8 %t6i499, -65		; <i8> [#uses=1]
+	%or.cond82i509 = icmp ugt i8 %t6.offi508, 25		; <i1> [#uses=1]
+	br i1 %or.cond82i509, label %wcond.i422, label %if39i512
+
+if39i512:		; preds = %e24i510
+	%t44.i29.i511 = add i8 %t6i499, -7		; <i8> [#uses=1]
+	br label %endifi518
+
+endifi518:		; preds = %if39i512, %if22i507, %fbodyi502
+	%c.0.i30.i513 = phi i8 [ %t27i506, %if22i507 ], [ %t44.i29.i511, %if39i512 ], [ %t6i499, %fbodyi502 ]		; <i8> [#uses=1]
+	%t48.i31.i514 = zext i8 %c.0.i30.i513 to i32		; <i32> [#uses=1]
+	%t49i515 = add i32 %t48.i31.i514, 208		; <i32> [#uses=1]
+	%t52i516 = and i32 %t49i515, 255		; <i32> [#uses=2]
+	%t54i517 = icmp ult i32 %t52i516, %radix.0.i485		; <i1> [#uses=1]
+	br i1 %t54i517, label %if55i525, label %wcond.i422
+
+if55i525:		; preds = %endifi518
+	%t61i520 = mul i64 %value.0i492, %t59i519		; <i64> [#uses=1]
+	%t64i521 = zext i32 %t52i516 to i64		; <i64> [#uses=1]
+	%t65i522 = add i64 %t61i520, %t64i521		; <i64> [#uses=1]
+	%t70i524 = add i32 %fkey.0i494, 1		; <i32> [#uses=1]
+	br label %fcondi496
+
+wcond.i422:		; preds = %e40.i, %endifi518, %e24i510, %fcondi496, %endifi603, %e24i595, %fcondi581
+	%sarg60.pn.i = phi i8* [ %p.0.i, %e40.i ], [ undef, %fcondi496 ], [ undef, %e24i510 ], [ undef, %endifi518 ], [ undef, %endifi603 ], [ undef, %e24i595 ], [ undef, %fcondi581 ]		; <i8*> [#uses=3]
+	%start_arg.pn.i = phi i32 [ %t49.i443, %e40.i ], [ 0, %fcondi496 ], [ 0, %e24i510 ], [ 0, %endifi518 ], [ 0, %endifi603 ], [ 0, %e24i595 ], [ 0, %fcondi581 ]		; <i32> [#uses=3]
+	%extent.0.i = phi i32 [ %t51.i, %e40.i ], [ undef, %fcondi496 ], [ undef, %e24i510 ], [ undef, %endifi518 ], [ undef, %endifi603 ], [ undef, %e24i595 ], [ undef, %fcondi581 ]		; <i32> [#uses=3]
+	%p.0.i = getelementptr i8* %sarg60.pn.i, i32 %start_arg.pn.i		; <i8*> [#uses=2]
+	%p.0.s63.i = add i32 %start_arg.pn.i, -1		; <i32> [#uses=1]
+	%t2i424 = getelementptr i8* %sarg60.pn.i, i32 %p.0.s63.i		; <i8*> [#uses=1]
+	br label %forcondi430
+
+forcondi430:		; preds = %forbodyi434, %wcond.i422
+	%l.0i426 = phi i32 [ %extent.0.i, %wcond.i422 ], [ %t4i428, %forbodyi434 ]		; <i32> [#uses=2]
+	%p.0i427 = phi i8* [ %t2i424, %wcond.i422 ], [ %t7i431, %forbodyi434 ]		; <i8*> [#uses=1]
+	%t4i428 = add i32 %l.0i426, -1		; <i32> [#uses=1]
+	%t5i429 = icmp eq i32 %l.0i426, 0		; <i1> [#uses=1]
+	br i1 %t5i429, label %e.i441, label %forbodyi434
+
+forbodyi434:		; preds = %forcondi430
+	%t7i431 = getelementptr i8* %p.0i427, i32 1		; <i8*> [#uses=3]
+	%t8i432 = load i8* %t7i431		; <i8> [#uses=1]
+	%t12i433 = icmp eq i8 %t8i432, 32		; <i1> [#uses=1]
+	br i1 %t12i433, label %ifi438, label %forcondi430
+
+ifi438:		; preds = %forbodyi434
+	%t15i435 = ptrtoint i8* %t7i431 to i32		; <i32> [#uses=1]
+	%t16i436 = ptrtoint i8* %p.0.i to i32		; <i32> [#uses=1]
+	%t17i437 = sub i32 %t15i435, %t16i436		; <i32> [#uses=1]
+	br label %e.i441
+
+e.i441:		; preds = %ifi438, %forcondi430
+	%t2561.i = phi i32 [ %t17i437, %ifi438 ], [ %extent.0.i, %forcondi430 ]		; <i32> [#uses=2]
+	%p.0.s.i = add i32 %start_arg.pn.i, %t2561.i		; <i32> [#uses=1]
+	%t32.s.i = add i32 %p.0.s.i, -1		; <i32> [#uses=1]
+	%t2i.i = getelementptr i8* %sarg60.pn.i, i32 %t32.s.i		; <i8*> [#uses=1]
+	br label %forbodyi.i
+
+forbodyi.i:		; preds = %forbodyi.i, %e.i441
+	%p.0i.i = phi i8* [ %t2i.i, %e.i441 ], [ %t7i.i, %forbodyi.i ]		; <i8*> [#uses=1]
+	%s2.0i.i = phi i8* [ getelementptr ([5 x i8]* @.str171, i32 0, i32 0), %e.i441 ], [ %t11i.i, %forbodyi.i ]		; <i8*> [#uses=2]
+	%t7i.i = getelementptr i8* %p.0i.i, i32 1		; <i8*> [#uses=2]
+	%t8i.i = load i8* %t7i.i		; <i8> [#uses=1]
+	%t11i.i = getelementptr i8* %s2.0i.i, i32 1		; <i8*> [#uses=1]
+	%t12i.i = load i8* %s2.0i.i		; <i8> [#uses=1]
+	%t14i.i = icmp eq i8 %t8i.i, %t12i.i		; <i1> [#uses=1]
+	br i1 %t14i.i, label %forbodyi.i, label %e40.i
+
+e40.i:		; preds = %forbodyi.i
+	%t49.i443 = add i32 %t2561.i, 1		; <i32> [#uses=2]
+	%t51.i = sub i32 %extent.0.i, %t49.i443		; <i32> [#uses=1]
+	br label %wcond.i422
+}
diff --git a/test/CodeGen/PowerPC/trampoline.ll b/test/CodeGen/PowerPC/trampoline.ll
index 530c7826ea837..bc05bb176352f 100644
--- a/test/CodeGen/PowerPC/trampoline.ll
+++ b/test/CodeGen/PowerPC/trampoline.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep {__trampoline_setup}
+; RUN: llc < %s -march=ppc32 | grep {__trampoline_setup}
 
 module asm "\09.lazy_reference .objc_class_name_NSImageRep"
 module asm "\09.objc_class_name_NSBitmapImageRep=0"
diff --git a/test/CodeGen/PowerPC/unsafe-math.ll b/test/CodeGen/PowerPC/unsafe-math.ll
index d211b3b76f523..ef9791277dcd1 100644
--- a/test/CodeGen/PowerPC/unsafe-math.ll
+++ b/test/CodeGen/PowerPC/unsafe-math.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 | grep fmul | count 2
-; RUN: llvm-as < %s | llc -march=ppc32 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=ppc32 | grep fmul | count 2
+; RUN: llc < %s -march=ppc32 -enable-unsafe-fp-math | \
 ; RUN:   grep fmul | count 1
 
 define double @foo(double %X) {
diff --git a/test/CodeGen/PowerPC/vcmp-fold.ll b/test/CodeGen/PowerPC/vcmp-fold.ll
index 815bb0aedff59..7a42c27d2b4ae 100644
--- a/test/CodeGen/PowerPC/vcmp-fold.ll
+++ b/test/CodeGen/PowerPC/vcmp-fold.ll
@@ -1,6 +1,6 @@
 ; This should fold the "vcmpbfp." and "vcmpbfp" instructions into a single
 ; "vcmpbfp.".
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vcmpbfp | count 1
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vcmpbfp | count 1
 
 
 define void @test(<4 x float>* %x, <4 x float>* %y, i32* %P) {
diff --git a/test/CodeGen/PowerPC/vec_br_cmp.ll b/test/CodeGen/PowerPC/vec_br_cmp.ll
index 6d799676b77b4..c34d850c0ac70 100644
--- a/test/CodeGen/PowerPC/vec_br_cmp.ll
+++ b/test/CodeGen/PowerPC/vec_br_cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t
 ; RUN: grep vcmpeqfp. %t
 ; RUN: not grep mfcr %t
 
diff --git a/test/CodeGen/PowerPC/vec_call.ll b/test/CodeGen/PowerPC/vec_call.ll
index 8e7a08ebb7d73..4511315c3bfad 100644
--- a/test/CodeGen/PowerPC/vec_call.ll
+++ b/test/CodeGen/PowerPC/vec_call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
 
 define <4 x i32> @test_arg(<4 x i32> %A, <4 x i32> %B) {
 	%C = add <4 x i32> %A, %B		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll
index c4b42b9e9b8ac..32c6f4809cb49 100644
--- a/test/CodeGen/PowerPC/vec_constants.ll
+++ b/test/CodeGen/PowerPC/vec_constants.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep CPI
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI
 
 define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
 	%tmp = load <4 x i32>* %P1		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_fneg.ll b/test/CodeGen/PowerPC/vec_fneg.ll
index 9fdbffd33ed51..e01e65979f6f7 100644
--- a/test/CodeGen/PowerPC/vec_fneg.ll
+++ b/test/CodeGen/PowerPC/vec_fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vsubfp
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubfp
 
 define void @t(<4 x float>* %A) {
 	%tmp2 = load <4 x float>* %A
diff --git a/test/CodeGen/PowerPC/vec_insert.ll b/test/CodeGen/PowerPC/vec_insert.ll
index 04bbe6574f628..185454cbd31df 100644
--- a/test/CodeGen/PowerPC/vec_insert.ll
+++ b/test/CodeGen/PowerPC/vec_insert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep sth
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep sth
 
 define <8 x i16> @insert(<8 x i16> %foo, i16 %a) nounwind  {
 entry:
diff --git a/test/CodeGen/PowerPC/vec_misaligned.ll b/test/CodeGen/PowerPC/vec_misaligned.ll
index 15376caebefaa..d7ed64a5b1cfc 100644
--- a/test/CodeGen/PowerPC/vec_misaligned.ll
+++ b/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
index b061fa9a54ee2..80f4de4a1728a 100644
--- a/test/CodeGen/PowerPC/vec_mul.ll
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep mullw
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vmsumuhm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep mullw
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vmsumuhm
 
 define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
 	%tmp = load <4 x i32>* %X		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_perf_shuffle.ll b/test/CodeGen/PowerPC/vec_perf_shuffle.ll
index 5bb1b6083417a..2c3594d224fe0 100644
--- a/test/CodeGen/PowerPC/vec_perf_shuffle.ll
+++ b/test/CodeGen/PowerPC/vec_perf_shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep vperm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm
 
 define <4 x float> @test_uu72(<4 x float>* %P1, <4 x float>* %P2) {
 	%V1 = load <4 x float>* %P1		; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_shift.ll b/test/CodeGen/PowerPC/vec_shift.ll
index 0cc699cee42ca..646fb5f3866c3 100644
--- a/test/CodeGen/PowerPC/vec_shift.ll
+++ b/test/CodeGen/PowerPC/vec_shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc  -march=ppc32 -mcpu=g5
+; RUN: llc < %s  -march=ppc32 -mcpu=g5
 ; PR3628
 
 define void @update(<4 x i32> %val, <4 x i32>* %dst) nounwind {
diff --git a/test/CodeGen/PowerPC/vec_shuffle.ll b/test/CodeGen/PowerPC/vec_shuffle.ll
index 1289dca2d2116..82706321c1c19 100644
--- a/test/CodeGen/PowerPC/vec_shuffle.ll
+++ b/test/CodeGen/PowerPC/vec_shuffle.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | opt -instcombine | \
+; RUN: opt < %s -instcombine | \
 ; RUN:   llc -march=ppc32 -mcpu=g5 | not grep vperm
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 > %t
+; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t
 ; RUN: grep vsldoi  %t | count 2
 ; RUN: grep vmrgh   %t | count 7
 ; RUN: grep vmrgl   %t | count 6
diff --git a/test/CodeGen/PowerPC/vec_splat.ll b/test/CodeGen/PowerPC/vec_splat.ll
index 7b7e4fe334778..61237284d36c8 100644
--- a/test/CodeGen/PowerPC/vec_splat.ll
+++ b/test/CodeGen/PowerPC/vec_splat.ll
@@ -1,7 +1,7 @@
 ; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3 | \
+; RUN: llc < %s -march=ppc32 -mcpu=g3 | \
 ; RUN:    grep stfs | count 4
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t
 ; RUN: grep vspltw %t | count 2
 ; RUN: grep vsplti %t | count 3
 ; RUN: grep vsplth %t | count 1
diff --git a/test/CodeGen/PowerPC/vec_vrsave.ll b/test/CodeGen/PowerPC/vec_vrsave.ll
index 06769f6bf0f8b..2a03d5819b835 100644
--- a/test/CodeGen/PowerPC/vec_vrsave.ll
+++ b/test/CodeGen/PowerPC/vec_vrsave.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t
 ; RUN: grep vrlw %t
 ; RUN: not grep spr %t
 ; RUN: not grep vrsave %t
diff --git a/test/CodeGen/PowerPC/vec_zero.ll b/test/CodeGen/PowerPC/vec_zero.ll
index 7350e91b77418..f862b2cb4c4b9 100644
--- a/test/CodeGen/PowerPC/vec_zero.ll
+++ b/test/CodeGen/PowerPC/vec_zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vxor
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vxor
 
 define void @foo(<4 x float>* %P) {
         %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vector-identity-shuffle.ll b/test/CodeGen/PowerPC/vector-identity-shuffle.ll
index aefd2661a8bcb..dfa2e35435a83 100644
--- a/test/CodeGen/PowerPC/vector-identity-shuffle.ll
+++ b/test/CodeGen/PowerPC/vector-identity-shuffle.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep test:
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep vperm
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep test:
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm
 
 define void @test(<4 x float>* %tmp2.i) {
         %tmp2.i.upgrd.1 = load <4 x float>* %tmp2.i             ; <<4 x float>> [#uses=4]
diff --git a/test/CodeGen/PowerPC/vector.ll b/test/CodeGen/PowerPC/vector.ll
index a6c17b4bccf62..ee4da315f9277 100644
--- a/test/CodeGen/PowerPC/vector.ll
+++ b/test/CodeGen/PowerPC/vector.ll
@@ -1,6 +1,6 @@
 ; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 > %t
-; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3 > %t
+; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t
+; RUN: llc < %s -march=ppc32 -mcpu=g3 > %t
 
 %d8 = type <8 x double>
 %f1 = type <1 x float>
diff --git a/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll b/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
index 76f140ceaf859..082f9f40f289b 100644
--- a/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
+++ b/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
 
 define void @execute_list() {
         %tmp.33.i = fdiv float 0.000000e+00, 0.000000e+00               ; <float> [#uses=1]
diff --git a/test/CodeGen/SPARC/2007-05-09-JumpTables.ll b/test/CodeGen/SPARC/2007-05-09-JumpTables.ll
index a014acefa904a..41ad3b27b5a7d 100644
--- a/test/CodeGen/SPARC/2007-05-09-JumpTables.ll
+++ b/test/CodeGen/SPARC/2007-05-09-JumpTables.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
 
 ; We cannot emit jump tables on Sparc, but we should correctly handle this case.
 
diff --git a/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll b/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll
index d1ca44dbb112f..77c20028824fe 100644
--- a/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll
+++ b/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
 ; PR1540
 
 declare float @sinf(float)
diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
index f9f4c21fe1eac..e8315f17ebb6a 100644
--- a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
+++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
 ; PR 1557
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
index aaa7bde683836..c12e9c13409ba 100644
--- a/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
+++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
 ; PR 1557
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
diff --git a/test/CodeGen/SPARC/2009-08-28-PIC.ll b/test/CodeGen/SPARC/2009-08-28-PIC.ll
new file mode 100644
index 0000000000000..a2ba0d02d45c2
--- /dev/null
+++ b/test/CodeGen/SPARC/2009-08-28-PIC.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=sparc --relocation-model=pic < %s | grep _GLOBAL_OFFSET_TABLE_
+
+@foo = global i32 0                               ; <i32*> [#uses=1]
+
+define i32 @func() nounwind readonly {
+entry:
+  %0 = load i32* @foo, align 4                    ; <i32> [#uses=1]
+  ret i32 %0
+}
diff --git a/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll b/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll
new file mode 100644
index 0000000000000..0167d3237aad8
--- /dev/null
+++ b/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll
@@ -0,0 +1,6 @@
+; RUN: llc -march=sparc < %s | grep weak
+
+define weak i32 @func() nounwind {
+entry:
+  ret i32 0
+}
diff --git a/test/CodeGen/SPARC/basictest.ll b/test/CodeGen/SPARC/basictest.ll
index 5c3e07543b9da..9c2c16a6947cf 100644
--- a/test/CodeGen/SPARC/basictest.ll
+++ b/test/CodeGen/SPARC/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc
+; RUN: llc < %s -march=sparc
 
 define i32 @test(i32 %X) {
 	%tmp.1 = add i32 %X, 1
diff --git a/test/CodeGen/SPARC/ctpop.ll b/test/CodeGen/SPARC/ctpop.ll
index d603baa465dee..37d1c5a5706df 100644
--- a/test/CodeGen/SPARC/ctpop.ll
+++ b/test/CodeGen/SPARC/ctpop.ll
@@ -1,9 +1,7 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=sparc -mattr=v9 -enable-sparc-v9-insts
-; RUN: llvm-as < %s | llc -march=sparc -mattr=-v9 | \
+; RUN: llc < %s -march=sparc -mattr=v9 -enable-sparc-v9-insts
+; RUN: llc < %s -march=sparc -mattr=-v9 | \
 ; RUN:   not grep popc
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=sparc -mattr=v9 -enable-sparc-v9-insts | grep popc
+; RUN: llc < %s -march=sparc -mattr=v9 -enable-sparc-v9-insts | grep popc
 
 declare i32 @llvm.ctpop.i32(i32)
 
diff --git a/test/CodeGen/SPARC/private.ll b/test/CodeGen/SPARC/private.ll
index a9850b7def463..8fa3e7e52d8d8 100644
--- a/test/CodeGen/SPARC/private.ll
+++ b/test/CodeGen/SPARC/private.ll
@@ -1,6 +1,6 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llvm-as < %s | llc  -march=sparc > %t
+; RUN: llc < %s  -march=sparc > %t
 ; RUN: grep .foo: %t
 ; RUN: grep call.*\.foo %t
 ; RUN: grep .baz: %t
diff --git a/test/CodeGen/SPARC/xnor.ll b/test/CodeGen/SPARC/xnor.ll
index 9d8994c006132..6ff66bd6fcc67 100644
--- a/test/CodeGen/SPARC/xnor.ll
+++ b/test/CodeGen/SPARC/xnor.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=sparc | \
+; RUN: llc < %s -march=sparc | \
 ; RUN:   grep xnor | count 2
 
 define i32 @test1(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/SystemZ/00-RetVoid.ll b/test/CodeGen/SystemZ/00-RetVoid.ll
new file mode 100644
index 0000000000000..de23795ab08a6
--- /dev/null
+++ b/test/CodeGen/SystemZ/00-RetVoid.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+
+define void @foo() {
+entry:
+    ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/01-RetArg.ll b/test/CodeGen/SystemZ/01-RetArg.ll
new file mode 100644
index 0000000000000..9ab2097a0c89c
--- /dev/null
+++ b/test/CodeGen/SystemZ/01-RetArg.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+    ret i64 %b
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/01-RetImm.ll b/test/CodeGen/SystemZ/01-RetImm.ll
new file mode 100644
index 0000000000000..8b99e68dc7e1e
--- /dev/null
+++ b/test/CodeGen/SystemZ/01-RetImm.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=systemz | grep lghi  | count 1
+; RUN: llc < %s -march=systemz | grep llill | count 1
+; RUN: llc < %s -march=systemz | grep llilh | count 1
+; RUN: llc < %s -march=systemz | grep llihl | count 1
+; RUN: llc < %s -march=systemz | grep llihh | count 1
+; RUN: llc < %s -march=systemz | grep lgfi  | count 1
+; RUN: llc < %s -march=systemz | grep llilf | count 1
+; RUN: llc < %s -march=systemz | grep llihf | count 1
+
+
+define i64 @foo1() {
+entry:
+    ret i64 1
+}
+
+define i64 @foo2() {
+entry:
+    ret i64 65535 
+}
+
+define i64 @foo3() {
+entry:
+    ret i64 131072
+}
+
+define i64 @foo4() {
+entry:
+    ret i64 8589934592
+}
+
+define i64 @foo5() {
+entry:
+    ret i64 562949953421312
+}
+
+define i64 @foo6() {
+entry:
+    ret i64 65537
+}
+
+define i64 @foo7() {
+entry:
+    ret i64 4294967295
+}
+
+define i64 @foo8() {
+entry:
+    ret i64 281483566645248
+}
diff --git a/test/CodeGen/SystemZ/02-MemArith.ll b/test/CodeGen/SystemZ/02-MemArith.ll
new file mode 100644
index 0000000000000..04022a063f166
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-MemArith.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -march=systemz | FileCheck %s
+
+define i32 @foo1(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo1:
+; CHECK:  a %r2, 4(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
+    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
+    %c = load i32* %ptr
+    %d = add i32 %a, %c
+    ret i32 %d
+}
+
+define i32 @foo2(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo2:
+; CHECK:  ay %r2, -4(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
+    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
+    %c = load i32* %ptr
+    %d = add i32 %a, %c
+    ret i32 %d
+}
+
+define i64 @foo3(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo3:
+; CHECK:  ag %r2, 8(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
+    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
+    %c = load i64* %ptr
+    %d = add i64 %a, %c
+    ret i64 %d
+}
+
+define i32 @foo4(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo4:
+; CHECK:  n %r2, 4(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
+    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
+    %c = load i32* %ptr
+    %d = and i32 %a, %c
+    ret i32 %d
+}
+
+define i32 @foo5(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo5:
+; CHECK:  ny %r2, -4(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
+    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
+    %c = load i32* %ptr
+    %d = and i32 %a, %c
+    ret i32 %d
+}
+
+define i64 @foo6(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo6:
+; CHECK:  ng %r2, 8(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
+    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
+    %c = load i64* %ptr
+    %d = and i64 %a, %c
+    ret i64 %d
+}
+
+define i32 @foo7(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo7:
+; CHECK:  o %r2, 4(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
+    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
+    %c = load i32* %ptr
+    %d = or i32 %a, %c
+    ret i32 %d
+}
+
+define i32 @foo8(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo8:
+; CHECK:  oy %r2, -4(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
+    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
+    %c = load i32* %ptr
+    %d = or i32 %a, %c
+    ret i32 %d
+}
+
+define i64 @foo9(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo9:
+; CHECK:  og %r2, 8(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
+    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
+    %c = load i64* %ptr
+    %d = or i64 %a, %c
+    ret i64 %d
+}
+
+define i32 @foo10(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo10:
+; CHECK:  x %r2, 4(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
+    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
+    %c = load i32* %ptr
+    %d = xor i32 %a, %c
+    ret i32 %d
+}
+
+define i32 @foo11(i32 %a, i32 *%b, i64 %idx) signext {
+; CHECK: foo11:
+; CHECK:  xy %r2, -4(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
+    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
+    %c = load i32* %ptr
+    %d = xor i32 %a, %c
+    ret i32 %d
+}
+
+define i64 @foo12(i64 %a, i64 *%b, i64 %idx) signext {
+; CHECK: foo12:
+; CHECK:  xg %r2, 8(%r1,%r3)
+entry:
+    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
+    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
+    %c = load i64* %ptr
+    %d = xor i64 %a, %c
+    ret i64 %d
+}
diff --git a/test/CodeGen/SystemZ/02-RetAdd.ll b/test/CodeGen/SystemZ/02-RetAdd.ll
new file mode 100644
index 0000000000000..9ff9b6ac38333
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAdd.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+    %c = add i64 %a, %b
+    ret i64 %c
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetAddImm.ll b/test/CodeGen/SystemZ/02-RetAddImm.ll
new file mode 100644
index 0000000000000..6d73e4d42ab23
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAddImm.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+    %c = add i64 %a, 1
+    ret i64 %c
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetAnd.ll b/test/CodeGen/SystemZ/02-RetAnd.ll
new file mode 100644
index 0000000000000..1492f9dbee759
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAnd.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+    %c = and i64 %a, %b
+    ret i64 %c
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetAndImm.ll b/test/CodeGen/SystemZ/02-RetAndImm.ll
new file mode 100644
index 0000000000000..53c5e54528bdb
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetAndImm.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=systemz | grep ngr   | count 4
+; RUN: llc < %s -march=systemz | grep llilh | count 1
+; RUN: llc < %s -march=systemz | grep llihl | count 1
+; RUN: llc < %s -march=systemz | grep llihh | count 1
+
+define i64 @foo1(i64 %a, i64 %b) {
+entry:
+    %c = and i64 %a, 1
+    ret i64 %c
+}
+
+define i64 @foo2(i64 %a, i64 %b) {
+entry:
+    %c = and i64 %a, 131072
+    ret i64 %c
+}
+
+define i64 @foo3(i64 %a, i64 %b) {
+entry:
+    %c = and i64 %a, 8589934592
+    ret i64 %c
+}
+
+define i64 @foo4(i64 %a, i64 %b) {
+entry:
+    %c = and i64 %a, 562949953421312
+    ret i64 %c
+}
diff --git a/test/CodeGen/SystemZ/02-RetNeg.ll b/test/CodeGen/SystemZ/02-RetNeg.ll
new file mode 100644
index 0000000000000..7f3380dc16acf
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetNeg.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz | grep lcgr | count 1
+
+define i64 @foo(i64 %a) {
+entry:
+    %c = sub i64 0, %a
+    ret i64 %c
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetOr.ll b/test/CodeGen/SystemZ/02-RetOr.ll
new file mode 100644
index 0000000000000..1e8134d2ddcc1
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetOr.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+    %c = or i64 %a, %b
+    ret i64 %c
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetOrImm.ll b/test/CodeGen/SystemZ/02-RetOrImm.ll
new file mode 100644
index 0000000000000..68cd24d07f443
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetOrImm.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=systemz | grep oill | count 1
+; RUN: llc < %s -march=systemz | grep oilh | count 1
+; RUN: llc < %s -march=systemz | grep oihl | count 1
+; RUN: llc < %s -march=systemz | grep oihh | count 1
+
+define i64 @foo1(i64 %a, i64 %b) {
+entry:
+    %c = or i64 %a, 1
+    ret i64 %c
+}
+
+define i64 @foo2(i64 %a, i64 %b) {
+entry:
+    %c = or i64 %a, 131072
+    ret i64 %c
+}
+
+define i64 @foo3(i64 %a, i64 %b) {
+entry:
+    %c = or i64 %a, 8589934592
+    ret i64 %c
+}
+
+define i64 @foo4(i64 %a, i64 %b) {
+entry:
+    %c = or i64 %a, 562949953421312
+    ret i64 %c
+}
diff --git a/test/CodeGen/SystemZ/02-RetSub.ll b/test/CodeGen/SystemZ/02-RetSub.ll
new file mode 100644
index 0000000000000..1c4514f36c934
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetSub.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+    %c = sub i64 %a, %b
+    ret i64 %c
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetSubImm.ll b/test/CodeGen/SystemZ/02-RetSubImm.ll
new file mode 100644
index 0000000000000..4f91cb073997c
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetSubImm.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=systemz
+
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+    %c = sub i64 %a, 1
+    ret i64 %c
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetXor.ll b/test/CodeGen/SystemZ/02-RetXor.ll
new file mode 100644
index 0000000000000..a9439bf452a7f
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetXor.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+    %c = xor i64 %a, %b
+    ret i64 %c
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/02-RetXorImm.ll b/test/CodeGen/SystemZ/02-RetXorImm.ll
new file mode 100644
index 0000000000000..ea4b8290df3b4
--- /dev/null
+++ b/test/CodeGen/SystemZ/02-RetXorImm.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=systemz
+define i64 @foo(i64 %a, i64 %b) {
+entry:
+    %c = xor i64 %a, 1
+    ret i64 %c
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
new file mode 100644
index 0000000000000..0a812715ae579
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=systemz | grep ahi   | count 3
+; RUN: llc < %s -march=systemz | grep afi   | count 3
+; RUN: llc < %s -march=systemz | grep lgfr  | count 4
+; RUN: llc < %s -march=systemz | grep llgfr | count 2
+
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+    %c = add i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+    %c = add i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+    %c = add i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) zeroext {
+entry:
+    %c = add i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) signext {
+entry:
+    %c = add i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+    %c = add i32 %a, 131072
+    ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetAddSubreg.ll b/test/CodeGen/SystemZ/03-RetAddSubreg.ll
new file mode 100644
index 0000000000000..2787083f162b2
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAddSubreg.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=systemz | grep ar    | count 3
+; RUN: llc < %s -march=systemz | grep lgfr  | count 2
+; RUN: llc < %s -march=systemz | grep llgfr | count 1
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+    %c = add i32 %a, %b
+    ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+    %c = add i32 %a, %b
+    ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+    %c = add i32 %a, %b
+    ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
new file mode 100644
index 0000000000000..32673dd014c50
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=systemz | grep ngr  | count 6
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+    %c = and i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+    %c = and i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+    %c = and i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+    %c = and i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+    %c = and i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+    %c = and i32 %a, 131072
+    ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetAndSubreg.ll b/test/CodeGen/SystemZ/03-RetAndSubreg.ll
new file mode 100644
index 0000000000000..ed5e5269525bf
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetAndSubreg.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=systemz | grep ngr | count 3
+; RUN: llc < %s -march=systemz | grep nihf | count 1
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+    %c = and i32 %a, %b
+    ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+    %c = and i32 %a, %b
+    ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+    %c = and i32 %a, %b
+    ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetArgSubreg.ll b/test/CodeGen/SystemZ/03-RetArgSubreg.ll
new file mode 100644
index 0000000000000..0c9bb14eef3e0
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetArgSubreg.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=systemz | grep lgr   | count 2
+; RUN: llc < %s -march=systemz | grep nihf  | count 1
+; RUN: llc < %s -march=systemz | grep lgfr  | count 1
+
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+    ret i32 %b
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+    ret i32 %b
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+    ret i32 %b
+}
diff --git a/test/CodeGen/SystemZ/03-RetImmSubreg.ll b/test/CodeGen/SystemZ/03-RetImmSubreg.ll
new file mode 100644
index 0000000000000..343e30b721385
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetImmSubreg.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=systemz | grep lghi  | count 2
+; RUN: llc < %s -march=systemz | grep llill | count 1
+; RUN: llc < %s -march=systemz | grep llilh | count 1
+; RUN: llc < %s -march=systemz | grep lgfi  | count 1
+; RUN: llc < %s -march=systemz | grep llilf | count 2
+
+
+define i32 @foo1() {
+entry:
+    ret i32 1
+}
+
+define i32 @foo2() {
+entry:
+    ret i32 65535 
+}
+
+define i32 @foo3() {
+entry:
+    ret i32 131072
+}
+
+define i32 @foo4() {
+entry:
+    ret i32 65537
+}
+
+define i32 @foo5() {
+entry:
+    ret i32 4294967295
+}
+
+define i32 @foo6() zeroext {
+entry:
+    ret i32 4294967295
+}
+
+define i32 @foo7() signext {
+entry:
+    ret i32 4294967295
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll b/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
new file mode 100644
index 0000000000000..87ebcc1f0a4f8
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=systemz | grep lcr | count 1
+
+define i32 @foo(i32 %a) {
+entry:
+    %c = sub i32 0, %a
+    ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll b/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
new file mode 100644
index 0000000000000..6d118b5e3d407
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -march=systemz | grep oill  | count 3
+; RUN: llc < %s -march=systemz | grep oilh  | count 3
+; RUN: llc < %s -march=systemz | grep oilf  | count 3
+; RUN: llc < %s -march=systemz | grep llgfr | count 3
+; RUN: llc < %s -march=systemz | grep lgfr  | count 6
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+    %c = or i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+    %c = or i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo7(i32 %a, i32 %b) {
+entry:
+    %c = or i32 %a, 123456
+    ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+    %c = or i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo8(i32 %a, i32 %b) zeroext {
+entry:
+    %c = or i32 %a, 123456
+    ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+    %c = or i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+    %c = or i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+    %c = or i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo9(i32 %a, i32 %b) signext {
+entry:
+    %c = or i32 %a, 123456
+    ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetOrSubreg.ll b/test/CodeGen/SystemZ/03-RetOrSubreg.ll
new file mode 100644
index 0000000000000..4d7661acb716d
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetOrSubreg.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=systemz | grep ogr   | count 3
+; RUN: llc < %s -march=systemz | grep nihf  | count 1
+; RUN: llc < %s -march=systemz | grep lgfr  | count 1
+
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+    %c = or i32 %a, %b
+    ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+    %c = or i32 %a, %b
+    ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+    %c = or i32 %a, %b
+    ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll b/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
new file mode 100644
index 0000000000000..11ca796c7b17a
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=systemz | grep ahi   | count 3
+; RUN: llc < %s -march=systemz | grep afi   | count 3
+; RUN: llc < %s -march=systemz | grep lgfr  | count 4
+; RUN: llc < %s -march=systemz | grep llgfr | count 2
+
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+    %c = sub i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+    %c = sub i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+    %c = sub i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+    %c = sub i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+    %c = sub i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+    %c = sub i32 %a, 131072
+    ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetSubSubreg.ll b/test/CodeGen/SystemZ/03-RetSubSubreg.ll
new file mode 100644
index 0000000000000..b3e1ac26e08c4
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetSubSubreg.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=systemz | grep sr    | count 3
+; RUN: llc < %s -march=systemz | grep llgfr | count 1
+; RUN: llc < %s -march=systemz | grep lgfr  | count 2
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+    %c = sub i32 %a, %b
+    ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+    %c = sub i32 %a, %b
+    ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+    %c = sub i32 %a, %b
+    ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll b/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
new file mode 100644
index 0000000000000..0033126369e6f
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -march=systemz | grep xilf  | count 9
+; RUN: llc < %s -march=systemz | grep llgfr | count 3
+; RUN: llc < %s -march=systemz | grep lgfr  | count 6
+
+define i32 @foo1(i32 %a, i32 %b) {
+entry:
+    %c = xor i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) {
+entry:
+    %c = xor i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo7(i32 %a, i32 %b) {
+entry:
+    %c = xor i32 %a, 123456
+    ret i32 %c
+}
+
+define i32 @foo3(i32 %a, i32 %b) zeroext {
+entry:
+    %c = xor i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo8(i32 %a, i32 %b) zeroext {
+entry:
+    %c = xor i32 %a, 123456
+    ret i32 %c
+}
+
+define i32 @foo4(i32 %a, i32 %b) signext {
+entry:
+    %c = xor i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo5(i32 %a, i32 %b) zeroext {
+entry:
+    %c = xor i32 %a, 1
+    ret i32 %c
+}
+
+define i32 @foo6(i32 %a, i32 %b) signext {
+entry:
+    %c = xor i32 %a, 131072
+    ret i32 %c
+}
+
+define i32 @foo9(i32 %a, i32 %b) signext {
+entry:
+    %c = xor i32 %a, 123456
+    ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/03-RetXorSubreg.ll b/test/CodeGen/SystemZ/03-RetXorSubreg.ll
new file mode 100644
index 0000000000000..a9af23197ef85
--- /dev/null
+++ b/test/CodeGen/SystemZ/03-RetXorSubreg.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=systemz | grep xgr   | count 3
+; RUN: llc < %s -march=systemz | grep nihf  | count 1
+; RUN: llc < %s -march=systemz | grep lgfr  | count 1
+
+
+define i32 @foo(i32 %a, i32 %b) {
+entry:
+    %c = xor i32 %a, %b
+    ret i32 %c
+}
+
+define i32 @foo1(i32 %a, i32 %b) zeroext {
+entry:
+    %c = xor i32 %a, %b
+    ret i32 %c
+}
+
+define i32 @foo2(i32 %a, i32 %b) signext {
+entry:
+    %c = xor i32 %a, %b
+    ret i32 %c
+}
+
diff --git a/test/CodeGen/SystemZ/04-RetShifts.ll b/test/CodeGen/SystemZ/04-RetShifts.ll
new file mode 100644
index 0000000000000..cccdc4737f768
--- /dev/null
+++ b/test/CodeGen/SystemZ/04-RetShifts.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -march=systemz | grep sra   | count 6
+; RUN: llc < %s -march=systemz | grep srag  | count 3
+; RUN: llc < %s -march=systemz | grep srl   | count 6
+; RUN: llc < %s -march=systemz | grep srlg  | count 3
+; RUN: llc < %s -march=systemz | grep sll   | count 6
+; RUN: llc < %s -march=systemz | grep sllg  | count 3
+
+define signext i32 @foo1(i32 %a, i32 %idx) nounwind readnone {
+entry:
+	%add = add i32 %idx, 1		; <i32> [#uses=1]
+	%shr = ashr i32 %a, %add		; <i32> [#uses=1]
+	ret i32 %shr
+}
+
+define signext i32 @foo2(i32 %a, i32 %idx) nounwind readnone {
+entry:
+	%add = add i32 %idx, 1		; <i32> [#uses=1]
+	%shr = shl i32 %a, %add		; <i32> [#uses=1]
+	ret i32 %shr
+}
+
+define signext i32 @foo3(i32 %a, i32 %idx) nounwind readnone {
+entry:
+	%add = add i32 %idx, 1		; <i32> [#uses=1]
+	%shr = lshr i32 %a, %add		; <i32> [#uses=1]
+	ret i32 %shr
+}
+
+define signext i64 @foo4(i64 %a, i64 %idx) nounwind readnone {
+entry:
+	%add = add i64 %idx, 1		; <i64> [#uses=1]
+	%shr = ashr i64 %a, %add		; <i64> [#uses=1]
+	ret i64 %shr
+}
+
+define signext i64 @foo5(i64 %a, i64 %idx) nounwind readnone {
+entry:
+	%add = add i64 %idx, 1		; <i64> [#uses=1]
+	%shr = shl i64 %a, %add		; <i64> [#uses=1]
+	ret i64 %shr
+}
+
+define signext i64 @foo6(i64 %a, i64 %idx) nounwind readnone {
+entry:
+	%add = add i64 %idx, 1		; <i64> [#uses=1]
+	%shr = lshr i64 %a, %add		; <i64> [#uses=1]
+	ret i64 %shr
+}
+
+define signext i32 @foo7(i32 %a, i32 %idx) nounwind readnone {
+entry:
+        %shr = ashr i32 %a, 1
+        ret i32 %shr
+}
+
+define signext i32 @foo8(i32 %a, i32 %idx) nounwind readnone {
+entry:
+        %shr = shl i32 %a, 1
+        ret i32 %shr
+}
+
+define signext i32 @foo9(i32 %a, i32 %idx) nounwind readnone {
+entry:
+        %shr = lshr i32 %a, 1
+        ret i32 %shr
+}
+
+define signext i32 @foo10(i32 %a, i32 %idx) nounwind readnone {
+entry:
+        %shr = ashr i32 %a, %idx
+        ret i32 %shr
+}
+
+define signext i32 @foo11(i32 %a, i32 %idx) nounwind readnone {
+entry:
+        %shr = shl i32 %a, %idx
+        ret i32 %shr
+}
+
+define signext i32 @foo12(i32 %a, i32 %idx) nounwind readnone {
+entry:
+        %shr = lshr i32 %a, %idx
+        ret i32 %shr
+}
+
+define signext i64 @foo13(i64 %a, i64 %idx) nounwind readnone {
+entry:
+        %shr = ashr i64 %a, 1
+        ret i64 %shr
+}
+
+define signext i64 @foo14(i64 %a, i64 %idx) nounwind readnone {
+entry:
+        %shr = shl i64 %a, 1
+        ret i64 %shr
+}
+
+define signext i64 @foo15(i64 %a, i64 %idx) nounwind readnone {
+entry:
+        %shr = lshr i64 %a, 1
+        ret i64 %shr
+}
+
+define signext i64 @foo16(i64 %a, i64 %idx) nounwind readnone {
+entry:
+        %shr = ashr i64 %a, %idx
+        ret i64 %shr
+}
+
+define signext i64 @foo17(i64 %a, i64 %idx) nounwind readnone {
+entry:
+        %shr = shl i64 %a, %idx
+        ret i64 %shr
+}
+
+define signext i64 @foo18(i64 %a, i64 %idx) nounwind readnone {
+entry:
+        %shr = lshr i64 %a, %idx
+        ret i64 %shr
+}
+
diff --git a/test/CodeGen/SystemZ/05-LoadAddr.ll b/test/CodeGen/SystemZ/05-LoadAddr.ll
new file mode 100644
index 0000000000000..cf02642839398
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-LoadAddr.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | grep lay | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64* @foo(i64* %a, i64 %idx) nounwind readnone {
+entry:
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
+	ret i64* %add.ptr2
+}
diff --git a/test/CodeGen/SystemZ/05-MemImmStores.ll b/test/CodeGen/SystemZ/05-MemImmStores.ll
new file mode 100644
index 0000000000000..3cf21ccd931a9
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemImmStores.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mattr=+z10 | grep mvghi | count 1
+; RUN: llc < %s -mattr=+z10 | grep mvhi  | count 1
+; RUN: llc < %s -mattr=+z10 | grep mvhhi | count 1
+; RUN: llc < %s | grep mvi   | count 2
+; RUN: llc < %s | grep mviy  | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo1(i64* nocapture %a, i64 %idx) nounwind {
+entry:
+	%add.ptr = getelementptr i64* %a, i64 1		; <i64*> [#uses=1]
+	store i64 1, i64* %add.ptr
+	ret void
+}
+
+define void @foo2(i32* nocapture %a, i64 %idx) nounwind {
+entry:
+	%add.ptr = getelementptr i32* %a, i64 1		; <i32*> [#uses=1]
+	store i32 2, i32* %add.ptr
+	ret void
+}
+
+define void @foo3(i16* nocapture %a, i64 %idx) nounwind {
+entry:
+	%add.ptr = getelementptr i16* %a, i64 1		; <i16*> [#uses=1]
+	store i16 3, i16* %add.ptr
+	ret void
+}
+
+define void @foo4(i8* nocapture %a, i64 %idx) nounwind {
+entry:
+	%add.ptr = getelementptr i8* %a, i64 1		; <i8*> [#uses=1]
+	store i8 4, i8* %add.ptr
+	ret void
+}
+
+define void @foo5(i8* nocapture %a, i64 %idx) nounwind {
+entry:
+        %add.ptr = getelementptr i8* %a, i64 -1         ; <i8*> [#uses=1]
+        store i8 4, i8* %add.ptr
+        ret void
+}
+
+define void @foo6(i16* nocapture %a, i64 %idx) nounwind {
+entry:
+        %add.ptr = getelementptr i16* %a, i64 -1         ; <i16*> [#uses=1]
+        store i16 3, i16* %add.ptr
+        ret void
+}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores.ll b/test/CodeGen/SystemZ/05-MemLoadsStores.ll
new file mode 100644
index 0000000000000..cf12063e5d4c3
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemLoadsStores.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s | grep ly     | count 2
+; RUN: llc < %s | grep sty    | count 2
+; RUN: llc < %s | grep {l	%}  | count 2
+; RUN: llc < %s | grep {st	%} | count 2
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @foo1(i32* nocapture %foo, i32* nocapture %bar) nounwind {
+entry:
+	%tmp1 = load i32* %foo		; <i32> [#uses=1]
+	store i32 %tmp1, i32* %bar
+	ret void
+}
+
+define void @foo2(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+	%add.ptr = getelementptr i32* %foo, i64 1		; <i32*> [#uses=1]
+	%tmp1 = load i32* %add.ptr		; <i32> [#uses=1]
+	%add.ptr3.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
+	store i32 %tmp1, i32* %add.ptr5
+	ret void
+}
+
+define void @foo3(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+	%sub.ptr = getelementptr i32* %foo, i64 -1		; <i32*> [#uses=1]
+	%tmp1 = load i32* %sub.ptr		; <i32> [#uses=1]
+	%sub.ptr3.sum = add i64 %idx, -1		; <i64> [#uses=1]
+	%add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum		; <i32*> [#uses=1]
+	store i32 %tmp1, i32* %add.ptr
+	ret void
+}
+
+define void @foo4(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+	%add.ptr = getelementptr i32* %foo, i64 8192		; <i32*> [#uses=1]
+	%tmp1 = load i32* %add.ptr		; <i32> [#uses=1]
+	%add.ptr3.sum = add i64 %idx, 8192		; <i64> [#uses=1]
+	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
+	store i32 %tmp1, i32* %add.ptr5
+	ret void
+}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
new file mode 100644
index 0000000000000..1e6232a625508
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s | grep {sthy.%} | count 2
+; RUN: llc < %s | grep {lhy.%}  | count 2
+; RUN: llc < %s | grep {lh.%}   | count 6
+; RUN: llc < %s | grep {sth.%}  | count 2
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @foo1(i16* nocapture %foo, i16* nocapture %bar) nounwind {
+entry:
+	%tmp1 = load i16* %foo		; <i16> [#uses=1]
+	store i16 %tmp1, i16* %bar
+	ret void
+}
+
+define void @foo2(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
+entry:
+	%add.ptr = getelementptr i16* %foo, i64 1		; <i16*> [#uses=1]
+	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
+	%add.ptr3.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum		; <i16*> [#uses=1]
+	store i16 %tmp1, i16* %add.ptr5
+	ret void
+}
+
+define void @foo3(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
+entry:
+	%sub.ptr = getelementptr i16* %foo, i64 -1		; <i16*> [#uses=1]
+	%tmp1 = load i16* %sub.ptr		; <i16> [#uses=1]
+	%sub.ptr3.sum = add i64 %idx, -1		; <i64> [#uses=1]
+	%add.ptr = getelementptr i16* %bar, i64 %sub.ptr3.sum		; <i16*> [#uses=1]
+	store i16 %tmp1, i16* %add.ptr
+	ret void
+}
+
+define void @foo4(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
+entry:
+	%add.ptr = getelementptr i16* %foo, i64 8192		; <i16*> [#uses=1]
+	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
+	%add.ptr3.sum = add i64 %idx, 8192		; <i64> [#uses=1]
+	%add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum		; <i16*> [#uses=1]
+	store i16 %tmp1, i16* %add.ptr5
+	ret void
+}
+
+define void @foo5(i16* nocapture %foo, i32* nocapture %bar) nounwind {
+entry:
+	%tmp1 = load i16* %foo		; <i16> [#uses=1]
+	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
+	store i32 %conv, i32* %bar
+	ret void
+}
+
+define void @foo6(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+	%add.ptr = getelementptr i16* %foo, i64 1		; <i16*> [#uses=1]
+	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
+	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
+	%add.ptr3.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
+	store i32 %conv, i32* %add.ptr5
+	ret void
+}
+
+define void @foo7(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+	%sub.ptr = getelementptr i16* %foo, i64 -1		; <i16*> [#uses=1]
+	%tmp1 = load i16* %sub.ptr		; <i16> [#uses=1]
+	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
+	%sub.ptr3.sum = add i64 %idx, -1		; <i64> [#uses=1]
+	%add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum		; <i32*> [#uses=1]
+	store i32 %conv, i32* %add.ptr
+	ret void
+}
+
+define void @foo8(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
+entry:
+	%add.ptr = getelementptr i16* %foo, i64 8192		; <i16*> [#uses=1]
+	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
+	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
+	%add.ptr3.sum = add i64 %idx, 8192		; <i64> [#uses=1]
+	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
+	store i32 %conv, i32* %add.ptr5
+	ret void
+}
diff --git a/test/CodeGen/SystemZ/05-MemRegLoads.ll b/test/CodeGen/SystemZ/05-MemRegLoads.ll
new file mode 100644
index 0000000000000..f690a4889962e
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemRegLoads.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=systemz | not grep aghi
+; RUN: llc < %s -march=systemz | grep llgf | count 1
+; RUN: llc < %s -march=systemz | grep llgh | count 1
+; RUN: llc < %s -march=systemz | grep llgc | count 1
+; RUN: llc < %s -march=systemz | grep lgf  | count 2
+; RUN: llc < %s -march=systemz | grep lgh  | count 2
+; RUN: llc < %s -march=systemz | grep lgb  | count 1
+
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define zeroext i64 @foo1(i64* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
+	%tmp3 = load i64* %add.ptr2		; <i64> [#uses=1]
+	ret i64 %tmp3
+}
+
+define zeroext i32 @foo2(i32* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
+	%tmp3 = load i32* %add.ptr2		; <i32> [#uses=1]
+	ret i32 %tmp3
+}
+
+define zeroext i16 @foo3(i16* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
+	%tmp3 = load i16* %add.ptr2		; <i16> [#uses=1]
+	ret i16 %tmp3
+}
+
+define zeroext i8 @foo4(i8* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
+	%tmp3 = load i8* %add.ptr2		; <i8> [#uses=1]
+	ret i8 %tmp3
+}
+
+define signext i64 @foo5(i64* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
+	%tmp3 = load i64* %add.ptr2		; <i64> [#uses=1]
+	ret i64 %tmp3
+}
+
+define signext i32 @foo6(i32* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
+	%tmp3 = load i32* %add.ptr2		; <i32> [#uses=1]
+	ret i32 %tmp3
+}
+
+define signext i16 @foo7(i16* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
+	%tmp3 = load i16* %add.ptr2		; <i16> [#uses=1]
+	ret i16 %tmp3
+}
+
+define signext i8 @foo8(i8* nocapture %a, i64 %idx) nounwind readonly {
+entry:
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
+	%tmp3 = load i8* %add.ptr2		; <i8> [#uses=1]
+	ret i8 %tmp3
+}
diff --git a/test/CodeGen/SystemZ/05-MemRegStores.ll b/test/CodeGen/SystemZ/05-MemRegStores.ll
new file mode 100644
index 0000000000000..b851c3fa6e004
--- /dev/null
+++ b/test/CodeGen/SystemZ/05-MemRegStores.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s | not grep aghi
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo1(i64* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+
+; CHECK: foo1:
+; CHECK:   stg %r4, 8(%r1,%r2)
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
+	store i64 %val, i64* %add.ptr2
+	ret void
+}
+
+define void @foo2(i32* nocapture %a, i64 %idx, i32 %val) nounwind {
+entry:
+; CHECK: foo2:
+; CHECK:   st %r4, 4(%r1,%r2)
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
+	store i32 %val, i32* %add.ptr2
+	ret void
+}
+
+define void @foo3(i16* nocapture %a, i64 %idx, i16 zeroext %val) nounwind {
+entry:
+; CHECK: foo3:
+; CHECK: sth     %r4, 2(%r1,%r2)
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
+	store i16 %val, i16* %add.ptr2
+	ret void
+}
+
+define void @foo4(i8* nocapture %a, i64 %idx, i8 zeroext %val) nounwind {
+entry:
+; CHECK: foo4:
+; CHECK: stc     %r4, 1(%r3,%r2)
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
+	store i8 %val, i8* %add.ptr2
+	ret void
+}
+
+define void @foo5(i8* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+; CHECK: foo5:
+; CHECK: stc     %r4, 1(%r3,%r2)
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
+	%conv = trunc i64 %val to i8		; <i8> [#uses=1]
+	store i8 %conv, i8* %add.ptr2
+	ret void
+}
+
+define void @foo6(i16* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+; CHECK: foo6:
+; CHECK: sth     %r4, 2(%r1,%r2)
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
+	%conv = trunc i64 %val to i16		; <i16> [#uses=1]
+	store i16 %conv, i16* %add.ptr2
+	ret void
+}
+
+define void @foo7(i32* nocapture %a, i64 %idx, i64 %val) nounwind {
+entry:
+; CHECK: foo7:
+; CHECK: st      %r4, 4(%r1,%r2)
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
+	%conv = trunc i64 %val to i32		; <i32> [#uses=1]
+	store i32 %conv, i32* %add.ptr2
+	ret void
+}
diff --git a/test/CodeGen/SystemZ/06-CallViaStack.ll b/test/CodeGen/SystemZ/06-CallViaStack.ll
new file mode 100644
index 0000000000000..e904f49de15fe
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-CallViaStack.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s | grep 168 | count 1
+; RUN: llc < %s | grep 160 | count 3
+; RUN: llc < %s | grep 328 | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) nounwind {
+entry:
+	%a = alloca i64, align 8		; <i64*> [#uses=3]
+	store i64 %g, i64* %a
+	call void @bar(i64* %a) nounwind
+	%tmp1 = load i64* %a		; <i64> [#uses=1]
+	ret i64 %tmp1
+}
+
+declare void @bar(i64*)
diff --git a/test/CodeGen/SystemZ/06-FrameIdxLoad.ll b/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
new file mode 100644
index 0000000000000..c71da9b4418c4
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s | grep 160 | count 1
+; RUN: llc < %s | grep 168 | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
+entry:
+        ret i64 %f
+}
+
+define i64 @bar(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
+entry:
+	%conv = ptrtoint i64* %g to i64		; <i64> [#uses=1]
+	ret i64 %conv
+}
diff --git a/test/CodeGen/SystemZ/06-LocalFrame.ll b/test/CodeGen/SystemZ/06-LocalFrame.ll
new file mode 100644
index 0000000000000..d89b0dfc76c92
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-LocalFrame.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s | grep 160 | count 1
+; RUN: llc < %s | grep 328 | count 1
+; RUN: llc < %s | grep 168 | count 1
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define noalias i64* @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f) nounwind readnone {
+entry:
+	%g = alloca i64, align 8		; <i64*> [#uses=1]
+	%add.ptr = getelementptr i64* %g, i64 %f		; <i64*> [#uses=1]
+	ret i64* %add.ptr
+}
diff --git a/test/CodeGen/SystemZ/06-SimpleCall.ll b/test/CodeGen/SystemZ/06-SimpleCall.ll
new file mode 100644
index 0000000000000..fd4b5029c731c
--- /dev/null
+++ b/test/CodeGen/SystemZ/06-SimpleCall.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+	tail call void @bar() nounwind
+	ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/SystemZ/07-BrCond.ll b/test/CodeGen/SystemZ/07-BrCond.ll
new file mode 100644
index 0000000000000..859971713aa35
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-BrCond.ll
@@ -0,0 +1,141 @@
+; RUN: llc < %s | grep je  | count 1
+; RUN: llc < %s | grep jne | count 1
+; RUN: llc < %s | grep jhe | count 2
+; RUN: llc < %s | grep jle | count 2
+; RUN: llc < %s | grep jh  | count 4
+; RUN: llc < %s | grep jl  | count 4
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp ult i64 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+declare void @bar()
+
+define void @foo1(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp ugt i64 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo2(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp ugt i64 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo3(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp ult i64 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo4(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp eq i64 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo5(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp eq i64 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo6(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp slt i64 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo7(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp sgt i64 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo8(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp sgt i64 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo9(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp slt i64 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/SystemZ/07-BrCond32.ll b/test/CodeGen/SystemZ/07-BrCond32.ll
new file mode 100644
index 0000000000000..8ece5ac09840d
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-BrCond32.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s | grep je  | count 1
+; RUN: llc < %s | grep jne | count 1
+; RUN: llc < %s | grep jhe | count 2
+; RUN: llc < %s | grep jle | count 2
+; RUN: llc < %s | grep jh  | count 4
+; RUN: llc < %s | grep jl  | count 4
+
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp ult i32 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+declare void @bar()
+
+define void @foo1(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp ugt i32 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo2(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp ugt i32 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo3(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp ult i32 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo4(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp eq i32 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo5(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp eq i32 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo6(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp slt i32 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo7(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp sgt i32 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo8(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp sgt i32 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo9(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp slt i32 %a, %b		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/SystemZ/07-BrUnCond.ll b/test/CodeGen/SystemZ/07-BrUnCond.ll
new file mode 100644
index 0000000000000..e0bc302c73141
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-BrUnCond.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-linux"
+
+define void @foo() noreturn nounwind {
+entry:
+	tail call void @baz() nounwind
+	br label %l1
+
+l1:		; preds = %entry, %l1
+	tail call void @bar() nounwind
+	br label %l1
+}
+
+declare void @bar()
+
+declare void @baz()
diff --git a/test/CodeGen/SystemZ/07-CmpImm.ll b/test/CodeGen/SystemZ/07-CmpImm.ll
new file mode 100644
index 0000000000000..4d0ebda0c0351
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-CmpImm.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s | grep cgfi | count 8
+; RUN: llc < %s | grep clgfi | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp eq i64 %a, 0		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+declare void @bar()
+
+define void @foo1(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp ugt i64 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo2(i64 %a, i64 %b) nounwind {
+entry:
+	%cmp = icmp ugt i64 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo3(i64 %a) nounwind {
+entry:
+	%cmp = icmp eq i64 %a, 0		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo4(i64 %a) nounwind {
+entry:
+	%cmp = icmp eq i64 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo5(i64 %a) nounwind {
+entry:
+	%cmp = icmp eq i64 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo6(i64 %a) nounwind {
+entry:
+	%cmp = icmp slt i64 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo7(i64 %a) nounwind {
+entry:
+	%cmp = icmp sgt i64 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo8(i64 %a) nounwind {
+entry:
+	%cmp = icmp sgt i64 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo9(i64 %a) nounwind {
+entry:
+	%cmp = icmp slt i64 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/SystemZ/07-CmpImm32.ll b/test/CodeGen/SystemZ/07-CmpImm32.ll
new file mode 100644
index 0000000000000..add34faafd3f6
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-CmpImm32.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s | grep jl  | count 3
+; RUN: llc < %s | grep jh  | count 3
+; RUN: llc < %s | grep je  | count 2
+; RUN: llc < %s | grep jne | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @foo(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp eq i32 %a, 0		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+declare void @bar()
+
+define void @foo1(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp ugt i32 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo2(i32 %a, i32 %b) nounwind {
+entry:
+	%cmp = icmp ugt i32 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo3(i32 %a) nounwind {
+entry:
+	%cmp = icmp eq i32 %a, 0		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo4(i32 %a) nounwind {
+entry:
+	%cmp = icmp eq i32 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo5(i32 %a) nounwind {
+entry:
+	%cmp = icmp eq i32 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo6(i32 %a) nounwind {
+entry:
+	%cmp = icmp slt i32 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo7(i32 %a) nounwind {
+entry:
+	%cmp = icmp sgt i32 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.then, label %if.end
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo8(i32 %a) nounwind {
+entry:
+	%cmp = icmp sgt i32 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
+
+define void @foo9(i32 %a) nounwind {
+entry:
+	%cmp = icmp slt i32 %a, 1		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	tail call void @bar() nounwind
+	ret void
+
+if.end:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/SystemZ/07-SelectCC.ll b/test/CodeGen/SystemZ/07-SelectCC.ll
new file mode 100644
index 0000000000000..aa4b36e7d5d47
--- /dev/null
+++ b/test/CodeGen/SystemZ/07-SelectCC.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | grep clgr
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %a, i64 %b) nounwind readnone {
+entry:
+	%cmp = icmp ult i64 %a, %b		; <i1> [#uses=1]
+	%cond = select i1 %cmp, i64 %a, i64 %b		; <i64> [#uses=1]
+	ret i64 %cond
+}
diff --git a/test/CodeGen/SystemZ/08-DivRem.ll b/test/CodeGen/SystemZ/08-DivRem.ll
new file mode 100644
index 0000000000000..ff1e441882a0a
--- /dev/null
+++ b/test/CodeGen/SystemZ/08-DivRem.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s | grep dsgr  | count 2
+; RUN: llc < %s | grep dsgfr | count 2
+; RUN: llc < %s | grep dlr   | count 2
+; RUN: llc < %s | grep dlgr  | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @div(i64 %a, i64 %b) nounwind readnone {
+entry:
+	%div = sdiv i64 %a, %b		; <i64> [#uses=1]
+	ret i64 %div
+}
+
+define i32 @div1(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%div = sdiv i32 %a, %b		; <i32> [#uses=1]
+	ret i32 %div
+}
+
+define i64 @div2(i64 %a, i64 %b) nounwind readnone {
+entry:
+	%div = udiv i64 %a, %b		; <i64> [#uses=1]
+	ret i64 %div
+}
+
+define i32 @div3(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%div = udiv i32 %a, %b		; <i32> [#uses=1]
+	ret i32 %div
+}
+
+define i64 @rem(i64 %a, i64 %b) nounwind readnone {
+entry:
+	%rem = srem i64 %a, %b		; <i64> [#uses=1]
+	ret i64 %rem
+}
+
+define i32 @rem1(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%rem = srem i32 %a, %b		; <i32> [#uses=1]
+	ret i32 %rem
+}
+
+define i64 @rem2(i64 %a, i64 %b) nounwind readnone {
+entry:
+	%rem = urem i64 %a, %b		; <i64> [#uses=1]
+	ret i64 %rem
+}
+
+define i32 @rem3(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%rem = urem i32 %a, %b		; <i32> [#uses=1]
+	ret i32 %rem
+}
diff --git a/test/CodeGen/SystemZ/08-DivRemMemOp.ll b/test/CodeGen/SystemZ/08-DivRemMemOp.ll
new file mode 100644
index 0000000000000..d6ec0e7440ac1
--- /dev/null
+++ b/test/CodeGen/SystemZ/08-DivRemMemOp.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s | grep {dsgf.%} | count 2
+; RUN: llc < %s | grep {dsg.%}  | count 2
+; RUN: llc < %s | grep {dl.%}   | count 2
+; RUN: llc < %s | grep dlg      | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @div(i64 %a, i64* %b) nounwind readnone {
+entry:
+	%b1 = load i64* %b
+	%div = sdiv i64 %a, %b1
+	ret i64 %div
+}
+
+define i64 @div1(i64 %a, i64* %b) nounwind readnone {
+entry:
+        %b1 = load i64* %b
+        %div = udiv i64 %a, %b1
+        ret i64 %div
+}
+
+define i64 @rem(i64 %a, i64* %b) nounwind readnone {
+entry:
+        %b1 = load i64* %b
+        %div = srem i64 %a, %b1
+        ret i64 %div
+}
+
+define i64 @rem1(i64 %a, i64* %b) nounwind readnone {
+entry:
+        %b1 = load i64* %b
+        %div = urem i64 %a, %b1
+        ret i64 %div
+}
+
+define i32 @div2(i32 %a, i32* %b) nounwind readnone {
+entry:
+        %b1 = load i32* %b
+        %div = sdiv i32 %a, %b1
+        ret i32 %div
+}
+
+define i32 @div3(i32 %a, i32* %b) nounwind readnone {
+entry:
+        %b1 = load i32* %b
+        %div = udiv i32 %a, %b1
+        ret i32 %div
+}
+
+define i32 @rem2(i32 %a, i32* %b) nounwind readnone {
+entry:
+        %b1 = load i32* %b
+        %div = srem i32 %a, %b1
+        ret i32 %div
+}
+
+define i32 @rem3(i32 %a, i32* %b) nounwind readnone {
+entry:
+        %b1 = load i32* %b
+        %div = urem i32 %a, %b1
+        ret i32 %div
+}
+
diff --git a/test/CodeGen/SystemZ/08-SimpleMuls.ll b/test/CodeGen/SystemZ/08-SimpleMuls.ll
new file mode 100644
index 0000000000000..1ab88d6ee7dd1
--- /dev/null
+++ b/test/CodeGen/SystemZ/08-SimpleMuls.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s | grep msgr | count 2
+; RUN: llc < %s | grep msr  | count 2
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i64 @foo(i64 %a, i64 %b) nounwind readnone {
+entry:
+	%mul = mul i64 %b, %a		; <i64> [#uses=1]
+	ret i64 %mul
+}
+
+define i64 @foo2(i64 %a, i64 %b) nounwind readnone {
+entry:
+	%mul = mul i64 %b, %a		; <i64> [#uses=1]
+	ret i64 %mul
+}
+
+define i32 @foo3(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%mul = mul i32 %b, %a		; <i32> [#uses=1]
+	ret i32 %mul
+}
+
+define i32 @foo4(i32 %a, i32 %b) nounwind readnone {
+entry:
+	%mul = mul i32 %b, %a		; <i32> [#uses=1]
+	ret i32 %mul
+}
diff --git a/test/CodeGen/SystemZ/09-DynamicAlloca.ll b/test/CodeGen/SystemZ/09-DynamicAlloca.ll
new file mode 100644
index 0000000000000..27189ab41567c
--- /dev/null
+++ b/test/CodeGen/SystemZ/09-DynamicAlloca.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @foo(i64 %N) nounwind {
+entry:
+	%N3 = trunc i64 %N to i32		; <i32> [#uses=1]
+	%vla = alloca i8, i32 %N3, align 2		; <i8*> [#uses=1]
+	call void @bar(i8* %vla) nounwind
+	ret void
+}
+
+declare void @bar(i8*)
diff --git a/test/CodeGen/SystemZ/09-Globals.ll b/test/CodeGen/SystemZ/09-Globals.ll
new file mode 100644
index 0000000000000..6e0c1ab2c1657
--- /dev/null
+++ b/test/CodeGen/SystemZ/09-Globals.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | grep larl | count 3
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-linux"
+@bar = common global i64 0, align 8		; <i64*> [#uses=3]
+
+define i64 @foo() nounwind readonly {
+entry:
+	%tmp = load i64* @bar		; <i64> [#uses=1]
+	ret i64 %tmp
+}
+
+define i64* @foo2() nounwind readnone {
+entry:
+	ret i64* @bar
+}
+
+define i64* @foo3(i64 %idx) nounwind readnone {
+entry:
+	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
+	%add.ptr2 = getelementptr i64* @bar, i64 %add.ptr.sum		; <i64*> [#uses=1]
+	ret i64* %add.ptr2
+}
diff --git a/test/CodeGen/SystemZ/09-Switches.ll b/test/CodeGen/SystemZ/09-Switches.ll
new file mode 100644
index 0000000000000..32aaa62a58cf5
--- /dev/null
+++ b/test/CodeGen/SystemZ/09-Switches.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=systemz | grep larl
+
+define i32 @main(i32 %tmp158) {
+entry:
+        switch i32 %tmp158, label %bb336 [
+		 i32 -2147483648, label %bb338
+		 i32 -2147483647, label %bb338
+		 i32 -2147483646, label %bb338
+		 i32 120, label %bb338
+		 i32 121, label %bb339
+		 i32 122, label %bb340
+                 i32 123, label %bb341
+                 i32 124, label %bb342
+                 i32 125, label %bb343
+                 i32 126, label %bb336
+		 i32 1024, label %bb338
+                 i32 0, label %bb338
+                 i32 1, label %bb338
+                 i32 2, label %bb338
+                 i32 3, label %bb338
+                 i32 4, label %bb338
+		 i32 5, label %bb338
+        ]
+bb336:
+  ret i32 10
+bb338:
+  ret i32 11
+bb339:
+  ret i32 12
+bb340:
+  ret i32 13
+bb341:
+  ret i32 14
+bb342:
+  ret i32 15
+bb343:
+  ret i32 18
+
+}
diff --git a/test/CodeGen/SystemZ/10-FuncsPic.ll b/test/CodeGen/SystemZ/10-FuncsPic.ll
new file mode 100644
index 0000000000000..cc325389d787c
--- /dev/null
+++ b/test/CodeGen/SystemZ/10-FuncsPic.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 3
+; RUN: llc < %s -relocation-model=pic | grep PLT | count 1
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+@ptr = external global void (...)*		; <void (...)**> [#uses=2]
+
+define void @foo1() nounwind {
+entry:
+	store void (...)* @func, void (...)** @ptr
+	ret void
+}
+
+declare void @func(...)
+
+define void @foo2() nounwind {
+entry:
+	tail call void (...)* @func() nounwind
+	ret void
+}
+
+define void @foo3() nounwind {
+entry:
+	%tmp = load void (...)** @ptr		; <void (...)*> [#uses=1]
+	tail call void (...)* %tmp() nounwind
+	ret void
+}
diff --git a/test/CodeGen/SystemZ/10-GlobalsPic.ll b/test/CodeGen/SystemZ/10-GlobalsPic.ll
new file mode 100644
index 0000000000000..a77671e2ba7b9
--- /dev/null
+++ b/test/CodeGen/SystemZ/10-GlobalsPic.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 6
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+@src = external global i32		; <i32*> [#uses=2]
+@dst = external global i32		; <i32*> [#uses=2]
+@ptr = external global i32*		; <i32**> [#uses=2]
+
+define void @foo1() nounwind {
+entry:
+	%tmp = load i32* @src		; <i32> [#uses=1]
+	store i32 %tmp, i32* @dst
+	ret void
+}
+
+define void @foo2() nounwind {
+entry:
+	store i32* @dst, i32** @ptr
+	ret void
+}
+
+define void @foo3() nounwind {
+entry:
+	%tmp = load i32* @src		; <i32> [#uses=1]
+	%tmp1 = load i32** @ptr		; <i32*> [#uses=1]
+	%arrayidx = getelementptr i32* %tmp1, i64 1		; <i32*> [#uses=1]
+	store i32 %tmp, i32* %arrayidx
+	ret void
+}
diff --git a/test/CodeGen/SystemZ/11-BSwap.ll b/test/CodeGen/SystemZ/11-BSwap.ll
new file mode 100644
index 0000000000000..609d9dcf59c59
--- /dev/null
+++ b/test/CodeGen/SystemZ/11-BSwap.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s | FileCheck %s
+
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+
+define i16 @foo(i16 zeroext %a) zeroext {
+	%res = tail call i16 @llvm.bswap.i16(i16 %a)
+	ret i16 %res
+}
+
+define i32 @foo2(i32 zeroext %a) zeroext {
+; CHECK: foo2:
+; CHECK:  lrvr %r1, %r2
+        %res = tail call i32 @llvm.bswap.i32(i32 %a)
+        ret i32 %res
+}
+
+define i64 @foo3(i64 %a) zeroext {
+; CHECK: foo3:
+; CHECK:  lrvgr %r2, %r2
+        %res = tail call i64 @llvm.bswap.i64(i64 %a)
+        ret i64 %res
+}
+
+define i16 @foo4(i16* %b) zeroext {
+	%a = load i16* %b
+        %res = tail call i16 @llvm.bswap.i16(i16 %a)
+        ret i16 %res
+}
+
+define i32 @foo5(i32* %b) zeroext {
+; CHECK: foo5:
+; CHECK:  lrv %r1, 0(%r2)
+	%a = load i32* %b
+        %res = tail call i32 @llvm.bswap.i32(i32 %a)
+        ret i32 %res
+}
+
+define i64 @foo6(i64* %b) {
+; CHECK: foo6:
+; CHECK:  lrvg %r2, 0(%r2)
+	%a = load i64* %b
+        %res = tail call i64 @llvm.bswap.i64(i64 %a)
+        ret i64 %res
+}
+
+define void @foo7(i16 %a, i16* %b) {
+        %res = tail call i16 @llvm.bswap.i16(i16 %a)
+        store i16 %res, i16* %b
+        ret void
+}
+
+define void @foo8(i32 %a, i32* %b) {
+; CHECK: foo8:
+; CHECK:  strv %r2, 0(%r3)
+        %res = tail call i32 @llvm.bswap.i32(i32 %a)
+        store i32 %res, i32* %b
+        ret void
+}
+
+define void @foo9(i64 %a, i64* %b) {
+; CHECK: foo9:
+; CHECK:  strvg %r2, 0(%r3)
+        %res = tail call i64 @llvm.bswap.i64(i64 %a)
+        store i64 %res, i64* %b
+        ret void
+}
+
+declare i16 @llvm.bswap.i16(i16) nounwind readnone
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
diff --git a/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll b/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
new file mode 100644
index 0000000000000..65f8e14a9ee17
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-unknown-linux-gnu"
+
+define i32 @main() nounwind {
+entry:
+	%call = call i32 (...)* @random() nounwind		; <i32> [#uses=0]
+	unreachable
+}
+
+declare i32 @random(...)
diff --git a/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll b/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
new file mode 100644
index 0000000000000..3cfa97dfc2ba7
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=systemz | grep nilf | count 1
+; RUN: llc < %s -march=systemz | grep nill | count 1
+
+define i32 @gnu_dev_major(i64 %__dev) nounwind readnone {
+entry:
+        %shr = lshr i64 %__dev, 8               ; <i64> [#uses=1]
+        %shr8 = trunc i64 %shr to i32           ; <i32> [#uses=1]
+        %shr2 = lshr i64 %__dev, 32             ; <i64> [#uses=1]
+        %conv = trunc i64 %shr2 to i32          ; <i32> [#uses=1]
+        %and3 = and i32 %conv, -4096            ; <i32> [#uses=1]
+        %and6 = and i32 %shr8, 4095             ; <i32> [#uses=1]
+        %conv5 = or i32 %and6, %and3            ; <i32> [#uses=1]
+        ret i32 %conv5
+}
diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
new file mode 100644
index 0000000000000..3317864c01473
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=systemz | grep rll
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+	%shl = shl i32 %x, 0		; <i32> [#uses=1]
+	%sub = sub i32 32, 0		; <i32> [#uses=1]
+	%shr = lshr i32 %x, %sub		; <i32> [#uses=1]
+	%or = or i32 %shr, %shl		; <i32> [#uses=1]
+	ret i32 %or
+}
diff --git a/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll b/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
new file mode 100644
index 0000000000000..5f6ec50df6c4b
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
+target triple = "s390x-ibm-linux"
+	%struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i8*, i64, i8, i8, i8, i8, i8, i8, i8, i8 }>
+	%struct.re_registers = type <{ i32, i8, i8, i8, i8, i32*, i32* }>
+
+define i32 @xre_search_2(%struct.re_pattern_buffer* nocapture %bufp, i8* %string1, i32 %size1, i8* %string2, i32 %size2, i32 %startpos, i32 %range, %struct.re_registers* %regs, i32 %stop) nounwind {
+entry:
+	%cmp17.i = icmp slt i32 undef, %startpos		; <i1> [#uses=1]
+	%or.cond.i = or i1 undef, %cmp17.i		; <i1> [#uses=1]
+	br i1 %or.cond.i, label %byte_re_search_2.exit, label %if.then20.i
+
+if.then20.i:		; preds = %entry
+	ret i32 -2
+
+byte_re_search_2.exit:		; preds = %entry
+	ret i32 -1
+}
diff --git a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
new file mode 100644
index 0000000000000..99d0ee7b03d97
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define void @compdecomp(i8* nocapture %data, i64 %data_len) nounwind {
+entry:
+	br label %for.body38
+
+for.body38:		; preds = %for.body38, %entry
+	br i1 undef, label %for.cond220, label %for.body38
+
+for.cond220:		; preds = %for.cond220, %for.body38
+	br i1 false, label %for.cond220, label %for.end297
+
+for.end297:		; preds = %for.cond220
+	%tmp334 = load i8* undef		; <i8> [#uses=1]
+	%conv343 = zext i8 %tmp334 to i32		; <i32> [#uses=1]
+	%sub344 = add i32 %conv343, -1		; <i32> [#uses=1]
+	%shl345 = shl i32 1, %sub344		; <i32> [#uses=1]
+	%conv346 = sext i32 %shl345 to i64		; <i64> [#uses=1]
+	br label %for.body356
+
+for.body356:		; preds = %for.body356, %for.end297
+	%mask.1633 = phi i64 [ %conv346, %for.end297 ], [ undef, %for.body356 ]		; <i64> [#uses=0]
+	br label %for.body356
+}
diff --git a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
new file mode 100644
index 0000000000000..a35167fba04f4
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define signext i32 @bit_place_piece(i32 signext %col, i32 signext %player, i64* nocapture %b1, i64* nocapture %b2) nounwind {
+entry:
+	br i1 undef, label %for.body, label %return
+
+for.body:		; preds = %entry
+	%add = add i32 0, %col		; <i32> [#uses=1]
+	%sh_prom = zext i32 %add to i64		; <i64> [#uses=1]
+	%shl = shl i64 1, %sh_prom		; <i64> [#uses=1]
+	br i1 undef, label %if.then13, label %if.else
+
+if.then13:		; preds = %for.body
+	ret i32 0
+
+if.else:		; preds = %for.body
+	%or34 = or i64 undef, %shl		; <i64> [#uses=0]
+	ret i32 0
+
+return:		; preds = %entry
+	ret i32 1
+}
diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
new file mode 100644
index 0000000000000..6a76a8e0cb1fa
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | grep 168
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) nounwind
+
+declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind
+
+define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind {
+newFuncRoot:
+	br label %bb3
+
+bb4.exitStub:		; preds = %bb3
+	store double %call, double* %call.out
+	ret void
+
+bb3:		; preds = %newFuncRoot
+	tail call void @rdft(i32 signext %nfft, i32 signext -1, double* %arrayidx44.reload, i32* %ip, double* %w) nounwind
+	%call = tail call double @mp_mul_d2i_test(i32 signext %radix, i32 signext %nfft, double* %tmpfft)		; <double> [#uses=1]
+	br label %bb4.exitStub
+}
diff --git a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
new file mode 100644
index 0000000000000..564d3438310a9
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define float @foo(i32 signext %a) {
+entry:
+    %b = bitcast i32 %a to float
+    ret float %b
+}
+
+define i32 @bar(float %a) {
+entry:
+    %b = bitcast float %a to i32
+    ret i32 %b
+}
+\ No newline at end of file
diff --git a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
new file mode 100644
index 0000000000000..a91e29ea4f9d9
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s
+
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
+target triple = "s390x-linux"
+
+define signext i32 @dfg_parse() nounwind {
+entry:
+	br i1 undef, label %if.then2208, label %if.else2360
+
+if.then2208:		; preds = %entry
+	br i1 undef, label %bb.nph3189, label %for.end2270
+
+bb.nph3189:		; preds = %if.then2208
+	unreachable
+
+for.end2270:		; preds = %if.then2208
+	%call2279 = call i64 @strlen(i8* undef) nounwind		; <i64> [#uses=1]
+	%add2281 = add i64 0, %call2279		; <i64> [#uses=1]
+	%tmp2283 = trunc i64 %add2281 to i32		; <i32> [#uses=1]
+	%tmp2284 = alloca i8, i32 %tmp2283, align 2		; <i8*> [#uses=1]
+	%yyd.0.i2561.13 = getelementptr i8* %tmp2284, i64 13		; <i8*> [#uses=1]
+	store i8 117, i8* %yyd.0.i2561.13
+	br label %while.cond.i2558
+
+while.cond.i2558:		; preds = %while.cond.i2558, %for.end2270
+	br label %while.cond.i2558
+
+if.else2360:		; preds = %entry
+	unreachable
+}
+
+declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll b/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
new file mode 100644
index 0000000000000..f7686f14da9e9
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
+target triple = "s390x-ibm-linux-gnu"
+
+@__JCR_LIST__ = internal global [0 x i8*] zeroinitializer, section ".jcr", align 8 ; <[0 x i8*]*> [#uses=1]
+
+define internal void @frame_dummy() nounwind {
+entry:
+  %asmtmp = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=2]
+  %0 = icmp eq void (i8*)* %asmtmp, null          ; <i1> [#uses=1]
+  br i1 %0, label %return, label %bb3
+
+bb3:                                              ; preds = %entry
+  tail call void %asmtmp(i8* bitcast ([0 x i8*]* @__JCR_LIST__ to i8*)) nounwind
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+declare extern_weak void @_Jv_RegisterClasses(i8*)
diff --git a/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll b/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
new file mode 100644
index 0000000000000..fde7d9d281c2d
--- /dev/null
+++ b/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
+target triple = "s390x-ibm-linux-gnu"
+
+define double @foo(double %a, double %b) nounwind {
+entry:
+; CHECK: cpsdr %f0, %f2, %f0
+  %0 = tail call double @copysign(double %a, double %b) nounwind readnone
+  ret double %0
+}
+
+define float @bar(float %a, float %b) nounwind {
+entry:
+; CHECK: cpsdr %f0, %f2, %f0
+  %0 = tail call float @copysignf(float %a, float %b) nounwind readnone
+  ret float %0
+}
+
+
+declare double @copysign(double, double) nounwind readnone
+declare float @copysignf(float, float) nounwind readnone
diff --git a/test/CodeGen/SystemZ/dg.exp b/test/CodeGen/SystemZ/dg.exp
new file mode 100644
index 0000000000000..e9624bac68e97
--- /dev/null
+++ b/test/CodeGen/SystemZ/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target SystemZ] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
index 19c156d47f433..1e61b235a2bbe 100644
--- a/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
+++ b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin
+; RUN: llc < %s -mtriple=thumb-apple-darwin
 
 %struct.rtx_def = type { i8 }
 @str = external global [7 x i8]
diff --git a/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll b/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
index ee52cf0f4e7b1..be2b839c21d7e 100644
--- a/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
+++ b/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin
+; RUN: llc < %s -mtriple=thumb-apple-darwin
 
 	%struct.color_sample = type { i32 }
 	%struct.ref = type { %struct.color_sample, i16, i16 }
diff --git a/test/CodeGen/Thumb/2007-03-06-AddR7.ll b/test/CodeGen/Thumb/2007-03-06-AddR7.ll
index ad3e195a0dd79..8d139e92bd3ba 100644
--- a/test/CodeGen/Thumb/2007-03-06-AddR7.ll
+++ b/test/CodeGen/Thumb/2007-03-06-AddR7.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin -relocation-model=pic \
+; RUN: llc < %s -march=thumb
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic \
 ; RUN:   -mattr=+v6,+vfp2 | not grep {add r., r7, #2 \\* 4}
 
 	%struct.__fooAllocator = type opaque
diff --git a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
index 159be4eca3348..2074bfd5d7b9e 100644
--- a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
+++ b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep r11
+; RUN: llc < %s | not grep r11
 
 target triple = "thumb-linux-gnueabi"
 	%struct.__sched_param = type { i32 }
diff --git a/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll b/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
index 9b2aba94ec8d2..5c883b3930dce 100644
--- a/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
+++ b/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep r0 | count 1
+; RUN: llc < %s -march=thumb | grep r0 | count 1
 
 define i32 @a(i32 %x, i32 %y) nounwind readnone {
 entry:
diff --git a/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll b/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll
new file mode 100644
index 0000000000000..471a82f271e04
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=thumbv6-elf | not grep "subs sp"
+; PR4567
+
+define arm_apcscc i8* @__gets_chk(i8* %s, i32 %slen) nounwind {
+entry:
+	br i1 undef, label %bb, label %bb1
+
+bb:		; preds = %entry
+	ret i8* undef
+
+bb1:		; preds = %entry
+	br i1 undef, label %bb3, label %bb2
+
+bb2:		; preds = %bb1
+	%0 = alloca i8, i32 undef, align 4		; <i8*> [#uses=0]
+	br label %bb4
+
+bb3:		; preds = %bb1
+	%1 = malloc i8, i32 undef		; <i8*> [#uses=0]
+	br label %bb4
+
+bb4:		; preds = %bb3, %bb2
+	br i1 undef, label %bb5, label %bb6
+
+bb5:		; preds = %bb4
+	%2 = call arm_apcscc  i8* @gets(i8* %s) nounwind		; <i8*> [#uses=1]
+	ret i8* %2
+
+bb6:		; preds = %bb4
+	unreachable
+}
+
+declare arm_apcscc i8* @gets(i8*) nounwind
diff --git a/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll b/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
new file mode 100644
index 0000000000000..6e035d0f70e6a
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin10
+
+@Time.2535 = external global i64		; <i64*> [#uses=2]
+
+define arm_apcscc i64 @millisecs() nounwind {
+entry:
+	%0 = load i64* @Time.2535, align 4		; <i64> [#uses=2]
+	%1 = add i64 %0, 1		; <i64> [#uses=1]
+	store i64 %1, i64* @Time.2535, align 4
+	ret i64 %0
+}
diff --git a/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll b/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll
new file mode 100644
index 0000000000000..f195348e1403b
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim
+
+	%struct.LinkList = type { i32, %struct.LinkList* }
+	%struct.List = type { i32, i32* }
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @main to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc i32 @main() nounwind {
+entry:
+	%ll = alloca %struct.LinkList*, align 4		; <%struct.LinkList**> [#uses=1]
+	%0 = call arm_apcscc  i32 @ReadList(%struct.LinkList** %ll, %struct.List** null) nounwind		; <i32> [#uses=1]
+	switch i32 %0, label %bb5 [
+		i32 7, label %bb4
+		i32 42, label %bb3
+	]
+
+bb3:		; preds = %entry
+	ret i32 1
+
+bb4:		; preds = %entry
+	ret i32 0
+
+bb5:		; preds = %entry
+	ret i32 1
+}
+
+declare arm_apcscc i32 @ReadList(%struct.LinkList** nocapture, %struct.List** nocapture) nounwind
diff --git a/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll b/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
new file mode 100644
index 0000000000000..ef4b5ce67c69a
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
@@ -0,0 +1,737 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin
+
+	%struct.BF_KEY = type { [18 x i32], [1024 x i32] }
+
+define arm_apcscc void @BF_encrypt(i32* nocapture %data, %struct.BF_KEY* nocapture %key, i32 %encrypt) nounwind {
+entry:
+	%0 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 0; <i32*> [#uses=2]
+	%1 = load i32* %data, align 4             ; <i32> [#uses=2]
+	%2 = load i32* undef, align 4             ; <i32> [#uses=2]
+	br i1 undef, label %bb1, label %bb
+
+bb:                                               ; preds = %entry
+	%3 = load i32* %0, align 4                ; <i32> [#uses=1]
+	%4 = xor i32 %3, %1                       ; <i32> [#uses=4]
+	%5 = load i32* null, align 4              ; <i32> [#uses=1]
+	%6 = lshr i32 %4, 24                      ; <i32> [#uses=1]
+	%7 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %6; <i32*> [#uses=1]
+	%8 = load i32* %7, align 4                ; <i32> [#uses=1]
+	%9 = lshr i32 %4, 16                      ; <i32> [#uses=1]
+	%10 = or i32 %9, 256                      ; <i32> [#uses=1]
+	%11 = and i32 %10, 511                    ; <i32> [#uses=1]
+	%12 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %11; <i32*> [#uses=1]
+	%13 = load i32* %12, align 4              ; <i32> [#uses=1]
+	%14 = add i32 %13, %8                     ; <i32> [#uses=1]
+	%15 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 undef; <i32*> [#uses=1]
+	%16 = load i32* %15, align 4              ; <i32> [#uses=1]
+	%17 = xor i32 %14, %16                    ; <i32> [#uses=1]
+	%18 = or i32 %4, 768                      ; <i32> [#uses=1]
+	%19 = and i32 %18, 1023                   ; <i32> [#uses=1]
+	%20 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %19; <i32*> [#uses=1]
+	%21 = load i32* %20, align 4              ; <i32> [#uses=1]
+	%22 = add i32 %17, %21                    ; <i32> [#uses=1]
+	%23 = xor i32 %5, %2                      ; <i32> [#uses=1]
+	%24 = xor i32 %23, %22                    ; <i32> [#uses=5]
+	%25 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 2; <i32*> [#uses=1]
+	%26 = load i32* %25, align 4              ; <i32> [#uses=1]
+	%27 = lshr i32 %24, 24                    ; <i32> [#uses=1]
+	%28 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %27; <i32*> [#uses=1]
+	%29 = load i32* %28, align 4              ; <i32> [#uses=1]
+	%30 = lshr i32 %24, 16                    ; <i32> [#uses=1]
+	%31 = or i32 %30, 256                     ; <i32> [#uses=1]
+	%32 = and i32 %31, 511                    ; <i32> [#uses=1]
+	%33 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %32; <i32*> [#uses=1]
+	%34 = load i32* %33, align 4              ; <i32> [#uses=1]
+	%35 = add i32 %34, %29                    ; <i32> [#uses=1]
+	%36 = lshr i32 %24, 8                     ; <i32> [#uses=1]
+	%37 = or i32 %36, 512                     ; <i32> [#uses=1]
+	%38 = and i32 %37, 767                    ; <i32> [#uses=1]
+	%39 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %38; <i32*> [#uses=1]
+	%40 = load i32* %39, align 4              ; <i32> [#uses=1]
+	%41 = xor i32 %35, %40                    ; <i32> [#uses=1]
+	%42 = or i32 %24, 768                     ; <i32> [#uses=1]
+	%43 = and i32 %42, 1023                   ; <i32> [#uses=1]
+	%44 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %43; <i32*> [#uses=1]
+	%45 = load i32* %44, align 4              ; <i32> [#uses=1]
+	%46 = add i32 %41, %45                    ; <i32> [#uses=1]
+	%47 = xor i32 %26, %4                     ; <i32> [#uses=1]
+	%48 = xor i32 %47, %46                    ; <i32> [#uses=5]
+	%49 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
+	%50 = load i32* %49, align 4              ; <i32> [#uses=1]
+	%51 = lshr i32 %48, 24                    ; <i32> [#uses=1]
+	%52 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %51; <i32*> [#uses=1]
+	%53 = load i32* %52, align 4              ; <i32> [#uses=1]
+	%54 = lshr i32 %48, 16                    ; <i32> [#uses=1]
+	%55 = or i32 %54, 256                     ; <i32> [#uses=1]
+	%56 = and i32 %55, 511                    ; <i32> [#uses=1]
+	%57 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %56; <i32*> [#uses=1]
+	%58 = load i32* %57, align 4              ; <i32> [#uses=1]
+	%59 = add i32 %58, %53                    ; <i32> [#uses=1]
+	%60 = lshr i32 %48, 8                     ; <i32> [#uses=1]
+	%61 = or i32 %60, 512                     ; <i32> [#uses=1]
+	%62 = and i32 %61, 767                    ; <i32> [#uses=1]
+	%63 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %62; <i32*> [#uses=1]
+	%64 = load i32* %63, align 4              ; <i32> [#uses=1]
+	%65 = xor i32 %59, %64                    ; <i32> [#uses=1]
+	%66 = or i32 %48, 768                     ; <i32> [#uses=1]
+	%67 = and i32 %66, 1023                   ; <i32> [#uses=1]
+	%68 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %67; <i32*> [#uses=1]
+	%69 = load i32* %68, align 4              ; <i32> [#uses=1]
+	%70 = add i32 %65, %69                    ; <i32> [#uses=1]
+	%71 = xor i32 %50, %24                    ; <i32> [#uses=1]
+	%72 = xor i32 %71, %70                    ; <i32> [#uses=5]
+	%73 = load i32* null, align 4             ; <i32> [#uses=1]
+	%74 = lshr i32 %72, 24                    ; <i32> [#uses=1]
+	%75 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %74; <i32*> [#uses=1]
+	%76 = load i32* %75, align 4              ; <i32> [#uses=1]
+	%77 = lshr i32 %72, 16                    ; <i32> [#uses=1]
+	%78 = or i32 %77, 256                     ; <i32> [#uses=1]
+	%79 = and i32 %78, 511                    ; <i32> [#uses=1]
+	%80 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %79; <i32*> [#uses=1]
+	%81 = load i32* %80, align 4              ; <i32> [#uses=1]
+	%82 = add i32 %81, %76                    ; <i32> [#uses=1]
+	%83 = lshr i32 %72, 8                     ; <i32> [#uses=1]
+	%84 = or i32 %83, 512                     ; <i32> [#uses=1]
+	%85 = and i32 %84, 767                    ; <i32> [#uses=1]
+	%86 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %85; <i32*> [#uses=1]
+	%87 = load i32* %86, align 4              ; <i32> [#uses=1]
+	%88 = xor i32 %82, %87                    ; <i32> [#uses=1]
+	%89 = or i32 %72, 768                     ; <i32> [#uses=1]
+	%90 = and i32 %89, 1023                   ; <i32> [#uses=1]
+	%91 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %90; <i32*> [#uses=1]
+	%92 = load i32* %91, align 4              ; <i32> [#uses=1]
+	%93 = add i32 %88, %92                    ; <i32> [#uses=1]
+	%94 = xor i32 %73, %48                    ; <i32> [#uses=1]
+	%95 = xor i32 %94, %93                    ; <i32> [#uses=5]
+	%96 = load i32* undef, align 4            ; <i32> [#uses=1]
+	%97 = lshr i32 %95, 24                    ; <i32> [#uses=1]
+	%98 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %97; <i32*> [#uses=1]
+	%99 = load i32* %98, align 4              ; <i32> [#uses=1]
+	%100 = lshr i32 %95, 16                   ; <i32> [#uses=1]
+	%101 = or i32 %100, 256                   ; <i32> [#uses=1]
+	%102 = and i32 %101, 511                  ; <i32> [#uses=1]
+	%103 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %102; <i32*> [#uses=1]
+	%104 = load i32* %103, align 4            ; <i32> [#uses=1]
+	%105 = add i32 %104, %99                  ; <i32> [#uses=1]
+	%106 = lshr i32 %95, 8                    ; <i32> [#uses=1]
+	%107 = or i32 %106, 512                   ; <i32> [#uses=1]
+	%108 = and i32 %107, 767                  ; <i32> [#uses=1]
+	%109 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %108; <i32*> [#uses=1]
+	%110 = load i32* %109, align 4            ; <i32> [#uses=1]
+	%111 = xor i32 %105, %110                 ; <i32> [#uses=1]
+	%112 = or i32 %95, 768                    ; <i32> [#uses=1]
+	%113 = and i32 %112, 1023                 ; <i32> [#uses=1]
+	%114 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %113; <i32*> [#uses=1]
+	%115 = load i32* %114, align 4            ; <i32> [#uses=1]
+	%116 = add i32 %111, %115                 ; <i32> [#uses=1]
+	%117 = xor i32 %96, %72                   ; <i32> [#uses=1]
+	%118 = xor i32 %117, %116                 ; <i32> [#uses=5]
+	%119 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
+	%120 = load i32* %119, align 4            ; <i32> [#uses=1]
+	%121 = lshr i32 %118, 24                  ; <i32> [#uses=1]
+	%122 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %121; <i32*> [#uses=1]
+	%123 = load i32* %122, align 4            ; <i32> [#uses=1]
+	%124 = lshr i32 %118, 16                  ; <i32> [#uses=1]
+	%125 = or i32 %124, 256                   ; <i32> [#uses=1]
+	%126 = and i32 %125, 511                  ; <i32> [#uses=1]
+	%127 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %126; <i32*> [#uses=1]
+	%128 = load i32* %127, align 4            ; <i32> [#uses=1]
+	%129 = add i32 %128, %123                 ; <i32> [#uses=1]
+	%130 = lshr i32 %118, 8                   ; <i32> [#uses=1]
+	%131 = or i32 %130, 512                   ; <i32> [#uses=1]
+	%132 = and i32 %131, 767                  ; <i32> [#uses=1]
+	%133 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %132; <i32*> [#uses=1]
+	%134 = load i32* %133, align 4            ; <i32> [#uses=1]
+	%135 = xor i32 %129, %134                 ; <i32> [#uses=1]
+	%136 = or i32 %118, 768                   ; <i32> [#uses=1]
+	%137 = and i32 %136, 1023                 ; <i32> [#uses=1]
+	%138 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %137; <i32*> [#uses=1]
+	%139 = load i32* %138, align 4            ; <i32> [#uses=1]
+	%140 = add i32 %135, %139                 ; <i32> [#uses=1]
+	%141 = xor i32 %120, %95                  ; <i32> [#uses=1]
+	%142 = xor i32 %141, %140                 ; <i32> [#uses=5]
+	%143 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 7; <i32*> [#uses=1]
+	%144 = load i32* %143, align 4            ; <i32> [#uses=1]
+	%145 = lshr i32 %142, 24                  ; <i32> [#uses=1]
+	%146 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %145; <i32*> [#uses=1]
+	%147 = load i32* %146, align 4            ; <i32> [#uses=1]
+	%148 = lshr i32 %142, 16                  ; <i32> [#uses=1]
+	%149 = or i32 %148, 256                   ; <i32> [#uses=1]
+	%150 = and i32 %149, 511                  ; <i32> [#uses=1]
+	%151 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %150; <i32*> [#uses=1]
+	%152 = load i32* %151, align 4            ; <i32> [#uses=1]
+	%153 = add i32 %152, %147                 ; <i32> [#uses=1]
+	%154 = lshr i32 %142, 8                   ; <i32> [#uses=1]
+	%155 = or i32 %154, 512                   ; <i32> [#uses=1]
+	%156 = and i32 %155, 767                  ; <i32> [#uses=1]
+	%157 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %156; <i32*> [#uses=1]
+	%158 = load i32* %157, align 4            ; <i32> [#uses=1]
+	%159 = xor i32 %153, %158                 ; <i32> [#uses=1]
+	%160 = or i32 %142, 768                   ; <i32> [#uses=1]
+	%161 = and i32 %160, 1023                 ; <i32> [#uses=1]
+	%162 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %161; <i32*> [#uses=1]
+	%163 = load i32* %162, align 4            ; <i32> [#uses=1]
+	%164 = add i32 %159, %163                 ; <i32> [#uses=1]
+	%165 = xor i32 %144, %118                 ; <i32> [#uses=1]
+	%166 = xor i32 %165, %164                 ; <i32> [#uses=5]
+	%167 = load i32* undef, align 4           ; <i32> [#uses=1]
+	%168 = lshr i32 %166, 24                  ; <i32> [#uses=1]
+	%169 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %168; <i32*> [#uses=1]
+	%170 = load i32* %169, align 4            ; <i32> [#uses=1]
+	%171 = lshr i32 %166, 16                  ; <i32> [#uses=1]
+	%172 = or i32 %171, 256                   ; <i32> [#uses=1]
+	%173 = and i32 %172, 511                  ; <i32> [#uses=1]
+	%174 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %173; <i32*> [#uses=1]
+	%175 = load i32* %174, align 4            ; <i32> [#uses=1]
+	%176 = add i32 %175, %170                 ; <i32> [#uses=1]
+	%177 = lshr i32 %166, 8                   ; <i32> [#uses=1]
+	%178 = or i32 %177, 512                   ; <i32> [#uses=1]
+	%179 = and i32 %178, 767                  ; <i32> [#uses=1]
+	%180 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %179; <i32*> [#uses=1]
+	%181 = load i32* %180, align 4            ; <i32> [#uses=1]
+	%182 = xor i32 %176, %181                 ; <i32> [#uses=1]
+	%183 = or i32 %166, 768                   ; <i32> [#uses=1]
+	%184 = and i32 %183, 1023                 ; <i32> [#uses=1]
+	%185 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %184; <i32*> [#uses=1]
+	%186 = load i32* %185, align 4            ; <i32> [#uses=1]
+	%187 = add i32 %182, %186                 ; <i32> [#uses=1]
+	%188 = xor i32 %167, %142                 ; <i32> [#uses=1]
+	%189 = xor i32 %188, %187                 ; <i32> [#uses=5]
+	%190 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
+	%191 = load i32* %190, align 4            ; <i32> [#uses=1]
+	%192 = lshr i32 %189, 24                  ; <i32> [#uses=1]
+	%193 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %192; <i32*> [#uses=1]
+	%194 = load i32* %193, align 4            ; <i32> [#uses=1]
+	%195 = lshr i32 %189, 16                  ; <i32> [#uses=1]
+	%196 = or i32 %195, 256                   ; <i32> [#uses=1]
+	%197 = and i32 %196, 511                  ; <i32> [#uses=1]
+	%198 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %197; <i32*> [#uses=1]
+	%199 = load i32* %198, align 4            ; <i32> [#uses=1]
+	%200 = add i32 %199, %194                 ; <i32> [#uses=1]
+	%201 = lshr i32 %189, 8                   ; <i32> [#uses=1]
+	%202 = or i32 %201, 512                   ; <i32> [#uses=1]
+	%203 = and i32 %202, 767                  ; <i32> [#uses=1]
+	%204 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %203; <i32*> [#uses=1]
+	%205 = load i32* %204, align 4            ; <i32> [#uses=1]
+	%206 = xor i32 %200, %205                 ; <i32> [#uses=1]
+	%207 = or i32 %189, 768                   ; <i32> [#uses=1]
+	%208 = and i32 %207, 1023                 ; <i32> [#uses=1]
+	%209 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %208; <i32*> [#uses=1]
+	%210 = load i32* %209, align 4            ; <i32> [#uses=1]
+	%211 = add i32 %206, %210                 ; <i32> [#uses=1]
+	%212 = xor i32 %191, %166                 ; <i32> [#uses=1]
+	%213 = xor i32 %212, %211                 ; <i32> [#uses=5]
+	%214 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
+	%215 = load i32* %214, align 4            ; <i32> [#uses=1]
+	%216 = lshr i32 %213, 24                  ; <i32> [#uses=1]
+	%217 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %216; <i32*> [#uses=1]
+	%218 = load i32* %217, align 4            ; <i32> [#uses=1]
+	%219 = lshr i32 %213, 16                  ; <i32> [#uses=1]
+	%220 = or i32 %219, 256                   ; <i32> [#uses=1]
+	%221 = and i32 %220, 511                  ; <i32> [#uses=1]
+	%222 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %221; <i32*> [#uses=1]
+	%223 = load i32* %222, align 4            ; <i32> [#uses=1]
+	%224 = add i32 %223, %218                 ; <i32> [#uses=1]
+	%225 = lshr i32 %213, 8                   ; <i32> [#uses=1]
+	%226 = or i32 %225, 512                   ; <i32> [#uses=1]
+	%227 = and i32 %226, 767                  ; <i32> [#uses=1]
+	%228 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %227; <i32*> [#uses=1]
+	%229 = load i32* %228, align 4            ; <i32> [#uses=1]
+	%230 = xor i32 %224, %229                 ; <i32> [#uses=1]
+	%231 = or i32 %213, 768                   ; <i32> [#uses=1]
+	%232 = and i32 %231, 1023                 ; <i32> [#uses=1]
+	%233 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %232; <i32*> [#uses=1]
+	%234 = load i32* %233, align 4            ; <i32> [#uses=1]
+	%235 = add i32 %230, %234                 ; <i32> [#uses=1]
+	%236 = xor i32 %215, %189                 ; <i32> [#uses=1]
+	%237 = xor i32 %236, %235                 ; <i32> [#uses=5]
+	%238 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 11; <i32*> [#uses=1]
+	%239 = load i32* %238, align 4            ; <i32> [#uses=1]
+	%240 = lshr i32 %237, 24                  ; <i32> [#uses=1]
+	%241 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %240; <i32*> [#uses=1]
+	%242 = load i32* %241, align 4            ; <i32> [#uses=1]
+	%243 = lshr i32 %237, 16                  ; <i32> [#uses=1]
+	%244 = or i32 %243, 256                   ; <i32> [#uses=1]
+	%245 = and i32 %244, 511                  ; <i32> [#uses=1]
+	%246 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %245; <i32*> [#uses=1]
+	%247 = load i32* %246, align 4            ; <i32> [#uses=1]
+	%248 = add i32 %247, %242                 ; <i32> [#uses=1]
+	%249 = lshr i32 %237, 8                   ; <i32> [#uses=1]
+	%250 = or i32 %249, 512                   ; <i32> [#uses=1]
+	%251 = and i32 %250, 767                  ; <i32> [#uses=1]
+	%252 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %251; <i32*> [#uses=1]
+	%253 = load i32* %252, align 4            ; <i32> [#uses=1]
+	%254 = xor i32 %248, %253                 ; <i32> [#uses=1]
+	%255 = or i32 %237, 768                   ; <i32> [#uses=1]
+	%256 = and i32 %255, 1023                 ; <i32> [#uses=1]
+	%257 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %256; <i32*> [#uses=1]
+	%258 = load i32* %257, align 4            ; <i32> [#uses=1]
+	%259 = add i32 %254, %258                 ; <i32> [#uses=1]
+	%260 = xor i32 %239, %213                 ; <i32> [#uses=1]
+	%261 = xor i32 %260, %259                 ; <i32> [#uses=5]
+	%262 = load i32* undef, align 4           ; <i32> [#uses=1]
+	%263 = lshr i32 %261, 24                  ; <i32> [#uses=1]
+	%264 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %263; <i32*> [#uses=1]
+	%265 = load i32* %264, align 4            ; <i32> [#uses=1]
+	%266 = lshr i32 %261, 16                  ; <i32> [#uses=1]
+	%267 = or i32 %266, 256                   ; <i32> [#uses=1]
+	%268 = and i32 %267, 511                  ; <i32> [#uses=1]
+	%269 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %268; <i32*> [#uses=1]
+	%270 = load i32* %269, align 4            ; <i32> [#uses=1]
+	%271 = add i32 %270, %265                 ; <i32> [#uses=1]
+	%272 = lshr i32 %261, 8                   ; <i32> [#uses=1]
+	%273 = or i32 %272, 512                   ; <i32> [#uses=1]
+	%274 = and i32 %273, 767                  ; <i32> [#uses=1]
+	%275 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %274; <i32*> [#uses=1]
+	%276 = load i32* %275, align 4            ; <i32> [#uses=1]
+	%277 = xor i32 %271, %276                 ; <i32> [#uses=1]
+	%278 = or i32 %261, 768                   ; <i32> [#uses=1]
+	%279 = and i32 %278, 1023                 ; <i32> [#uses=1]
+	%280 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %279; <i32*> [#uses=1]
+	%281 = load i32* %280, align 4            ; <i32> [#uses=1]
+	%282 = add i32 %277, %281                 ; <i32> [#uses=1]
+	%283 = xor i32 %262, %237                 ; <i32> [#uses=1]
+	%284 = xor i32 %283, %282                 ; <i32> [#uses=4]
+	%285 = load i32* null, align 4            ; <i32> [#uses=1]
+	%286 = lshr i32 %284, 24                  ; <i32> [#uses=1]
+	%287 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %286; <i32*> [#uses=1]
+	%288 = load i32* %287, align 4            ; <i32> [#uses=1]
+	%289 = lshr i32 %284, 16                  ; <i32> [#uses=1]
+	%290 = or i32 %289, 256                   ; <i32> [#uses=1]
+	%291 = and i32 %290, 511                  ; <i32> [#uses=1]
+	%292 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %291; <i32*> [#uses=1]
+	%293 = load i32* %292, align 4            ; <i32> [#uses=1]
+	%294 = add i32 %293, %288                 ; <i32> [#uses=1]
+	%295 = lshr i32 %284, 8                   ; <i32> [#uses=1]
+	%296 = or i32 %295, 512                   ; <i32> [#uses=1]
+	%297 = and i32 %296, 767                  ; <i32> [#uses=1]
+	%298 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %297; <i32*> [#uses=1]
+	%299 = load i32* %298, align 4            ; <i32> [#uses=1]
+	%300 = xor i32 %294, %299                 ; <i32> [#uses=1]
+	%301 = or i32 %284, 768                   ; <i32> [#uses=1]
+	%302 = and i32 %301, 1023                 ; <i32> [#uses=1]
+	%303 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %302; <i32*> [#uses=1]
+	%304 = load i32* %303, align 4            ; <i32> [#uses=1]
+	%305 = add i32 %300, %304                 ; <i32> [#uses=1]
+	%306 = xor i32 %285, %261                 ; <i32> [#uses=1]
+	%307 = xor i32 %306, %305                 ; <i32> [#uses=1]
+	%308 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
+	%309 = load i32* %308, align 4            ; <i32> [#uses=1]
+	%310 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 0; <i32*> [#uses=1]
+	%311 = load i32* %310, align 4            ; <i32> [#uses=1]
+	%312 = or i32 0, 256                      ; <i32> [#uses=1]
+	%313 = and i32 %312, 511                  ; <i32> [#uses=1]
+	%314 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %313; <i32*> [#uses=1]
+	%315 = load i32* %314, align 4            ; <i32> [#uses=1]
+	%316 = add i32 %315, %311                 ; <i32> [#uses=1]
+	%317 = or i32 0, 512                      ; <i32> [#uses=1]
+	%318 = and i32 %317, 767                  ; <i32> [#uses=1]
+	%319 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %318; <i32*> [#uses=1]
+	%320 = load i32* %319, align 4            ; <i32> [#uses=1]
+	%321 = xor i32 %316, %320                 ; <i32> [#uses=1]
+	%322 = or i32 0, 768                      ; <i32> [#uses=1]
+	%323 = and i32 %322, 1023                 ; <i32> [#uses=1]
+	%324 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %323; <i32*> [#uses=1]
+	%325 = load i32* %324, align 4            ; <i32> [#uses=1]
+	%326 = add i32 %321, %325                 ; <i32> [#uses=1]
+	%327 = xor i32 %309, %307                 ; <i32> [#uses=1]
+	%328 = xor i32 %327, %326                 ; <i32> [#uses=5]
+	%329 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 17; <i32*> [#uses=1]
+	br label %bb2
+
+bb1:                                              ; preds = %entry
+	%330 = load i32* null, align 4            ; <i32> [#uses=1]
+	%331 = xor i32 %330, %1                   ; <i32> [#uses=4]
+	%332 = load i32* null, align 4            ; <i32> [#uses=1]
+	%333 = lshr i32 %331, 24                  ; <i32> [#uses=1]
+	%334 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %333; <i32*> [#uses=1]
+	%335 = load i32* %334, align 4            ; <i32> [#uses=1]
+	%336 = load i32* null, align 4            ; <i32> [#uses=1]
+	%337 = add i32 %336, %335                 ; <i32> [#uses=1]
+	%338 = lshr i32 %331, 8                   ; <i32> [#uses=1]
+	%339 = or i32 %338, 512                   ; <i32> [#uses=1]
+	%340 = and i32 %339, 767                  ; <i32> [#uses=1]
+	%341 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %340; <i32*> [#uses=1]
+	%342 = load i32* %341, align 4            ; <i32> [#uses=1]
+	%343 = xor i32 %337, %342                 ; <i32> [#uses=1]
+	%344 = or i32 %331, 768                   ; <i32> [#uses=1]
+	%345 = and i32 %344, 1023                 ; <i32> [#uses=1]
+	%346 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %345; <i32*> [#uses=1]
+	%347 = load i32* %346, align 4            ; <i32> [#uses=1]
+	%348 = add i32 %343, %347                 ; <i32> [#uses=1]
+	%349 = xor i32 %332, %2                   ; <i32> [#uses=1]
+	%350 = xor i32 %349, %348                 ; <i32> [#uses=5]
+	%351 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
+	%352 = load i32* %351, align 4            ; <i32> [#uses=1]
+	%353 = lshr i32 %350, 24                  ; <i32> [#uses=1]
+	%354 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %353; <i32*> [#uses=1]
+	%355 = load i32* %354, align 4            ; <i32> [#uses=1]
+	%356 = lshr i32 %350, 16                  ; <i32> [#uses=1]
+	%357 = or i32 %356, 256                   ; <i32> [#uses=1]
+	%358 = and i32 %357, 511                  ; <i32> [#uses=1]
+	%359 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %358; <i32*> [#uses=1]
+	%360 = load i32* %359, align 4            ; <i32> [#uses=1]
+	%361 = add i32 %360, %355                 ; <i32> [#uses=1]
+	%362 = lshr i32 %350, 8                   ; <i32> [#uses=1]
+	%363 = or i32 %362, 512                   ; <i32> [#uses=1]
+	%364 = and i32 %363, 767                  ; <i32> [#uses=1]
+	%365 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %364; <i32*> [#uses=1]
+	%366 = load i32* %365, align 4            ; <i32> [#uses=1]
+	%367 = xor i32 %361, %366                 ; <i32> [#uses=1]
+	%368 = or i32 %350, 768                   ; <i32> [#uses=1]
+	%369 = and i32 %368, 1023                 ; <i32> [#uses=1]
+	%370 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %369; <i32*> [#uses=1]
+	%371 = load i32* %370, align 4            ; <i32> [#uses=1]
+	%372 = add i32 %367, %371                 ; <i32> [#uses=1]
+	%373 = xor i32 %352, %331                 ; <i32> [#uses=1]
+	%374 = xor i32 %373, %372                 ; <i32> [#uses=5]
+	%375 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 14; <i32*> [#uses=1]
+	%376 = load i32* %375, align 4            ; <i32> [#uses=1]
+	%377 = lshr i32 %374, 24                  ; <i32> [#uses=1]
+	%378 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %377; <i32*> [#uses=1]
+	%379 = load i32* %378, align 4            ; <i32> [#uses=1]
+	%380 = lshr i32 %374, 16                  ; <i32> [#uses=1]
+	%381 = or i32 %380, 256                   ; <i32> [#uses=1]
+	%382 = and i32 %381, 511                  ; <i32> [#uses=1]
+	%383 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %382; <i32*> [#uses=1]
+	%384 = load i32* %383, align 4            ; <i32> [#uses=1]
+	%385 = add i32 %384, %379                 ; <i32> [#uses=1]
+	%386 = lshr i32 %374, 8                   ; <i32> [#uses=1]
+	%387 = or i32 %386, 512                   ; <i32> [#uses=1]
+	%388 = and i32 %387, 767                  ; <i32> [#uses=1]
+	%389 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %388; <i32*> [#uses=1]
+	%390 = load i32* %389, align 4            ; <i32> [#uses=1]
+	%391 = xor i32 %385, %390                 ; <i32> [#uses=1]
+	%392 = or i32 %374, 768                   ; <i32> [#uses=1]
+	%393 = and i32 %392, 1023                 ; <i32> [#uses=1]
+	%394 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %393; <i32*> [#uses=1]
+	%395 = load i32* %394, align 4            ; <i32> [#uses=1]
+	%396 = add i32 %391, %395                 ; <i32> [#uses=1]
+	%397 = xor i32 %376, %350                 ; <i32> [#uses=1]
+	%398 = xor i32 %397, %396                 ; <i32> [#uses=5]
+	%399 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 13; <i32*> [#uses=1]
+	%400 = load i32* %399, align 4            ; <i32> [#uses=1]
+	%401 = lshr i32 %398, 24                  ; <i32> [#uses=1]
+	%402 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %401; <i32*> [#uses=1]
+	%403 = load i32* %402, align 4            ; <i32> [#uses=1]
+	%404 = lshr i32 %398, 16                  ; <i32> [#uses=1]
+	%405 = or i32 %404, 256                   ; <i32> [#uses=1]
+	%406 = and i32 %405, 511                  ; <i32> [#uses=1]
+	%407 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %406; <i32*> [#uses=1]
+	%408 = load i32* %407, align 4            ; <i32> [#uses=1]
+	%409 = add i32 %408, %403                 ; <i32> [#uses=1]
+	%410 = lshr i32 %398, 8                   ; <i32> [#uses=1]
+	%411 = or i32 %410, 512                   ; <i32> [#uses=1]
+	%412 = and i32 %411, 767                  ; <i32> [#uses=1]
+	%413 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %412; <i32*> [#uses=1]
+	%414 = load i32* %413, align 4            ; <i32> [#uses=1]
+	%415 = xor i32 %409, %414                 ; <i32> [#uses=1]
+	%416 = or i32 %398, 768                   ; <i32> [#uses=1]
+	%417 = and i32 %416, 1023                 ; <i32> [#uses=1]
+	%418 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %417; <i32*> [#uses=1]
+	%419 = load i32* %418, align 4            ; <i32> [#uses=1]
+	%420 = add i32 %415, %419                 ; <i32> [#uses=1]
+	%421 = xor i32 %400, %374                 ; <i32> [#uses=1]
+	%422 = xor i32 %421, %420                 ; <i32> [#uses=5]
+	%423 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 12; <i32*> [#uses=1]
+	%424 = load i32* %423, align 4            ; <i32> [#uses=1]
+	%425 = lshr i32 %422, 24                  ; <i32> [#uses=1]
+	%426 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %425; <i32*> [#uses=1]
+	%427 = load i32* %426, align 4            ; <i32> [#uses=1]
+	%428 = lshr i32 %422, 16                  ; <i32> [#uses=1]
+	%429 = or i32 %428, 256                   ; <i32> [#uses=1]
+	%430 = and i32 %429, 511                  ; <i32> [#uses=1]
+	%431 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %430; <i32*> [#uses=1]
+	%432 = load i32* %431, align 4            ; <i32> [#uses=1]
+	%433 = add i32 %432, %427                 ; <i32> [#uses=1]
+	%434 = lshr i32 %422, 8                   ; <i32> [#uses=1]
+	%435 = or i32 %434, 512                   ; <i32> [#uses=1]
+	%436 = and i32 %435, 767                  ; <i32> [#uses=1]
+	%437 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %436; <i32*> [#uses=1]
+	%438 = load i32* %437, align 4            ; <i32> [#uses=1]
+	%439 = xor i32 %433, %438                 ; <i32> [#uses=1]
+	%440 = or i32 %422, 768                   ; <i32> [#uses=1]
+	%441 = and i32 %440, 1023                 ; <i32> [#uses=1]
+	%442 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %441; <i32*> [#uses=1]
+	%443 = load i32* %442, align 4            ; <i32> [#uses=1]
+	%444 = add i32 %439, %443                 ; <i32> [#uses=1]
+	%445 = xor i32 %424, %398                 ; <i32> [#uses=1]
+	%446 = xor i32 %445, %444                 ; <i32> [#uses=5]
+	%447 = load i32* undef, align 4           ; <i32> [#uses=1]
+	%448 = lshr i32 %446, 24                  ; <i32> [#uses=1]
+	%449 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %448; <i32*> [#uses=1]
+	%450 = load i32* %449, align 4            ; <i32> [#uses=1]
+	%451 = lshr i32 %446, 16                  ; <i32> [#uses=1]
+	%452 = or i32 %451, 256                   ; <i32> [#uses=1]
+	%453 = and i32 %452, 511                  ; <i32> [#uses=1]
+	%454 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %453; <i32*> [#uses=1]
+	%455 = load i32* %454, align 4            ; <i32> [#uses=1]
+	%456 = add i32 %455, %450                 ; <i32> [#uses=1]
+	%457 = lshr i32 %446, 8                   ; <i32> [#uses=1]
+	%458 = or i32 %457, 512                   ; <i32> [#uses=1]
+	%459 = and i32 %458, 767                  ; <i32> [#uses=1]
+	%460 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %459; <i32*> [#uses=1]
+	%461 = load i32* %460, align 4            ; <i32> [#uses=1]
+	%462 = xor i32 %456, %461                 ; <i32> [#uses=1]
+	%463 = or i32 %446, 768                   ; <i32> [#uses=1]
+	%464 = and i32 %463, 1023                 ; <i32> [#uses=1]
+	%465 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %464; <i32*> [#uses=1]
+	%466 = load i32* %465, align 4            ; <i32> [#uses=1]
+	%467 = add i32 %462, %466                 ; <i32> [#uses=1]
+	%468 = xor i32 %447, %422                 ; <i32> [#uses=1]
+	%469 = xor i32 %468, %467                 ; <i32> [#uses=5]
+	%470 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
+	%471 = load i32* %470, align 4            ; <i32> [#uses=1]
+	%472 = lshr i32 %469, 24                  ; <i32> [#uses=1]
+	%473 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %472; <i32*> [#uses=1]
+	%474 = load i32* %473, align 4            ; <i32> [#uses=1]
+	%475 = lshr i32 %469, 16                  ; <i32> [#uses=1]
+	%476 = or i32 %475, 256                   ; <i32> [#uses=1]
+	%477 = and i32 %476, 511                  ; <i32> [#uses=1]
+	%478 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %477; <i32*> [#uses=1]
+	%479 = load i32* %478, align 4            ; <i32> [#uses=1]
+	%480 = add i32 %479, %474                 ; <i32> [#uses=1]
+	%481 = lshr i32 %469, 8                   ; <i32> [#uses=1]
+	%482 = or i32 %481, 512                   ; <i32> [#uses=1]
+	%483 = and i32 %482, 767                  ; <i32> [#uses=1]
+	%484 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %483; <i32*> [#uses=1]
+	%485 = load i32* %484, align 4            ; <i32> [#uses=1]
+	%486 = xor i32 %480, %485                 ; <i32> [#uses=1]
+	%487 = or i32 %469, 768                   ; <i32> [#uses=1]
+	%488 = and i32 %487, 1023                 ; <i32> [#uses=1]
+	%489 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %488; <i32*> [#uses=1]
+	%490 = load i32* %489, align 4            ; <i32> [#uses=1]
+	%491 = add i32 %486, %490                 ; <i32> [#uses=1]
+	%492 = xor i32 %471, %446                 ; <i32> [#uses=1]
+	%493 = xor i32 %492, %491                 ; <i32> [#uses=5]
+	%494 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
+	%495 = load i32* %494, align 4            ; <i32> [#uses=1]
+	%496 = lshr i32 %493, 24                  ; <i32> [#uses=1]
+	%497 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %496; <i32*> [#uses=1]
+	%498 = load i32* %497, align 4            ; <i32> [#uses=1]
+	%499 = lshr i32 %493, 16                  ; <i32> [#uses=1]
+	%500 = or i32 %499, 256                   ; <i32> [#uses=1]
+	%501 = and i32 %500, 511                  ; <i32> [#uses=1]
+	%502 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %501; <i32*> [#uses=1]
+	%503 = load i32* %502, align 4            ; <i32> [#uses=1]
+	%504 = add i32 %503, %498                 ; <i32> [#uses=1]
+	%505 = lshr i32 %493, 8                   ; <i32> [#uses=1]
+	%506 = or i32 %505, 512                   ; <i32> [#uses=1]
+	%507 = and i32 %506, 767                  ; <i32> [#uses=1]
+	%508 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %507; <i32*> [#uses=1]
+	%509 = load i32* %508, align 4            ; <i32> [#uses=1]
+	%510 = xor i32 %504, %509                 ; <i32> [#uses=1]
+	%511 = or i32 %493, 768                   ; <i32> [#uses=1]
+	%512 = and i32 %511, 1023                 ; <i32> [#uses=1]
+	%513 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %512; <i32*> [#uses=1]
+	%514 = load i32* %513, align 4            ; <i32> [#uses=1]
+	%515 = add i32 %510, %514                 ; <i32> [#uses=1]
+	%516 = xor i32 %495, %469                 ; <i32> [#uses=1]
+	%517 = xor i32 %516, %515                 ; <i32> [#uses=5]
+	%518 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 8; <i32*> [#uses=1]
+	%519 = load i32* %518, align 4            ; <i32> [#uses=1]
+	%520 = lshr i32 %517, 24                  ; <i32> [#uses=1]
+	%521 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %520; <i32*> [#uses=1]
+	%522 = load i32* %521, align 4            ; <i32> [#uses=1]
+	%523 = lshr i32 %517, 16                  ; <i32> [#uses=1]
+	%524 = or i32 %523, 256                   ; <i32> [#uses=1]
+	%525 = and i32 %524, 511                  ; <i32> [#uses=1]
+	%526 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %525; <i32*> [#uses=1]
+	%527 = load i32* %526, align 4            ; <i32> [#uses=1]
+	%528 = add i32 %527, %522                 ; <i32> [#uses=1]
+	%529 = lshr i32 %517, 8                   ; <i32> [#uses=1]
+	%530 = or i32 %529, 512                   ; <i32> [#uses=1]
+	%531 = and i32 %530, 767                  ; <i32> [#uses=1]
+	%532 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %531; <i32*> [#uses=1]
+	%533 = load i32* %532, align 4            ; <i32> [#uses=1]
+	%534 = xor i32 %528, %533                 ; <i32> [#uses=1]
+	%535 = or i32 %517, 768                   ; <i32> [#uses=1]
+	%536 = and i32 %535, 1023                 ; <i32> [#uses=1]
+	%537 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %536; <i32*> [#uses=1]
+	%538 = load i32* %537, align 4            ; <i32> [#uses=1]
+	%539 = add i32 %534, %538                 ; <i32> [#uses=1]
+	%540 = xor i32 %519, %493                 ; <i32> [#uses=1]
+	%541 = xor i32 %540, %539                 ; <i32> [#uses=5]
+	%542 = load i32* undef, align 4           ; <i32> [#uses=1]
+	%543 = lshr i32 %541, 24                  ; <i32> [#uses=1]
+	%544 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %543; <i32*> [#uses=1]
+	%545 = load i32* %544, align 4            ; <i32> [#uses=1]
+	%546 = lshr i32 %541, 16                  ; <i32> [#uses=1]
+	%547 = or i32 %546, 256                   ; <i32> [#uses=1]
+	%548 = and i32 %547, 511                  ; <i32> [#uses=1]
+	%549 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %548; <i32*> [#uses=1]
+	%550 = load i32* %549, align 4            ; <i32> [#uses=1]
+	%551 = add i32 %550, %545                 ; <i32> [#uses=1]
+	%552 = lshr i32 %541, 8                   ; <i32> [#uses=1]
+	%553 = or i32 %552, 512                   ; <i32> [#uses=1]
+	%554 = and i32 %553, 767                  ; <i32> [#uses=1]
+	%555 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %554; <i32*> [#uses=1]
+	%556 = load i32* %555, align 4            ; <i32> [#uses=1]
+	%557 = xor i32 %551, %556                 ; <i32> [#uses=1]
+	%558 = or i32 %541, 768                   ; <i32> [#uses=1]
+	%559 = and i32 %558, 1023                 ; <i32> [#uses=1]
+	%560 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %559; <i32*> [#uses=1]
+	%561 = load i32* %560, align 4            ; <i32> [#uses=1]
+	%562 = add i32 %557, %561                 ; <i32> [#uses=1]
+	%563 = xor i32 %542, %517                 ; <i32> [#uses=1]
+	%564 = xor i32 %563, %562                 ; <i32> [#uses=5]
+	%565 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
+	%566 = load i32* %565, align 4            ; <i32> [#uses=1]
+	%567 = lshr i32 %564, 24                  ; <i32> [#uses=1]
+	%568 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %567; <i32*> [#uses=1]
+	%569 = load i32* %568, align 4            ; <i32> [#uses=1]
+	%570 = lshr i32 %564, 16                  ; <i32> [#uses=1]
+	%571 = or i32 %570, 256                   ; <i32> [#uses=1]
+	%572 = and i32 %571, 511                  ; <i32> [#uses=1]
+	%573 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %572; <i32*> [#uses=1]
+	%574 = load i32* %573, align 4            ; <i32> [#uses=1]
+	%575 = add i32 %574, %569                 ; <i32> [#uses=1]
+	%576 = lshr i32 %564, 8                   ; <i32> [#uses=1]
+	%577 = or i32 %576, 512                   ; <i32> [#uses=1]
+	%578 = and i32 %577, 767                  ; <i32> [#uses=1]
+	%579 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %578; <i32*> [#uses=1]
+	%580 = load i32* %579, align 4            ; <i32> [#uses=1]
+	%581 = xor i32 %575, %580                 ; <i32> [#uses=1]
+	%582 = or i32 %564, 768                   ; <i32> [#uses=1]
+	%583 = and i32 %582, 1023                 ; <i32> [#uses=1]
+	%584 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %583; <i32*> [#uses=1]
+	%585 = load i32* %584, align 4            ; <i32> [#uses=1]
+	%586 = add i32 %581, %585                 ; <i32> [#uses=1]
+	%587 = xor i32 %566, %541                 ; <i32> [#uses=1]
+	%588 = xor i32 %587, %586                 ; <i32> [#uses=5]
+	%589 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 5; <i32*> [#uses=1]
+	%590 = load i32* %589, align 4            ; <i32> [#uses=1]
+	%591 = lshr i32 %588, 24                  ; <i32> [#uses=1]
+	%592 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %591; <i32*> [#uses=1]
+	%593 = load i32* %592, align 4            ; <i32> [#uses=1]
+	%594 = lshr i32 %588, 16                  ; <i32> [#uses=1]
+	%595 = or i32 %594, 256                   ; <i32> [#uses=1]
+	%596 = and i32 %595, 511                  ; <i32> [#uses=1]
+	%597 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %596; <i32*> [#uses=1]
+	%598 = load i32* %597, align 4            ; <i32> [#uses=1]
+	%599 = add i32 %598, %593                 ; <i32> [#uses=1]
+	%600 = lshr i32 %588, 8                   ; <i32> [#uses=1]
+	%601 = or i32 %600, 512                   ; <i32> [#uses=1]
+	%602 = and i32 %601, 767                  ; <i32> [#uses=1]
+	%603 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %602; <i32*> [#uses=1]
+	%604 = load i32* %603, align 4            ; <i32> [#uses=1]
+	%605 = xor i32 %599, %604                 ; <i32> [#uses=1]
+	%606 = or i32 %588, 768                   ; <i32> [#uses=1]
+	%607 = and i32 %606, 1023                 ; <i32> [#uses=1]
+	%608 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %607; <i32*> [#uses=1]
+	%609 = load i32* %608, align 4            ; <i32> [#uses=1]
+	%610 = add i32 %605, %609                 ; <i32> [#uses=1]
+	%611 = xor i32 %590, %564                 ; <i32> [#uses=1]
+	%612 = xor i32 %611, %610                 ; <i32> [#uses=5]
+	%613 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 4; <i32*> [#uses=1]
+	%614 = load i32* %613, align 4            ; <i32> [#uses=1]
+	%615 = lshr i32 %612, 24                  ; <i32> [#uses=1]
+	%616 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %615; <i32*> [#uses=1]
+	%617 = load i32* %616, align 4            ; <i32> [#uses=1]
+	%618 = lshr i32 %612, 16                  ; <i32> [#uses=1]
+	%619 = or i32 %618, 256                   ; <i32> [#uses=1]
+	%620 = and i32 %619, 511                  ; <i32> [#uses=1]
+	%621 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %620; <i32*> [#uses=1]
+	%622 = load i32* %621, align 4            ; <i32> [#uses=1]
+	%623 = add i32 %622, %617                 ; <i32> [#uses=1]
+	%624 = lshr i32 %612, 8                   ; <i32> [#uses=1]
+	%625 = or i32 %624, 512                   ; <i32> [#uses=1]
+	%626 = and i32 %625, 767                  ; <i32> [#uses=1]
+	%627 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %626; <i32*> [#uses=1]
+	%628 = load i32* %627, align 4            ; <i32> [#uses=1]
+	%629 = xor i32 %623, %628                 ; <i32> [#uses=1]
+	%630 = or i32 %612, 768                   ; <i32> [#uses=1]
+	%631 = and i32 %630, 1023                 ; <i32> [#uses=1]
+	%632 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %631; <i32*> [#uses=1]
+	%633 = load i32* %632, align 4            ; <i32> [#uses=1]
+	%634 = add i32 %629, %633                 ; <i32> [#uses=1]
+	%635 = xor i32 %614, %588                 ; <i32> [#uses=1]
+	%636 = xor i32 %635, %634                 ; <i32> [#uses=5]
+	%637 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
+	%638 = load i32* %637, align 4            ; <i32> [#uses=1]
+	%639 = lshr i32 %636, 24                  ; <i32> [#uses=1]
+	%640 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %639; <i32*> [#uses=1]
+	%641 = load i32* %640, align 4            ; <i32> [#uses=1]
+	%642 = lshr i32 %636, 16                  ; <i32> [#uses=1]
+	%643 = or i32 %642, 256                   ; <i32> [#uses=1]
+	%644 = and i32 %643, 511                  ; <i32> [#uses=1]
+	%645 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %644; <i32*> [#uses=1]
+	%646 = load i32* %645, align 4            ; <i32> [#uses=1]
+	%647 = add i32 %646, %641                 ; <i32> [#uses=1]
+	%648 = lshr i32 %636, 8                   ; <i32> [#uses=1]
+	%649 = or i32 %648, 512                   ; <i32> [#uses=1]
+	%650 = and i32 %649, 767                  ; <i32> [#uses=1]
+	%651 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %650; <i32*> [#uses=1]
+	%652 = load i32* %651, align 4            ; <i32> [#uses=1]
+	%653 = xor i32 %647, %652                 ; <i32> [#uses=1]
+	%654 = or i32 %636, 768                   ; <i32> [#uses=1]
+	%655 = and i32 %654, 1023                 ; <i32> [#uses=1]
+	%656 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %655; <i32*> [#uses=1]
+	%657 = load i32* %656, align 4            ; <i32> [#uses=1]
+	%658 = add i32 %653, %657                 ; <i32> [#uses=1]
+	%659 = xor i32 %638, %612                 ; <i32> [#uses=1]
+	%660 = xor i32 %659, %658                 ; <i32> [#uses=5]
+	%661 = load i32* undef, align 4           ; <i32> [#uses=1]
+	%662 = lshr i32 %660, 24                  ; <i32> [#uses=1]
+	%663 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %662; <i32*> [#uses=1]
+	%664 = load i32* %663, align 4            ; <i32> [#uses=1]
+	%665 = lshr i32 %660, 16                  ; <i32> [#uses=1]
+	%666 = or i32 %665, 256                   ; <i32> [#uses=1]
+	%667 = and i32 %666, 511                  ; <i32> [#uses=1]
+	%668 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %667; <i32*> [#uses=1]
+	%669 = load i32* %668, align 4            ; <i32> [#uses=1]
+	%670 = add i32 %669, %664                 ; <i32> [#uses=1]
+	%671 = lshr i32 %660, 8                   ; <i32> [#uses=1]
+	%672 = or i32 %671, 512                   ; <i32> [#uses=1]
+	%673 = and i32 %672, 767                  ; <i32> [#uses=1]
+	%674 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %673; <i32*> [#uses=1]
+	%675 = load i32* %674, align 4            ; <i32> [#uses=1]
+	%676 = xor i32 %670, %675                 ; <i32> [#uses=1]
+	%677 = or i32 %660, 768                   ; <i32> [#uses=1]
+	%678 = and i32 %677, 1023                 ; <i32> [#uses=1]
+	%679 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %678; <i32*> [#uses=1]
+	%680 = load i32* %679, align 4            ; <i32> [#uses=1]
+	%681 = add i32 %676, %680                 ; <i32> [#uses=1]
+	%682 = xor i32 %661, %636                 ; <i32> [#uses=1]
+	%683 = xor i32 %682, %681                 ; <i32> [#uses=5]
+	%684 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 1; <i32*> [#uses=1]
+	br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+	%.pn2.in = phi i32* [ %329, %bb ], [ %0, %bb1 ]; <i32*> [#uses=1]
+	%.pn3 = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+	%.pn15.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+	%.pn14.in.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+	%.pn13.in.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+	%.pn10.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1]
+	%.pn4.in = phi i32* [ null, %bb ], [ %684, %bb1 ]; <i32*> [#uses=1]
+	%.pn5 = phi i32 [ 0, %bb ], [ %660, %bb1 ]; <i32> [#uses=1]
+	%.pn14.in.in = lshr i32 %.pn14.in.in.in, 16; <i32> [#uses=1]
+	%.pn14.in = or i32 %.pn14.in.in, 256      ; <i32> [#uses=1]
+	%.pn13.in.in = lshr i32 %.pn13.in.in.in, 8; <i32> [#uses=1]
+	%.pn15 = lshr i32 %.pn15.in, 24           ; <i32> [#uses=1]
+	%.pn14 = and i32 %.pn14.in, 511           ; <i32> [#uses=1]
+	%.pn13.in = or i32 %.pn13.in.in, 512      ; <i32> [#uses=1]
+	%.pn11.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn15; <i32*> [#uses=1]
+	%.pn12.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn14; <i32*> [#uses=1]
+	%.pn13 = and i32 %.pn13.in, 767           ; <i32> [#uses=1]
+	%.pn10.in = or i32 %.pn10.in.in, 768      ; <i32> [#uses=1]
+	%.pn11 = load i32* %.pn11.in              ; <i32> [#uses=1]
+	%.pn12 = load i32* %.pn12.in              ; <i32> [#uses=1]
+	%.pn9.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn13; <i32*> [#uses=1]
+	%.pn10 = and i32 %.pn10.in, 1023          ; <i32> [#uses=1]
+	%.pn8 = add i32 %.pn12, %.pn11            ; <i32> [#uses=1]
+	%.pn9 = load i32* %.pn9.in                ; <i32> [#uses=1]
+	%.pn7.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn10; <i32*> [#uses=1]
+	%.pn6 = xor i32 %.pn8, %.pn9              ; <i32> [#uses=1]
+	%.pn7 = load i32* %.pn7.in                ; <i32> [#uses=1]
+	%.pn4 = load i32* %.pn4.in                ; <i32> [#uses=1]
+	%.pn2 = load i32* %.pn2.in                ; <i32> [#uses=1]
+	%.pn = add i32 %.pn6, %.pn7               ; <i32> [#uses=1]
+	%r.0 = xor i32 %.pn2, %.pn3               ; <i32> [#uses=1]
+	%.pn1 = xor i32 %.pn, %.pn5               ; <i32> [#uses=1]
+	%l.0 = xor i32 %.pn1, %.pn4               ; <i32> [#uses=1]
+	store i32 %l.0, i32* undef, align 4
+	store i32 %r.0, i32* %data, align 4
+	ret void
+}
diff --git a/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll b/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
new file mode 100644
index 0000000000000..b6e67b1bee002
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin
+
+	%struct.vorbis_comment = type { i8**, i32*, i32, i8* }
+@.str16 = external constant [2 x i8], align 1     ; <[2 x i8]*> [#uses=1]
+
+declare arm_apcscc i8* @__strcpy_chk(i8*, i8*, i32) nounwind
+
+declare arm_apcscc i8* @__strcat_chk(i8*, i8*, i32) nounwind
+
+define arm_apcscc i8* @vorbis_comment_query(%struct.vorbis_comment* nocapture %vc, i8* %tag, i32 %count) nounwind {
+entry:
+	%0 = alloca i8, i32 undef, align 4        ; <i8*> [#uses=2]
+	%1 = call arm_apcscc  i8* @__strcpy_chk(i8* %0, i8* %tag, i32 -1) nounwind; <i8*> [#uses=0]
+	%2 = call arm_apcscc  i8* @__strcat_chk(i8* %0, i8* getelementptr ([2 x i8]* @.str16, i32 0, i32 0), i32 -1) nounwind; <i8*> [#uses=0]
+	%3 = getelementptr %struct.vorbis_comment* %vc, i32 0, i32 0; <i8***> [#uses=1]
+	br label %bb11
+
+bb6:                                              ; preds = %bb11
+	%4 = load i8*** %3, align 4               ; <i8**> [#uses=1]
+	%scevgep = getelementptr i8** %4, i32 %8  ; <i8**> [#uses=1]
+	%5 = load i8** %scevgep, align 4          ; <i8*> [#uses=1]
+	br label %bb3.i
+
+bb3.i:                                            ; preds = %bb3.i, %bb6
+	%scevgep7.i = getelementptr i8* %5, i32 0 ; <i8*> [#uses=1]
+	%6 = load i8* %scevgep7.i, align 1        ; <i8> [#uses=0]
+	br i1 undef, label %bb3.i, label %bb10
+
+bb10:                                             ; preds = %bb3.i
+	%7 = add i32 %8, 1                        ; <i32> [#uses=1]
+	br label %bb11
+
+bb11:                                             ; preds = %bb10, %entry
+	%8 = phi i32 [ %7, %bb10 ], [ 0, %entry ] ; <i32> [#uses=3]
+	%9 = icmp sgt i32 undef, %8               ; <i1> [#uses=1]
+	br i1 %9, label %bb6, label %bb13
+
+bb13:                                             ; preds = %bb11
+	ret i8* null
+}
diff --git a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
new file mode 100644
index 0000000000000..1627f61b39a77
--- /dev/null
+++ b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6 | FileCheck %s
+; rdar://7157006
+
+%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sFILEX = type opaque
+%struct.__sbuf = type { i8*, i32 }
+%struct.asl_file_t = type { i32, i32, i32, %struct.file_string_t*, i64, i64, i64, i64, i64, i64, i32, %struct.FILE*, i8*, i8* }
+%struct.file_string_t = type { i64, i32, %struct.file_string_t*, [0 x i8] }
+
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.asl_file_t*, i64, i64*)* @t to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc i32 @t(%struct.asl_file_t* %s, i64 %off, i64* %out) nounwind optsize {
+; CHECK: t:
+; CHECK: adds r4, #8
+entry:
+  %val = alloca i64, align 4                      ; <i64*> [#uses=3]
+  %0 = icmp eq %struct.asl_file_t* %s, null       ; <i1> [#uses=1]
+  br i1 %0, label %bb13, label %bb1
+
+bb1:                                              ; preds = %entry
+  %1 = getelementptr inbounds %struct.asl_file_t* %s, i32 0, i32 11 ; <%struct.FILE**> [#uses=2]
+  %2 = load %struct.FILE** %1, align 4            ; <%struct.FILE*> [#uses=2]
+  %3 = icmp eq %struct.FILE* %2, null             ; <i1> [#uses=1]
+  br i1 %3, label %bb13, label %bb3
+
+bb3:                                              ; preds = %bb1
+  %4 = add nsw i64 %off, 8                        ; <i64> [#uses=1]
+  %5 = getelementptr inbounds %struct.asl_file_t* %s, i32 0, i32 10 ; <i32*> [#uses=1]
+  %6 = load i32* %5, align 4                      ; <i32> [#uses=1]
+  %7 = zext i32 %6 to i64                         ; <i64> [#uses=1]
+  %8 = icmp sgt i64 %4, %7                        ; <i1> [#uses=1]
+  br i1 %8, label %bb13, label %bb5
+
+bb5:                                              ; preds = %bb3
+  %9 = call arm_apcscc  i32 @fseeko(%struct.FILE* %2, i64 %off, i32 0) nounwind ; <i32> [#uses=1]
+  %10 = icmp eq i32 %9, 0                         ; <i1> [#uses=1]
+  br i1 %10, label %bb7, label %bb13
+
+bb7:                                              ; preds = %bb5
+  store i64 0, i64* %val, align 4
+  %11 = load %struct.FILE** %1, align 4           ; <%struct.FILE*> [#uses=1]
+  %val8 = bitcast i64* %val to i8*                ; <i8*> [#uses=1]
+  %12 = call arm_apcscc  i32 @fread(i8* noalias %val8, i32 8, i32 1, %struct.FILE* noalias %11) nounwind ; <i32> [#uses=1]
+  %13 = icmp eq i32 %12, 1                        ; <i1> [#uses=1]
+  br i1 %13, label %bb10, label %bb13
+
+bb10:                                             ; preds = %bb7
+  %14 = icmp eq i64* %out, null                   ; <i1> [#uses=1]
+  br i1 %14, label %bb13, label %bb11
+
+bb11:                                             ; preds = %bb10
+  %15 = load i64* %val, align 4                   ; <i64> [#uses=1]
+  %16 = call arm_apcscc  i64 @asl_core_ntohq(i64 %15) nounwind ; <i64> [#uses=1]
+  store i64 %16, i64* %out, align 4
+  ret i32 0
+
+bb13:                                             ; preds = %bb10, %bb7, %bb5, %bb3, %bb1, %entry
+  %.0 = phi i32 [ 2, %entry ], [ 2, %bb1 ], [ 7, %bb3 ], [ 7, %bb5 ], [ 7, %bb7 ], [ 0, %bb10 ] ; <i32> [#uses=1]
+  ret i32 %.0
+}
+
+declare arm_apcscc i32 @fseeko(%struct.FILE* nocapture, i64, i32) nounwind
+
+declare arm_apcscc i32 @fread(i8* noalias nocapture, i32, i32, %struct.FILE* noalias nocapture) nounwind
+
+declare arm_apcscc i64 @asl_core_ntohq(i64)
diff --git a/test/CodeGen/Thumb/asmprinter-bug.ll b/test/CodeGen/Thumb/asmprinter-bug.ll
new file mode 100644
index 0000000000000..1e3c070a87514
--- /dev/null
+++ b/test/CodeGen/Thumb/asmprinter-bug.ll
@@ -0,0 +1,288 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin10 | grep rsbs | grep {#0}
+
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+	%struct.adpcm_state = type { i16, i8 }
+@stepsizeTable = internal constant [89 x i32] [i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16, i32 17, i32 19, i32 21, i32 23, i32 25, i32 28, i32 31, i32 34, i32 37, i32 41, i32 45, i32 50, i32 55, i32 60, i32 66, i32 73, i32 80, i32 88, i32 97, i32 107, i32 118, i32 130, i32 143, i32 157, i32 173, i32 190, i32 209, i32 230, i32 253, i32 279, i32 307, i32 337, i32 371, i32 408, i32 449, i32 494, i32 544, i32 598, i32 658, i32 724, i32 796, i32 876, i32 963, i32 1060, i32 1166, i32 1282, i32 1411, i32 1552, i32 1707, i32 1878, i32 2066, i32 2272, i32 2499, i32 2749, i32 3024, i32 3327, i32 3660, i32 4026, i32 4428, i32 4871, i32 5358, i32 5894, i32 6484, i32 7132, i32 7845, i32 8630, i32 9493, i32 10442, i32 11487, i32 12635, i32 13899, i32 15289, i32 16818, i32 18500, i32 20350, i32 22385, i32 24623, i32 27086, i32 29794, i32 32767]		; <[89 x i32]*> [#uses=4]
+@indexTable = internal constant [16 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 2, i32 4, i32 6, i32 8, i32 -1, i32 -1, i32 -1, i32 -1, i32 2, i32 4, i32 6, i32 8]		; <[16 x i32]*> [#uses=2]
+@abuf = common global [500 x i8] zeroinitializer		; <[500 x i8]*> [#uses=1]
+@.str = private constant [11 x i8] c"input file\00", section "__TEXT,__cstring,cstring_literals", align 1		; <[11 x i8]*> [#uses=1]
+@sbuf = common global [1000 x i16] zeroinitializer		; <[1000 x i16]*> [#uses=1]
+@state = common global %struct.adpcm_state zeroinitializer		; <%struct.adpcm_state*> [#uses=3]
+@__stderrp = external global %struct.FILE*		; <%struct.FILE**> [#uses=1]
+@.str1 = private constant [28 x i8] c"Final valprev=%d, index=%d\0A\00", section "__TEXT,__cstring,cstring_literals", align 1		; <[28 x i8]*> [#uses=1]
+
+define arm_apcscc void @adpcm_coder(i16* nocapture %indata, i8* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind {
+entry:
+	%0 = getelementptr %struct.adpcm_state* %state, i32 0, i32 0		; <i16*> [#uses=2]
+	%1 = load i16* %0, align 2		; <i16> [#uses=1]
+	%2 = sext i16 %1 to i32		; <i32> [#uses=2]
+	%3 = getelementptr %struct.adpcm_state* %state, i32 0, i32 1		; <i8*> [#uses=2]
+	%4 = load i8* %3, align 2		; <i8> [#uses=1]
+	%5 = sext i8 %4 to i32		; <i32> [#uses=3]
+	%6 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %5		; <i32*> [#uses=1]
+	%7 = load i32* %6, align 4		; <i32> [#uses=1]
+	%8 = icmp sgt i32 %len, 0		; <i1> [#uses=1]
+	br i1 %8, label %bb, label %bb27
+
+bb:		; preds = %bb25, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb25 ]		; <i32> [#uses=2]
+	%outp.136 = phi i8* [ %outdata, %entry ], [ %outp.0, %bb25 ]		; <i8*> [#uses=3]
+	%bufferstep.035 = phi i32 [ 1, %entry ], [ %tmp, %bb25 ]		; <i32> [#uses=3]
+	%outputbuffer.134 = phi i32 [ undef, %entry ], [ %outputbuffer.0, %bb25 ]		; <i32> [#uses=2]
+	%index.033 = phi i32 [ %5, %entry ], [ %index.2, %bb25 ]		; <i32> [#uses=1]
+	%valpred.132 = phi i32 [ %2, %entry ], [ %valpred.2, %bb25 ]		; <i32> [#uses=2]
+	%step.031 = phi i32 [ %7, %entry ], [ %36, %bb25 ]		; <i32> [#uses=5]
+	%inp.038 = getelementptr i16* %indata, i32 %indvar		; <i16*> [#uses=1]
+	%9 = load i16* %inp.038, align 2		; <i16> [#uses=1]
+	%10 = sext i16 %9 to i32		; <i32> [#uses=1]
+	%11 = sub i32 %10, %valpred.132		; <i32> [#uses=3]
+	%12 = icmp slt i32 %11, 0		; <i1> [#uses=1]
+	%iftmp.1.0 = select i1 %12, i32 8, i32 0		; <i32> [#uses=2]
+	%13 = sub i32 0, %11		; <i32> [#uses=1]
+	%14 = icmp eq i32 %iftmp.1.0, 0		; <i1> [#uses=2]
+	%. = select i1 %14, i32 %11, i32 %13		; <i32> [#uses=2]
+	%15 = ashr i32 %step.031, 3		; <i32> [#uses=1]
+	%16 = icmp slt i32 %., %step.031		; <i1> [#uses=2]
+	%delta.0 = select i1 %16, i32 0, i32 4		; <i32> [#uses=2]
+	%17 = select i1 %16, i32 0, i32 %step.031		; <i32> [#uses=2]
+	%diff.1 = sub i32 %., %17		; <i32> [#uses=2]
+	%18 = ashr i32 %step.031, 1		; <i32> [#uses=2]
+	%19 = icmp slt i32 %diff.1, %18		; <i1> [#uses=2]
+	%20 = or i32 %delta.0, 2		; <i32> [#uses=1]
+	%21 = select i1 %19, i32 %delta.0, i32 %20		; <i32> [#uses=1]
+	%22 = select i1 %19, i32 0, i32 %18		; <i32> [#uses=2]
+	%diff.2 = sub i32 %diff.1, %22		; <i32> [#uses=1]
+	%23 = ashr i32 %step.031, 2		; <i32> [#uses=2]
+	%24 = icmp slt i32 %diff.2, %23		; <i1> [#uses=2]
+	%25 = zext i1 %24 to i32		; <i32> [#uses=1]
+	%26 = select i1 %24, i32 0, i32 %23		; <i32> [#uses=1]
+	%vpdiff.0 = add i32 %17, %15		; <i32> [#uses=1]
+	%vpdiff.1 = add i32 %vpdiff.0, %22		; <i32> [#uses=1]
+	%vpdiff.2 = add i32 %vpdiff.1, %26		; <i32> [#uses=2]
+	%tmp30 = sub i32 0, %vpdiff.2		; <i32> [#uses=1]
+	%valpred.0.p = select i1 %14, i32 %vpdiff.2, i32 %tmp30		; <i32> [#uses=1]
+	%valpred.0 = add i32 %valpred.0.p, %valpred.132		; <i32> [#uses=3]
+	%27 = icmp sgt i32 %valpred.0, 32767		; <i1> [#uses=1]
+	br i1 %27, label %bb18, label %bb16
+
+bb16:		; preds = %bb
+	%28 = icmp slt i32 %valpred.0, -32768		; <i1> [#uses=1]
+	br i1 %28, label %bb17, label %bb18
+
+bb17:		; preds = %bb16
+	br label %bb18
+
+bb18:		; preds = %bb17, %bb16, %bb
+	%valpred.2 = phi i32 [ -32768, %bb17 ], [ 32767, %bb ], [ %valpred.0, %bb16 ]		; <i32> [#uses=2]
+	%delta.1 = or i32 %21, %iftmp.1.0		; <i32> [#uses=1]
+	%delta.2 = or i32 %delta.1, %25		; <i32> [#uses=1]
+	%29 = xor i32 %delta.2, 1		; <i32> [#uses=3]
+	%30 = getelementptr [16 x i32]* @indexTable, i32 0, i32 %29		; <i32*> [#uses=1]
+	%31 = load i32* %30, align 4		; <i32> [#uses=1]
+	%32 = add i32 %31, %index.033		; <i32> [#uses=2]
+	%33 = icmp slt i32 %32, 0		; <i1> [#uses=1]
+	%index.1 = select i1 %33, i32 0, i32 %32		; <i32> [#uses=2]
+	%34 = icmp sgt i32 %index.1, 88		; <i1> [#uses=1]
+	%index.2 = select i1 %34, i32 88, i32 %index.1		; <i32> [#uses=3]
+	%35 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %index.2		; <i32*> [#uses=1]
+	%36 = load i32* %35, align 4		; <i32> [#uses=1]
+	%37 = icmp eq i32 %bufferstep.035, 0		; <i1> [#uses=1]
+	br i1 %37, label %bb24, label %bb23
+
+bb23:		; preds = %bb18
+	%38 = shl i32 %29, 4		; <i32> [#uses=1]
+	%39 = and i32 %38, 240		; <i32> [#uses=1]
+	br label %bb25
+
+bb24:		; preds = %bb18
+	%40 = trunc i32 %29 to i8		; <i8> [#uses=1]
+	%41 = and i8 %40, 15		; <i8> [#uses=1]
+	%42 = trunc i32 %outputbuffer.134 to i8		; <i8> [#uses=1]
+	%43 = or i8 %41, %42		; <i8> [#uses=1]
+	store i8 %43, i8* %outp.136, align 1
+	%44 = getelementptr i8* %outp.136, i32 1		; <i8*> [#uses=1]
+	br label %bb25
+
+bb25:		; preds = %bb24, %bb23
+	%outputbuffer.0 = phi i32 [ %39, %bb23 ], [ %outputbuffer.134, %bb24 ]		; <i32> [#uses=2]
+	%outp.0 = phi i8* [ %outp.136, %bb23 ], [ %44, %bb24 ]		; <i8*> [#uses=2]
+	%tmp = xor i32 %bufferstep.035, 1		; <i32> [#uses=1]
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %len		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb26.bb27_crit_edge, label %bb
+
+bb26.bb27_crit_edge:		; preds = %bb25
+	%phitmp44 = icmp eq i32 %bufferstep.035, 1		; <i1> [#uses=1]
+	br label %bb27
+
+bb27:		; preds = %bb26.bb27_crit_edge, %entry
+	%outp.1.lcssa = phi i8* [ %outp.0, %bb26.bb27_crit_edge ], [ %outdata, %entry ]		; <i8*> [#uses=1]
+	%bufferstep.0.lcssa = phi i1 [ %phitmp44, %bb26.bb27_crit_edge ], [ false, %entry ]		; <i1> [#uses=1]
+	%outputbuffer.1.lcssa = phi i32 [ %outputbuffer.0, %bb26.bb27_crit_edge ], [ undef, %entry ]		; <i32> [#uses=1]
+	%index.0.lcssa = phi i32 [ %index.2, %bb26.bb27_crit_edge ], [ %5, %entry ]		; <i32> [#uses=1]
+	%valpred.1.lcssa = phi i32 [ %valpred.2, %bb26.bb27_crit_edge ], [ %2, %entry ]		; <i32> [#uses=1]
+	br i1 %bufferstep.0.lcssa, label %bb28, label %bb29
+
+bb28:		; preds = %bb27
+	%45 = trunc i32 %outputbuffer.1.lcssa to i8		; <i8> [#uses=1]
+	store i8 %45, i8* %outp.1.lcssa, align 1
+	br label %bb29
+
+bb29:		; preds = %bb28, %bb27
+	%46 = trunc i32 %valpred.1.lcssa to i16		; <i16> [#uses=1]
+	store i16 %46, i16* %0, align 2
+	%47 = trunc i32 %index.0.lcssa to i8		; <i8> [#uses=1]
+	store i8 %47, i8* %3, align 2
+	ret void
+}
+
+define arm_apcscc void @adpcm_decoder(i8* nocapture %indata, i16* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind {
+entry:
+	%0 = getelementptr %struct.adpcm_state* %state, i32 0, i32 0		; <i16*> [#uses=2]
+	%1 = load i16* %0, align 2		; <i16> [#uses=1]
+	%2 = sext i16 %1 to i32		; <i32> [#uses=2]
+	%3 = getelementptr %struct.adpcm_state* %state, i32 0, i32 1		; <i8*> [#uses=2]
+	%4 = load i8* %3, align 2		; <i8> [#uses=1]
+	%5 = sext i8 %4 to i32		; <i32> [#uses=3]
+	%6 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %5		; <i32*> [#uses=1]
+	%7 = load i32* %6, align 4		; <i32> [#uses=1]
+	%8 = icmp sgt i32 %len, 0		; <i1> [#uses=1]
+	br i1 %8, label %bb, label %bb22
+
+bb:		; preds = %bb20, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb20 ]		; <i32> [#uses=2]
+	%inp.131 = phi i8* [ %indata, %entry ], [ %inp.0, %bb20 ]		; <i8*> [#uses=3]
+	%bufferstep.028 = phi i32 [ 0, %entry ], [ %tmp, %bb20 ]		; <i32> [#uses=2]
+	%inputbuffer.127 = phi i32 [ undef, %entry ], [ %inputbuffer.0, %bb20 ]		; <i32> [#uses=2]
+	%index.026 = phi i32 [ %5, %entry ], [ %index.2, %bb20 ]		; <i32> [#uses=1]
+	%valpred.125 = phi i32 [ %2, %entry ], [ %valpred.2, %bb20 ]		; <i32> [#uses=1]
+	%step.024 = phi i32 [ %7, %entry ], [ %35, %bb20 ]		; <i32> [#uses=4]
+	%outp.030 = getelementptr i16* %outdata, i32 %indvar		; <i16*> [#uses=1]
+	%9 = icmp eq i32 %bufferstep.028, 0		; <i1> [#uses=1]
+	br i1 %9, label %bb2, label %bb3
+
+bb2:		; preds = %bb
+	%10 = load i8* %inp.131, align 1		; <i8> [#uses=1]
+	%11 = sext i8 %10 to i32		; <i32> [#uses=2]
+	%12 = getelementptr i8* %inp.131, i32 1		; <i8*> [#uses=1]
+	%13 = ashr i32 %11, 4		; <i32> [#uses=1]
+	br label %bb3
+
+bb3:		; preds = %bb2, %bb
+	%inputbuffer.0 = phi i32 [ %11, %bb2 ], [ %inputbuffer.127, %bb ]		; <i32> [#uses=1]
+	%delta.0.in = phi i32 [ %13, %bb2 ], [ %inputbuffer.127, %bb ]		; <i32> [#uses=5]
+	%inp.0 = phi i8* [ %12, %bb2 ], [ %inp.131, %bb ]		; <i8*> [#uses=1]
+	%delta.0 = and i32 %delta.0.in, 15		; <i32> [#uses=1]
+	%tmp = xor i32 %bufferstep.028, 1		; <i32> [#uses=1]
+	%14 = getelementptr [16 x i32]* @indexTable, i32 0, i32 %delta.0		; <i32*> [#uses=1]
+	%15 = load i32* %14, align 4		; <i32> [#uses=1]
+	%16 = add i32 %15, %index.026		; <i32> [#uses=2]
+	%17 = icmp slt i32 %16, 0		; <i1> [#uses=1]
+	%index.1 = select i1 %17, i32 0, i32 %16		; <i32> [#uses=2]
+	%18 = icmp sgt i32 %index.1, 88		; <i1> [#uses=1]
+	%index.2 = select i1 %18, i32 88, i32 %index.1		; <i32> [#uses=3]
+	%19 = and i32 %delta.0.in, 8		; <i32> [#uses=1]
+	%20 = ashr i32 %step.024, 3		; <i32> [#uses=1]
+	%21 = and i32 %delta.0.in, 4		; <i32> [#uses=1]
+	%22 = icmp eq i32 %21, 0		; <i1> [#uses=1]
+	%23 = select i1 %22, i32 0, i32 %step.024		; <i32> [#uses=1]
+	%vpdiff.0 = add i32 %23, %20		; <i32> [#uses=2]
+	%24 = and i32 %delta.0.in, 2		; <i32> [#uses=1]
+	%25 = icmp eq i32 %24, 0		; <i1> [#uses=1]
+	br i1 %25, label %bb11, label %bb10
+
+bb10:		; preds = %bb3
+	%26 = ashr i32 %step.024, 1		; <i32> [#uses=1]
+	%27 = add i32 %vpdiff.0, %26		; <i32> [#uses=1]
+	br label %bb11
+
+bb11:		; preds = %bb10, %bb3
+	%vpdiff.1 = phi i32 [ %27, %bb10 ], [ %vpdiff.0, %bb3 ]		; <i32> [#uses=2]
+	%28 = and i32 %delta.0.in, 1		; <i32> [#uses=1]
+	%toBool = icmp eq i32 %28, 0		; <i1> [#uses=1]
+	br i1 %toBool, label %bb13, label %bb12
+
+bb12:		; preds = %bb11
+	%29 = ashr i32 %step.024, 2		; <i32> [#uses=1]
+	%30 = add i32 %vpdiff.1, %29		; <i32> [#uses=1]
+	br label %bb13
+
+bb13:		; preds = %bb12, %bb11
+	%vpdiff.2 = phi i32 [ %30, %bb12 ], [ %vpdiff.1, %bb11 ]		; <i32> [#uses=2]
+	%31 = icmp eq i32 %19, 0		; <i1> [#uses=1]
+	%tmp23 = sub i32 0, %vpdiff.2		; <i32> [#uses=1]
+	%valpred.0.p = select i1 %31, i32 %vpdiff.2, i32 %tmp23		; <i32> [#uses=1]
+	%valpred.0 = add i32 %valpred.0.p, %valpred.125		; <i32> [#uses=3]
+	%32 = icmp sgt i32 %valpred.0, 32767		; <i1> [#uses=1]
+	br i1 %32, label %bb20, label %bb18
+
+bb18:		; preds = %bb13
+	%33 = icmp slt i32 %valpred.0, -32768		; <i1> [#uses=1]
+	br i1 %33, label %bb19, label %bb20
+
+bb19:		; preds = %bb18
+	br label %bb20
+
+bb20:		; preds = %bb19, %bb18, %bb13
+	%valpred.2 = phi i32 [ -32768, %bb19 ], [ 32767, %bb13 ], [ %valpred.0, %bb18 ]		; <i32> [#uses=3]
+	%34 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %index.2		; <i32*> [#uses=1]
+	%35 = load i32* %34, align 4		; <i32> [#uses=1]
+	%36 = trunc i32 %valpred.2 to i16		; <i16> [#uses=1]
+	store i16 %36, i16* %outp.030, align 2
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %len		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb22, label %bb
+
+bb22:		; preds = %bb20, %entry
+	%index.0.lcssa = phi i32 [ %5, %entry ], [ %index.2, %bb20 ]		; <i32> [#uses=1]
+	%valpred.1.lcssa = phi i32 [ %2, %entry ], [ %valpred.2, %bb20 ]		; <i32> [#uses=1]
+	%37 = trunc i32 %valpred.1.lcssa to i16		; <i16> [#uses=1]
+	store i16 %37, i16* %0, align 2
+	%38 = trunc i32 %index.0.lcssa to i8		; <i8> [#uses=1]
+	store i8 %38, i8* %3, align 2
+	ret void
+}
+
+define arm_apcscc i32 @main() nounwind {
+entry:
+	br label %bb
+
+bb:		; preds = %bb3, %entry
+	%0 = tail call arm_apcscc  i32 (...)* @read(i32 0, i8* getelementptr ([500 x i8]* @abuf, i32 0, i32 0), i32 500) nounwind		; <i32> [#uses=4]
+	%1 = icmp slt i32 %0, 0		; <i1> [#uses=1]
+	br i1 %1, label %bb1, label %bb2
+
+bb1:		; preds = %bb
+	tail call arm_apcscc  void @perror(i8* getelementptr ([11 x i8]* @.str, i32 0, i32 0)) nounwind
+	ret i32 1
+
+bb2:		; preds = %bb
+	%2 = icmp eq i32 %0, 0		; <i1> [#uses=1]
+	br i1 %2, label %bb4, label %bb3
+
+bb3:		; preds = %bb2
+	%3 = shl i32 %0, 1		; <i32> [#uses=1]
+	tail call arm_apcscc  void @adpcm_decoder(i8* getelementptr ([500 x i8]* @abuf, i32 0, i32 0), i16* getelementptr ([1000 x i16]* @sbuf, i32 0, i32 0), i32 %3, %struct.adpcm_state* @state) nounwind
+	%4 = shl i32 %0, 2		; <i32> [#uses=1]
+	%5 = tail call arm_apcscc  i32 (...)* @write(i32 1, i16* getelementptr ([1000 x i16]* @sbuf, i32 0, i32 0), i32 %4) nounwind		; <i32> [#uses=0]
+	br label %bb
+
+bb4:		; preds = %bb2
+	%6 = load %struct.FILE** @__stderrp, align 4		; <%struct.FILE*> [#uses=1]
+	%7 = load i16* getelementptr (%struct.adpcm_state* @state, i32 0, i32 0), align 4		; <i16> [#uses=1]
+	%8 = sext i16 %7 to i32		; <i32> [#uses=1]
+	%9 = load i8* getelementptr (%struct.adpcm_state* @state, i32 0, i32 1), align 2		; <i8> [#uses=1]
+	%10 = sext i8 %9 to i32		; <i32> [#uses=1]
+	%11 = tail call arm_apcscc  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %6, i8* getelementptr ([28 x i8]* @.str1, i32 0, i32 0), i32 %8, i32 %10) nounwind		; <i32> [#uses=0]
+	ret i32 0
+}
+
+declare arm_apcscc i32 @read(...)
+
+declare arm_apcscc void @perror(i8* nocapture) nounwind
+
+declare arm_apcscc i32 @write(...)
+
+declare arm_apcscc i32 @fprintf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index cd76250bf0a71..acfdc917ddf07 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=thumb | not grep {ldr sp}
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin | \
+; RUN: llc < %s -march=thumb | not grep {ldr sp}
+; RUN: llc < %s -mtriple=thumb-apple-darwin | \
 ; RUN:   not grep {sub.*r7}
-; RUN: llvm-as < %s | llc -march=thumb | grep 4294967280
+; RUN: llc < %s -march=thumb | grep 4294967280
 
 	%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
 	%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/Thumb/fpconv.ll b/test/CodeGen/Thumb/fpconv.ll
index 2003131fbb734..7da36ddf58edd 100644
--- a/test/CodeGen/Thumb/fpconv.ll
+++ b/test/CodeGen/Thumb/fpconv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb
+; RUN: llc < %s -march=thumb
 
 define float @f1(double %x) {
 entry:
diff --git a/test/CodeGen/Thumb/fpow.ll b/test/CodeGen/Thumb/fpow.ll
index e5b92ad94ef85..be3dc0b3c1f8e 100644
--- a/test/CodeGen/Thumb/fpow.ll
+++ b/test/CodeGen/Thumb/fpow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb
+; RUN: llc < %s -march=thumb
 
 define double @t(double %x, double %y) nounwind optsize {
 entry:
diff --git a/test/CodeGen/Thumb/frame_thumb.ll b/test/CodeGen/Thumb/frame_thumb.ll
index 270e331cb52f5..0cac7554be03f 100644
--- a/test/CodeGen/Thumb/frame_thumb.ll
+++ b/test/CodeGen/Thumb/frame_thumb.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin \
+; RUN: llc < %s -mtriple=thumb-apple-darwin \
 ; RUN:     -disable-fp-elim | not grep {r11}
-; RUN: llvm-as < %s | llc -mtriple=thumb-linux-gnueabi \
+; RUN: llc < %s -mtriple=thumb-linux-gnueabi \
 ; RUN:     -disable-fp-elim | not grep {r11}
 
 define i32 @f() {
diff --git a/test/CodeGen/Thumb/iabs.ll b/test/CodeGen/Thumb/iabs.ll
index 13084f6870eea..d7cdcd8149aff 100644
--- a/test/CodeGen/Thumb/iabs.ll
+++ b/test/CodeGen/Thumb/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -stats |& \
+; RUN: llc < %s -march=thumb -stats |& \
 ; RUN:   grep {4 .*Number of machine instrs printed}
 
 ;; Integer absolute value, should produce something as good as:
diff --git a/test/CodeGen/Thumb/inlineasm-imm-thumb.ll b/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
index 2c872e7e310fd..5c8a52af59e4b 100644
--- a/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
+++ b/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb
+; RUN: llc < %s -march=thumb
 
 ; Test Thumb-mode "I" constraint, for ADD immediate.
 define i32 @testI(i32 %x) {
diff --git a/test/CodeGen/Thumb/ispositive.ll b/test/CodeGen/Thumb/ispositive.ll
index 91f5970ae9cbe..eac3ef28377bc 100644
--- a/test/CodeGen/Thumb/ispositive.ll
+++ b/test/CodeGen/Thumb/ispositive.ll
@@ -1,7 +1,9 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep {lsr r0, r0, #31}
+; RUN: llc < %s -march=thumb | FileCheck %s
 
 define i32 @test1(i32 %X) {
 entry:
+; CHECK: test1:
+; CHECK: lsrs r0, r0, #31
         icmp slt i32 %X, 0              ; <i1>:0 [#uses=1]
         zext i1 %0 to i32               ; <i32>:1 [#uses=1]
         ret i32 %1
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index f7c9ed07009f4..02de36af1cc7f 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep {ldr.*LCP} | count 5
+; RUN: llc < %s -march=thumb | grep {ldr.*LCP} | count 5
 
 define void @test1() {
     %tmp = alloca [ 64 x i32 ] , align 4
diff --git a/test/CodeGen/Thumb/ldr_ext.ll b/test/CodeGen/Thumb/ldr_ext.ll
index 4b2a7b201b55b..9a28124b84ce4 100644
--- a/test/CodeGen/Thumb/ldr_ext.ll
+++ b/test/CodeGen/Thumb/ldr_ext.ll
@@ -1,27 +1,56 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrb | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrh | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrsb | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrsh | count 1
+; RUN: llc < %s -march=thumb | FileCheck %s -check-prefix=V5
+; RUN: llc < %s -march=thumb -mattr=+v6 | FileCheck %s -check-prefix=V6
 
-define i32 @test1(i8* %v.pntr.s0.u1) {
-    %tmp.u = load i8* %v.pntr.s0.u1
+; rdar://7176514
+
+define i32 @test1(i8* %t1) nounwind {
+; V5: ldrb
+
+; V6: ldrb
+    %tmp.u = load i8* %t1
     %tmp1.s = zext i8 %tmp.u to i32
     ret i32 %tmp1.s
 }
 
-define i32 @test2(i16* %v.pntr.s0.u1) {
-    %tmp.u = load i16* %v.pntr.s0.u1
+define i32 @test2(i16* %t1) nounwind {
+; V5: ldrh
+
+; V6: ldrh
+    %tmp.u = load i16* %t1
     %tmp1.s = zext i16 %tmp.u to i32
     ret i32 %tmp1.s
 }
 
-define i32 @test3(i8* %v.pntr.s1.u0) {
-    %tmp.s = load i8* %v.pntr.s1.u0
+define i32 @test3(i8* %t0) nounwind {
+; V5: ldrb
+; V5: lsls
+; V5: asrs
+
+; V6: ldrb
+; V6: sxtb
+    %tmp.s = load i8* %t0
     %tmp1.s = sext i8 %tmp.s to i32
     ret i32 %tmp1.s
 }
 
-define i32 @test4() {
+define i32 @test4(i16* %t0) nounwind {
+; V5: ldrh
+; V5: lsls
+; V5: asrs
+
+; V6: ldrh
+; V6: sxth
+    %tmp.s = load i16* %t0
+    %tmp1.s = sext i16 %tmp.s to i32
+    ret i32 %tmp1.s
+}
+
+define i32 @test5() nounwind {
+; V5: movs r0, #0
+; V5: ldrsh
+
+; V6: movs r0, #0
+; V6: ldrsh
     %tmp.s = load i16* null
     %tmp1.s = sext i16 %tmp.s to i32
     ret i32 %tmp1.s
diff --git a/test/CodeGen/Thumb/ldr_frame.ll b/test/CodeGen/Thumb/ldr_frame.ll
index 0043fb502a329..81782cda4a900 100644
--- a/test/CodeGen/Thumb/ldr_frame.ll
+++ b/test/CodeGen/Thumb/ldr_frame.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep cpy | count 2
+; RUN: llc < %s -march=thumb | FileCheck %s
 
 define i32 @f1() {
+; CHECK: f1:
+; CHECK: ldr r0
 	%buf = alloca [32 x i32], align 4
 	%tmp = getelementptr [32 x i32]* %buf, i32 0, i32 0
 	%tmp1 = load i32* %tmp
@@ -8,6 +10,9 @@ define i32 @f1() {
 }
 
 define i32 @f2() {
+; CHECK: f2:
+; CHECK: mov r0
+; CHECK: ldrb
 	%buf = alloca [32 x i8], align 4
 	%tmp = getelementptr [32 x i8]* %buf, i32 0, i32 0
 	%tmp1 = load i8* %tmp
@@ -16,6 +21,8 @@ define i32 @f2() {
 }
 
 define i32 @f3() {
+; CHECK: f3:
+; CHECK: ldr r0
 	%buf = alloca [32 x i32], align 4
 	%tmp = getelementptr [32 x i32]* %buf, i32 0, i32 32
 	%tmp1 = load i32* %tmp
@@ -23,6 +30,9 @@ define i32 @f3() {
 }
 
 define i32 @f4() {
+; CHECK: f4:
+; CHECK: mov r0
+; CHECK: ldrb
 	%buf = alloca [32 x i8], align 4
 	%tmp = getelementptr [32 x i8]* %buf, i32 0, i32 2
 	%tmp1 = load i8* %tmp
diff --git a/test/CodeGen/Thumb/long-setcc.ll b/test/CodeGen/Thumb/long-setcc.ll
index df6d137a088fa..8f2d98fc43c99 100644
--- a/test/CodeGen/Thumb/long-setcc.ll
+++ b/test/CodeGen/Thumb/long-setcc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep cmp | count 1
+; RUN: llc < %s -march=thumb | grep cmp | count 1
 
 
 define i1 @t1(i64 %x) {
diff --git a/test/CodeGen/Thumb/long.ll b/test/CodeGen/Thumb/long.ll
index 22874437eb020..e3ef44a875868 100644
--- a/test/CodeGen/Thumb/long.ll
+++ b/test/CodeGen/Thumb/long.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN: llc < %s -march=thumb | \
 ; RUN:   grep mvn | count 1
-; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN: llc < %s -march=thumb | \
 ; RUN:   grep adc | count 1
-; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN: llc < %s -march=thumb | \
 ; RUN:   grep sbc | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep __muldi3
+; RUN: llc < %s -march=thumb | grep __muldi3
 
 define i64 @f1() {
 entry:
diff --git a/test/CodeGen/Thumb/long_shift.ll b/test/CodeGen/Thumb/long_shift.ll
new file mode 100644
index 0000000000000..24317141fca6b
--- /dev/null
+++ b/test/CodeGen/Thumb/long_shift.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=thumb
+
+define i64 @f0(i64 %A, i64 %B) {
+        %tmp = bitcast i64 %A to i64
+        %tmp2 = lshr i64 %B, 1
+        %tmp3 = sub i64 %tmp, %tmp2
+        ret i64 %tmp3
+}
+
+define i32 @f1(i64 %x, i64 %y) {
+        %a = shl i64 %x, %y
+        %b = trunc i64 %a to i32
+        ret i32 %b
+}
+
+define i32 @f2(i64 %x, i64 %y) {
+        %a = ashr i64 %x, %y
+        %b = trunc i64 %a to i32
+        ret i32 %b
+}
+
+define i32 @f3(i64 %x, i64 %y) {
+        %a = lshr i64 %x, %y
+        %b = trunc i64 %a to i32
+        ret i32 %b
+}
diff --git a/test/CodeGen/Thumb/mul.ll b/test/CodeGen/Thumb/mul.ll
new file mode 100644
index 0000000000000..c1a2fb29477db
--- /dev/null
+++ b/test/CodeGen/Thumb/mul.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=thumb | grep mul | count 3
+; RUN: llc < %s -march=thumb | grep lsl | count 1
+
+define i32 @f1(i32 %u) {
+    %tmp = mul i32 %u, %u
+    ret i32 %tmp
+}
+
+define i32 @f2(i32 %u, i32 %v) {
+    %tmp = mul i32 %u, %v
+    ret i32 %tmp
+}
+
+define i32 @f3(i32 %u) {
+    %tmp = mul i32 %u, 5
+    ret i32 %tmp
+}
+
+define i32 @f4(i32 %u) {
+    %tmp = mul i32 %u, 4
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb/pop.ll b/test/CodeGen/Thumb/pop.ll
new file mode 100644
index 0000000000000..c5e86ad45bc31
--- /dev/null
+++ b/test/CodeGen/Thumb/pop.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; rdar://7268481
+
+define arm_apcscc void @t(i8* %a, ...) nounwind {
+; CHECK:      t:
+; CHECK:      pop {r3}
+; CHECK-NEXT: add sp, #3 * 4
+; CHECK-NEXT: bx r3
+entry:
+  %a.addr = alloca i8*
+  store i8* %a, i8** %a.addr
+  ret void
+}
diff --git a/test/CodeGen/Thumb/push.ll b/test/CodeGen/Thumb/push.ll
new file mode 100644
index 0000000000000..63773c4f6c9f8
--- /dev/null
+++ b/test/CodeGen/Thumb/push.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-fp-elim | FileCheck %s
+; rdar://7268481
+
+define arm_apcscc void @t() nounwind {
+; CHECK:       t:
+; CHECK-NEXT : push {r7}
+entry:
+  call void asm sideeffect ".long 0xe7ffdefe", ""() nounwind
+  ret void
+}
diff --git a/test/CodeGen/Thumb/select.ll b/test/CodeGen/Thumb/select.ll
index ae75549d723ef..7a183b0f9e264 100644
--- a/test/CodeGen/Thumb/select.ll
+++ b/test/CodeGen/Thumb/select.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep beq | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bgt | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep blt | count 3
-; RUN: llvm-as < %s | llc -march=thumb | grep ble | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bls | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bhi | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep __ltdf2
+; RUN: llc < %s -march=thumb | grep beq | count 1
+; RUN: llc < %s -march=thumb | grep bgt | count 1
+; RUN: llc < %s -march=thumb | grep blt | count 3
+; RUN: llc < %s -march=thumb | grep ble | count 1
+; RUN: llc < %s -march=thumb | grep bls | count 1
+; RUN: llc < %s -march=thumb | grep bhi | count 1
+; RUN: llc < %s -march=thumb | grep __ltdf2
 
 define i32 @f1(i32 %a.s) {
 entry:
diff --git a/test/CodeGen/Thumb/stack-frame.ll b/test/CodeGen/Thumb/stack-frame.ll
index 756d257c2ae94..b103b331b7972 100644
--- a/test/CodeGen/Thumb/stack-frame.ll
+++ b/test/CodeGen/Thumb/stack-frame.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -march=thumb | grep add | count 1
+; RUN: llc < %s -march=thumb
+; RUN: llc < %s -march=thumb | grep add | count 1
 
 define void @f1() {
 	%c = alloca i8, align 1
diff --git a/test/CodeGen/Thumb/thumb-imm.ll b/test/CodeGen/Thumb/thumb-imm.ll
index 2be393a95cacb..74a57ff271be5 100644
--- a/test/CodeGen/Thumb/thumb-imm.ll
+++ b/test/CodeGen/Thumb/thumb-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | not grep CPI
+; RUN: llc < %s -march=thumb | not grep CPI
 
 
 define i32 @test1() {
diff --git a/test/CodeGen/Thumb/tst_teq.ll b/test/CodeGen/Thumb/tst_teq.ll
index 0456951e10503..21ada3ed83a08 100644
--- a/test/CodeGen/Thumb/tst_teq.ll
+++ b/test/CodeGen/Thumb/tst_teq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep tst
+; RUN: llc < %s -march=thumb | grep tst
 
 define i32 @f(i32 %a) {
 entry:
diff --git a/test/CodeGen/Thumb/unord.ll b/test/CodeGen/Thumb/unord.ll
index 4202d269c0e7e..39458ae7b7bce 100644
--- a/test/CodeGen/Thumb/unord.ll
+++ b/test/CodeGen/Thumb/unord.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb | grep bne | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep beq | count 1
+; RUN: llc < %s -march=thumb | grep bne | count 1
+; RUN: llc < %s -march=thumb | grep beq | count 1
 
 define i32 @f1(float %X, float %Y) {
 	%tmp = fcmp uno float %X, %Y
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index a18010f2fadd4..16a9c4442d8a8 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -mtriple=thumb-linux | grep pop | count 1
-; RUN: llvm-as < %s | llc -mtriple=thumb-darwin | grep pop | count 2
+; RUN: llc < %s -march=thumb
+; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 1
+; RUN: llc < %s -mtriple=thumb-darwin | grep pop | count 2
 
 @str = internal constant [4 x i8] c"%d\0A\00"           ; <[4 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll b/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
new file mode 100644
index 0000000000000..8f2283f748654
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv6t2-elf"
+	%struct.dwarf_cie = type <{ i32, i32, i8, [0 x i8], [3 x i8] }>
+
+declare arm_apcscc i8* @read_sleb128(i8*, i32* nocapture) nounwind
+
+define arm_apcscc i32 @get_cie_encoding(%struct.dwarf_cie* %cie) nounwind {
+entry:
+	br i1 undef, label %bb1, label %bb13
+
+bb1:		; preds = %entry
+	%tmp38 = add i32 undef, 10		; <i32> [#uses=1]
+	br label %bb.i
+
+bb.i:		; preds = %bb.i, %bb1
+	%indvar.i = phi i32 [ 0, %bb1 ], [ %2, %bb.i ]		; <i32> [#uses=3]
+	%tmp39 = add i32 %indvar.i, %tmp38		; <i32> [#uses=1]
+	%p_addr.0.i = getelementptr i8* undef, i32 %tmp39		; <i8*> [#uses=1]
+	%0 = load i8* %p_addr.0.i, align 1		; <i8> [#uses=1]
+	%1 = icmp slt i8 %0, 0		; <i1> [#uses=1]
+	%2 = add i32 %indvar.i, 1		; <i32> [#uses=1]
+	br i1 %1, label %bb.i, label %read_uleb128.exit
+
+read_uleb128.exit:		; preds = %bb.i
+	%.sum40 = add i32 %indvar.i, undef		; <i32> [#uses=1]
+	%.sum31 = add i32 %.sum40, 2		; <i32> [#uses=1]
+	%scevgep.i = getelementptr %struct.dwarf_cie* %cie, i32 0, i32 3, i32 %.sum31		; <i8*> [#uses=1]
+	%3 = call arm_apcscc  i8* @read_sleb128(i8* %scevgep.i, i32* undef)		; <i8*> [#uses=0]
+	unreachable
+
+bb13:		; preds = %entry
+	ret i32 0
+}
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
new file mode 100644
index 0000000000000..ec649c37bbe77
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2 | FileCheck %s
+; rdar://7076238
+
+@"\01LC" = external constant [36 x i8], align 1		; <[36 x i8]*> [#uses=1]
+
+define arm_apcscc i32 @t(i32, ...) nounwind {
+entry:
+; CHECK: t:
+; CHECK: add r7, sp, #3 * 4
+	%1 = load i8** undef, align 4		; <i8*> [#uses=3]
+	%2 = getelementptr i8* %1, i32 4		; <i8*> [#uses=1]
+	%3 = getelementptr i8* %1, i32 8		; <i8*> [#uses=1]
+	%4 = bitcast i8* %2 to i32*		; <i32*> [#uses=1]
+	%5 = load i32* %4, align 4		; <i32> [#uses=1]
+	%6 = trunc i32 %5 to i8		; <i8> [#uses=1]
+	%7 = getelementptr i8* %1, i32 12		; <i8*> [#uses=1]
+	%8 = bitcast i8* %3 to i32*		; <i32*> [#uses=1]
+	%9 = load i32* %8, align 4		; <i32> [#uses=1]
+	%10 = trunc i32 %9 to i16		; <i16> [#uses=1]
+	%11 = bitcast i8* %7 to i32*		; <i32*> [#uses=1]
+	%12 = load i32* %11, align 4		; <i32> [#uses=1]
+	%13 = trunc i32 %12 to i16		; <i16> [#uses=1]
+	%14 = load i32* undef, align 4		; <i32> [#uses=2]
+	%15 = sext i8 %6 to i32		; <i32> [#uses=2]
+	%16 = sext i16 %10 to i32		; <i32> [#uses=2]
+	%17 = sext i16 %13 to i32		; <i32> [#uses=2]
+	%18 = call arm_apcscc  i32 (i8*, ...)* @printf(i8* getelementptr ([36 x i8]* @"\01LC", i32 0, i32 0), i32 -128, i32 0, i32 %15, i32 %16, i32 %17, i32 0, i32 %14) nounwind		; <i32> [#uses=0]
+	%19 = add i32 0, %15		; <i32> [#uses=1]
+	%20 = add i32 %19, %16		; <i32> [#uses=1]
+	%21 = add i32 %20, %14		; <i32> [#uses=1]
+	%22 = add i32 %21, %17		; <i32> [#uses=1]
+	%23 = add i32 %22, 0		; <i32> [#uses=1]
+	ret i32 %23
+}
+
+declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll b/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll
new file mode 100644
index 0000000000000..4d21f9ba63021
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2
+; rdar://7083961
+
+define arm_apcscc i32 @value(i64 %b1, i64 %b2) nounwind readonly {
+entry:
+	%0 = icmp eq i32 undef, 0		; <i1> [#uses=1]
+	%mod.0.ph.ph = select i1 %0, float -1.000000e+00, float 1.000000e+00		; <float> [#uses=1]
+	br label %bb7
+
+bb7:		; preds = %bb7, %entry
+	br i1 undef, label %bb86.preheader, label %bb7
+
+bb86.preheader:		; preds = %bb7
+	%1 = fmul float %mod.0.ph.ph, 5.000000e+00		; <float> [#uses=0]
+	br label %bb79
+
+bb79:		; preds = %bb79, %bb86.preheader
+	br i1 undef, label %bb119, label %bb79
+
+bb119:		; preds = %bb79
+	ret i32 undef
+}
diff --git a/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll b/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
new file mode 100644
index 0000000000000..f74d12ed27871
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
@@ -0,0 +1,193 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim
+
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+	%struct.JHUFF_TBL = type { [17 x i8], [256 x i8], i32 }
+	%struct.JQUANT_TBL = type { [64 x i16], i32 }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+	%struct.anon = type { [8 x i32], [48 x i8] }
+	%struct.backing_store_info = type { void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*)*, %struct.FILE*, [64 x i8] }
+	%struct.jpeg_color_deconverter = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32, i8**, i32)* }
+	%struct.jpeg_color_quantizer = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i8**, i32)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)* }
+	%struct.jpeg_common_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32 }
+	%struct.jpeg_component_info = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.JQUANT_TBL*, i8* }
+	%struct.jpeg_d_coef_controller = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, i8***)*, %struct.jvirt_barray_control** }
+	%struct.jpeg_d_main_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i32*, i32)* }
+	%struct.jpeg_d_post_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)* }
+	%struct.jpeg_decomp_master = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 }
+	%struct.jpeg_decompress_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32, %struct.jpeg_source_mgr*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %struct.JQUANT_TBL*], [4 x %struct.JHUFF_TBL*], [4 x %struct.JHUFF_TBL*], i32, %struct.jpeg_component_info*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i16, i16, i32, i8, i32, i32, i32, i32, i32, i8*, i32, [4 x %struct.jpeg_component_info*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %struct.jpeg_decomp_master*, %struct.jpeg_d_main_controller*, %struct.jpeg_d_coef_controller*, %struct.jpeg_d_post_controller*, %struct.jpeg_input_controller*, %struct.jpeg_marker_reader*, %struct.jpeg_entropy_decoder*, %struct.jpeg_inverse_dct*, %struct.jpeg_upsampler*, %struct.jpeg_color_deconverter*, %struct.jpeg_color_quantizer* }
+	%struct.jpeg_entropy_decoder = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, [64 x i16]**)* }
+	%struct.jpeg_error_mgr = type { void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i8*)*, void (%struct.jpeg_common_struct*)*, i32, %struct.anon, i32, i32, i8**, i32, i8**, i32, i32 }
+	%struct.jpeg_input_controller = type { i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32, i32 }
+	%struct.jpeg_inverse_dct = type { void (%struct.jpeg_decompress_struct*)*, [10 x void (%struct.jpeg_decompress_struct*, %struct.jpeg_component_info*, i16*, i8**, i32)*] }
+	%struct.jpeg_marker_reader = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, [16 x i32 (%struct.jpeg_decompress_struct*)*], i32, i32, i32, i32 }
+	%struct.jpeg_memory_mgr = type { i8* (%struct.jpeg_common_struct*, i32, i32)*, i8* (%struct.jpeg_common_struct*, i32, i32)*, i8** (%struct.jpeg_common_struct*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, i32, i32, i32)*, %struct.jvirt_sarray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, %struct.jvirt_barray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, void (%struct.jpeg_common_struct*)*, i8** (%struct.jpeg_common_struct*, %struct.jvirt_sarray_control*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, %struct.jvirt_barray_control*, i32, i32, i32)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, i32 }
+	%struct.jpeg_progress_mgr = type { void (%struct.jpeg_common_struct*)*, i32, i32, i32, i32 }
+	%struct.jpeg_source_mgr = type { i8*, i32, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i32)*, i32 (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*)* }
+	%struct.jpeg_upsampler = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
+	%struct.jvirt_barray_control = type { [64 x i16]**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_barray_control*, %struct.backing_store_info }
+	%struct.jvirt_sarray_control = type { i8**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_sarray_control*, %struct.backing_store_info }
+
+define arm_apcscc void @jpeg_idct_float(%struct.jpeg_decompress_struct* nocapture %cinfo, %struct.jpeg_component_info* nocapture %compptr, i16* nocapture %coef_block, i8** nocapture %output_buf, i32 %output_col) nounwind {
+entry:
+	%workspace = alloca [64 x float], align 4		; <[64 x float]*> [#uses=11]
+	%0 = load i8** undef, align 4		; <i8*> [#uses=5]
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=11]
+	%tmp39 = add i32 %indvar, 8		; <i32> [#uses=0]
+	%tmp41 = add i32 %indvar, 16		; <i32> [#uses=2]
+	%scevgep42 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp41		; <float*> [#uses=1]
+	%tmp43 = add i32 %indvar, 24		; <i32> [#uses=1]
+	%scevgep44 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp43		; <float*> [#uses=1]
+	%tmp45 = add i32 %indvar, 32		; <i32> [#uses=1]
+	%scevgep46 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp45		; <float*> [#uses=1]
+	%tmp47 = add i32 %indvar, 40		; <i32> [#uses=1]
+	%scevgep48 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp47		; <float*> [#uses=1]
+	%tmp49 = add i32 %indvar, 48		; <i32> [#uses=1]
+	%scevgep50 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp49		; <float*> [#uses=1]
+	%tmp51 = add i32 %indvar, 56		; <i32> [#uses=1]
+	%scevgep52 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp51		; <float*> [#uses=1]
+	%wsptr.119 = getelementptr [64 x float]* %workspace, i32 0, i32 %indvar		; <float*> [#uses=1]
+	%tmp54 = shl i32 %indvar, 2		; <i32> [#uses=1]
+	%scevgep76 = getelementptr i8* undef, i32 %tmp54		; <i8*> [#uses=1]
+	%quantptr.118 = bitcast i8* %scevgep76 to float*		; <float*> [#uses=1]
+	%scevgep79 = getelementptr i16* %coef_block, i32 %tmp41		; <i16*> [#uses=0]
+	%inptr.117 = getelementptr i16* %coef_block, i32 %indvar		; <i16*> [#uses=1]
+	%1 = load i16* null, align 2		; <i16> [#uses=1]
+	%2 = load i16* undef, align 2		; <i16> [#uses=1]
+	%3 = load i16* %inptr.117, align 2		; <i16> [#uses=1]
+	%4 = sitofp i16 %3 to float		; <float> [#uses=1]
+	%5 = load float* %quantptr.118, align 4		; <float> [#uses=1]
+	%6 = fmul float %4, %5		; <float> [#uses=1]
+	%7 = fsub float %6, undef		; <float> [#uses=2]
+	%8 = fmul float undef, 0x3FF6A09E60000000		; <float> [#uses=1]
+	%9 = fsub float %8, 0.000000e+00		; <float> [#uses=2]
+	%10 = fadd float undef, 0.000000e+00		; <float> [#uses=2]
+	%11 = fadd float %7, %9		; <float> [#uses=2]
+	%12 = fsub float %7, %9		; <float> [#uses=2]
+	%13 = sitofp i16 %1 to float		; <float> [#uses=1]
+	%14 = fmul float %13, undef		; <float> [#uses=2]
+	%15 = sitofp i16 %2 to float		; <float> [#uses=1]
+	%16 = load float* undef, align 4		; <float> [#uses=1]
+	%17 = fmul float %15, %16		; <float> [#uses=1]
+	%18 = fadd float %14, undef		; <float> [#uses=2]
+	%19 = fsub float %14, undef		; <float> [#uses=2]
+	%20 = fadd float undef, %17		; <float> [#uses=2]
+	%21 = fadd float %20, %18		; <float> [#uses=3]
+	%22 = fsub float %20, %18		; <float> [#uses=1]
+	%23 = fmul float %22, 0x3FF6A09E60000000		; <float> [#uses=1]
+	%24 = fadd float %19, undef		; <float> [#uses=1]
+	%25 = fmul float %24, 0x3FFD906BC0000000		; <float> [#uses=2]
+	%26 = fmul float undef, 0x3FF1517A80000000		; <float> [#uses=1]
+	%27 = fsub float %26, %25		; <float> [#uses=1]
+	%28 = fmul float %19, 0xC004E7AEA0000000		; <float> [#uses=1]
+	%29 = fadd float %28, %25		; <float> [#uses=1]
+	%30 = fsub float %29, %21		; <float> [#uses=3]
+	%31 = fsub float %23, %30		; <float> [#uses=3]
+	%32 = fadd float %27, %31		; <float> [#uses=1]
+	%33 = fadd float %10, %21		; <float> [#uses=1]
+	store float %33, float* %wsptr.119, align 4
+	%34 = fsub float %10, %21		; <float> [#uses=1]
+	store float %34, float* %scevgep52, align 4
+	%35 = fadd float %11, %30		; <float> [#uses=1]
+	store float %35, float* null, align 4
+	%36 = fsub float %11, %30		; <float> [#uses=1]
+	store float %36, float* %scevgep50, align 4
+	%37 = fadd float %12, %31		; <float> [#uses=1]
+	store float %37, float* %scevgep42, align 4
+	%38 = fsub float %12, %31		; <float> [#uses=1]
+	store float %38, float* %scevgep48, align 4
+	%39 = fadd float undef, %32		; <float> [#uses=1]
+	store float %39, float* %scevgep46, align 4
+	store float undef, float* %scevgep44, align 4
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br i1 undef, label %bb6, label %bb
+
+bb6:		; preds = %bb
+	%.sum10 = add i32 %output_col, 1		; <i32> [#uses=1]
+	%.sum8 = add i32 %output_col, 6		; <i32> [#uses=1]
+	%.sum6 = add i32 %output_col, 2		; <i32> [#uses=1]
+	%.sum = add i32 %output_col, 3		; <i32> [#uses=1]
+	br label %bb8
+
+bb8:		; preds = %bb8, %bb6
+	%ctr.116 = phi i32 [ 0, %bb6 ], [ %88, %bb8 ]		; <i32> [#uses=3]
+	%scevgep = getelementptr i8** %output_buf, i32 %ctr.116		; <i8**> [#uses=1]
+	%tmp = shl i32 %ctr.116, 3		; <i32> [#uses=5]
+	%tmp2392 = or i32 %tmp, 4		; <i32> [#uses=1]
+	%scevgep24 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2392		; <float*> [#uses=1]
+	%tmp2591 = or i32 %tmp, 2		; <i32> [#uses=1]
+	%scevgep26 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2591		; <float*> [#uses=1]
+	%tmp2790 = or i32 %tmp, 6		; <i32> [#uses=1]
+	%scevgep28 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2790		; <float*> [#uses=1]
+	%tmp3586 = or i32 %tmp, 7		; <i32> [#uses=0]
+	%wsptr.215 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp		; <float*> [#uses=1]
+	%40 = load i8** %scevgep, align 4		; <i8*> [#uses=4]
+	%41 = load float* %wsptr.215, align 4		; <float> [#uses=1]
+	%42 = load float* %scevgep24, align 4		; <float> [#uses=1]
+	%43 = fadd float %41, %42		; <float> [#uses=1]
+	%44 = load float* %scevgep26, align 4		; <float> [#uses=1]
+	%45 = load float* %scevgep28, align 4		; <float> [#uses=1]
+	%46 = fadd float %44, %45		; <float> [#uses=1]
+	%47 = fsub float %43, %46		; <float> [#uses=2]
+	%48 = fsub float undef, 0.000000e+00		; <float> [#uses=1]
+	%49 = fadd float 0.000000e+00, undef		; <float> [#uses=1]
+	%50 = fptosi float %49 to i32		; <i32> [#uses=1]
+	%51 = add i32 %50, 4		; <i32> [#uses=1]
+	%52 = lshr i32 %51, 3		; <i32> [#uses=1]
+	%53 = and i32 %52, 1023		; <i32> [#uses=1]
+	%.sum14 = add i32 %53, 128		; <i32> [#uses=1]
+	%54 = getelementptr i8* %0, i32 %.sum14		; <i8*> [#uses=1]
+	%55 = load i8* %54, align 1		; <i8> [#uses=1]
+	store i8 %55, i8* null, align 1
+	%56 = getelementptr i8* %40, i32 %.sum10		; <i8*> [#uses=1]
+	store i8 0, i8* %56, align 1
+	%57 = load i8* null, align 1		; <i8> [#uses=1]
+	%58 = getelementptr i8* %40, i32 %.sum8		; <i8*> [#uses=1]
+	store i8 %57, i8* %58, align 1
+	%59 = fadd float undef, %48		; <float> [#uses=1]
+	%60 = fptosi float %59 to i32		; <i32> [#uses=1]
+	%61 = add i32 %60, 4		; <i32> [#uses=1]
+	%62 = lshr i32 %61, 3		; <i32> [#uses=1]
+	%63 = and i32 %62, 1023		; <i32> [#uses=1]
+	%.sum7 = add i32 %63, 128		; <i32> [#uses=1]
+	%64 = getelementptr i8* %0, i32 %.sum7		; <i8*> [#uses=1]
+	%65 = load i8* %64, align 1		; <i8> [#uses=1]
+	%66 = getelementptr i8* %40, i32 %.sum6		; <i8*> [#uses=1]
+	store i8 %65, i8* %66, align 1
+	%67 = fptosi float undef to i32		; <i32> [#uses=1]
+	%68 = add i32 %67, 4		; <i32> [#uses=1]
+	%69 = lshr i32 %68, 3		; <i32> [#uses=1]
+	%70 = and i32 %69, 1023		; <i32> [#uses=1]
+	%.sum5 = add i32 %70, 128		; <i32> [#uses=1]
+	%71 = getelementptr i8* %0, i32 %.sum5		; <i8*> [#uses=1]
+	%72 = load i8* %71, align 1		; <i8> [#uses=1]
+	store i8 %72, i8* undef, align 1
+	%73 = fadd float %47, undef		; <float> [#uses=1]
+	%74 = fptosi float %73 to i32		; <i32> [#uses=1]
+	%75 = add i32 %74, 4		; <i32> [#uses=1]
+	%76 = lshr i32 %75, 3		; <i32> [#uses=1]
+	%77 = and i32 %76, 1023		; <i32> [#uses=1]
+	%.sum3 = add i32 %77, 128		; <i32> [#uses=1]
+	%78 = getelementptr i8* %0, i32 %.sum3		; <i8*> [#uses=1]
+	%79 = load i8* %78, align 1		; <i8> [#uses=1]
+	store i8 %79, i8* undef, align 1
+	%80 = fsub float %47, undef		; <float> [#uses=1]
+	%81 = fptosi float %80 to i32		; <i32> [#uses=1]
+	%82 = add i32 %81, 4		; <i32> [#uses=1]
+	%83 = lshr i32 %82, 3		; <i32> [#uses=1]
+	%84 = and i32 %83, 1023		; <i32> [#uses=1]
+	%.sum1 = add i32 %84, 128		; <i32> [#uses=1]
+	%85 = getelementptr i8* %0, i32 %.sum1		; <i8*> [#uses=1]
+	%86 = load i8* %85, align 1		; <i8> [#uses=1]
+	%87 = getelementptr i8* %40, i32 %.sum		; <i8*> [#uses=1]
+	store i8 %86, i8* %87, align 1
+	%88 = add i32 %ctr.116, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %88, 8		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %bb8
+
+return:		; preds = %bb8
+	ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
new file mode 100644
index 0000000000000..a8e86d55e786e
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
+
+@csize = external global [100 x [20 x [4 x i8]]]		; <[100 x [20 x [4 x i8]]]*> [#uses=1]
+@vsize = external global [100 x [20 x [4 x i8]]]		; <[100 x [20 x [4 x i8]]]*> [#uses=1]
+@cll = external global [20 x [10 x i8]]		; <[20 x [10 x i8]]*> [#uses=1]
+@lefline = external global [100 x [20 x i32]]		; <[100 x [20 x i32]]*> [#uses=1]
+@sep = external global [20 x i32]		; <[20 x i32]*> [#uses=1]
+
+define arm_apcscc void @main(i32 %argc, i8** %argv) noreturn nounwind {
+; CHECK: main:
+; CHECK: ldrb
+entry:
+	%nb.i.i.i = alloca [25 x i8], align 1		; <[25 x i8]*> [#uses=0]
+	%line.i.i.i = alloca [200 x i8], align 1		; <[200 x i8]*> [#uses=1]
+	%line.i = alloca [1024 x i8], align 1		; <[1024 x i8]*> [#uses=0]
+	br i1 undef, label %bb.i.i, label %bb4.preheader.i
+
+bb.i.i:		; preds = %entry
+	unreachable
+
+bb4.preheader.i:		; preds = %entry
+	br i1 undef, label %tbl.exit, label %bb.i.preheader
+
+bb.i.preheader:		; preds = %bb4.preheader.i
+	%line3.i.i.i = getelementptr [200 x i8]* %line.i.i.i, i32 0, i32 0		; <i8*> [#uses=1]
+	br label %bb.i
+
+bb.i:		; preds = %bb4.backedge.i, %bb.i.preheader
+	br i1 undef, label %bb3.i, label %bb4.backedge.i
+
+bb3.i:		; preds = %bb.i
+	br i1 undef, label %bb2.i184.i.i, label %bb.i183.i.i
+
+bb.i183.i.i:		; preds = %bb.i183.i.i, %bb3.i
+	br i1 undef, label %bb2.i184.i.i, label %bb.i183.i.i
+
+bb2.i184.i.i:		; preds = %bb.i183.i.i, %bb3.i
+	br i1 undef, label %bb5.i185.i.i, label %bb35.preheader.i.i.i
+
+bb35.preheader.i.i.i:		; preds = %bb2.i184.i.i
+	%0 = load i8* %line3.i.i.i, align 1		; <i8> [#uses=1]
+	%1 = icmp eq i8 %0, 59		; <i1> [#uses=1]
+	br i1 %1, label %bb36.i.i.i, label %bb9.i186.i.i
+
+bb5.i185.i.i:		; preds = %bb2.i184.i.i
+	br label %bb.i171.i.i
+
+bb9.i186.i.i:		; preds = %bb35.preheader.i.i.i
+	unreachable
+
+bb36.i.i.i:		; preds = %bb35.preheader.i.i.i
+	br label %bb.i171.i.i
+
+bb.i171.i.i:		; preds = %bb3.i176.i.i, %bb36.i.i.i, %bb5.i185.i.i
+	%2 = phi i32 [ %4, %bb3.i176.i.i ], [ 0, %bb36.i.i.i ], [ 0, %bb5.i185.i.i ]		; <i32> [#uses=6]
+	%scevgep16.i.i.i = getelementptr [20 x i32]* @sep, i32 0, i32 %2		; <i32*> [#uses=1]
+	%scevgep18.i.i.i = getelementptr [20 x [10 x i8]]* @cll, i32 0, i32 %2, i32 0		; <i8*> [#uses=0]
+	store i32 -1, i32* %scevgep16.i.i.i, align 4
+	br label %bb1.i175.i.i
+
+bb1.i175.i.i:		; preds = %bb1.i175.i.i, %bb.i171.i.i
+	%i.03.i172.i.i = phi i32 [ 0, %bb.i171.i.i ], [ %3, %bb1.i175.i.i ]		; <i32> [#uses=4]
+	%scevgep11.i.i.i = getelementptr [100 x [20 x i32]]* @lefline, i32 0, i32 %i.03.i172.i.i, i32 %2		; <i32*> [#uses=1]
+	%scevgep12.i.i.i = getelementptr [100 x [20 x [4 x i8]]]* @vsize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0		; <i8*> [#uses=1]
+	%scevgep13.i.i.i = getelementptr [100 x [20 x [4 x i8]]]* @csize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0		; <i8*> [#uses=0]
+	store i8 0, i8* %scevgep12.i.i.i, align 1
+	store i32 0, i32* %scevgep11.i.i.i, align 4
+	store i32 108, i32* undef, align 4
+	%3 = add i32 %i.03.i172.i.i, 1		; <i32> [#uses=2]
+	%exitcond.i174.i.i = icmp eq i32 %3, 100		; <i1> [#uses=1]
+	br i1 %exitcond.i174.i.i, label %bb3.i176.i.i, label %bb1.i175.i.i
+
+bb3.i176.i.i:		; preds = %bb1.i175.i.i
+	%4 = add i32 %2, 1		; <i32> [#uses=1]
+	br i1 undef, label %bb5.i177.i.i, label %bb.i171.i.i
+
+bb5.i177.i.i:		; preds = %bb3.i176.i.i
+	unreachable
+
+bb4.backedge.i:		; preds = %bb.i
+	br i1 undef, label %tbl.exit, label %bb.i
+
+tbl.exit:		; preds = %bb4.backedge.i, %bb4.preheader.i
+	unreachable
+}
diff --git a/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
new file mode 100644
index 0000000000000..6cbfd0d8d4dcf
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim
+
+	type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16)*, i32 }		; type %0
+	type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*)*, i32 }		; type %1
+	type { void (%"struct.xalanc_1_8::FormatterToXML"*, %"struct.xalanc_1_8::XalanDOMString"*)*, i32 }		; type %2
+	type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32, i32)*, i32 }		; type %3
+	type { void (%"struct.xalanc_1_8::FormatterToXML"*)*, i32 }		; type %4
+	%"struct.std::CharVectorType" = type { %"struct.std::_Vector_base<char,std::allocator<char> >" }
+	%"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" }
+	%"struct.std::_Bit_iterator_base" = type { i32*, i32 }
+	%"struct.std::_Bvector_base<std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" }
+	%"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" = type { %"struct.std::_Bit_const_iterator", %"struct.std::_Bit_const_iterator", i32* }
+	%"struct.std::_Vector_base<char,std::allocator<char> >" = type { %"struct.std::_Vector_base<char,std::allocator<char> >::_Vector_impl" }
+	%"struct.std::_Vector_base<char,std::allocator<char> >::_Vector_impl" = type { i8*, i8*, i8* }
+	%"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >" = type { %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >::_Vector_impl" }
+	%"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >::_Vector_impl" = type { i16*, i16*, i16* }
+	%"struct.std::basic_ostream<char,std::char_traits<char> >.base" = type { i32 (...)** }
+	%"struct.std::vector<bool,std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >" }
+	%"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >" = type { %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >" }
+	%"struct.xalanc_1_8::FormatterListener" = type { %"struct.std::basic_ostream<char,std::char_traits<char> >.base", %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, i32 }
+	%"struct.xalanc_1_8::FormatterToXML" = type { %"struct.xalanc_1_8::FormatterListener", %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, %"struct.xalanc_1_8::XalanOutputStream"*, i16, [256 x i16], [256 x i16], i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", i32, i32, %"struct.std::vector<bool,std::allocator<bool> >", %"struct.xalanc_1_8::XalanDOMString", i8, i8, i8, i8, i8, %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", i32, %"struct.std::CharVectorType", %"struct.std::vector<bool,std::allocator<bool> >", %0, %1, %2, %3, %0, %1, %2, %3, %4, i16*, i32 }
+	%"struct.xalanc_1_8::XalanDOMString" = type { %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", i32 }
+	%"struct.xalanc_1_8::XalanOutputStream" = type { i32 (...)**, i32, %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, i32, %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", %"struct.xalanc_1_8::XalanDOMString", i8, i8, %"struct.std::CharVectorType" }
+
+declare arm_apcscc void @_ZN10xalanc_1_814FormatterToXML17writeParentTagEndEv(%"struct.xalanc_1_8::FormatterToXML"*)
+
+define arm_apcscc void @_ZN10xalanc_1_814FormatterToXML5cdataEPKtj(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 %length) {
+entry:
+	%0 = getelementptr %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 13		; <i8*> [#uses=1]
+	br i1 undef, label %bb4, label %bb
+
+bb:		; preds = %entry
+	store i8 0, i8* %0, align 1
+	%1 = getelementptr %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 0, i32 0, i32 0		; <i32 (...)***> [#uses=1]
+	%2 = load i32 (...)*** %1, align 4		; <i32 (...)**> [#uses=1]
+	%3 = getelementptr i32 (...)** %2, i32 11		; <i32 (...)**> [#uses=1]
+	%4 = load i32 (...)** %3, align 4		; <i32 (...)*> [#uses=1]
+	%5 = bitcast i32 (...)* %4 to void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)*		; <void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)*> [#uses=1]
+	tail call arm_apcscc  void %5(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 %length)
+	ret void
+
+bb4:		; preds = %entry
+	tail call arm_apcscc  void @_ZN10xalanc_1_814FormatterToXML17writeParentTagEndEv(%"struct.xalanc_1_8::FormatterToXML"* %this)
+	tail call arm_apcscc  void undef(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 0, i32 %length, i8 zeroext undef)
+	ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
new file mode 100644
index 0000000000000..ebe9d469f2291
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi
+; PR4681
+
+	%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+	%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+@.str2 = external constant [30 x i8], align 1		; <[30 x i8]*> [#uses=1]
+
+define arm_aapcscc i32 @__mf_heuristic_check(i32 %ptr, i32 %ptr_high) nounwind {
+entry:
+	br i1 undef, label %bb1, label %bb
+
+bb:		; preds = %entry
+	unreachable
+
+bb1:		; preds = %entry
+	br i1 undef, label %bb9, label %bb2
+
+bb2:		; preds = %bb1
+	%0 = call i8* @llvm.frameaddress(i32 0)		; <i8*> [#uses=1]
+	%1 = call arm_aapcscc  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* noalias undef, i8* noalias getelementptr ([30 x i8]* @.str2, i32 0, i32 0), i8* %0, i8* null) nounwind		; <i32> [#uses=0]
+	unreachable
+
+bb9:		; preds = %bb1
+	ret i32 undef
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
+
+declare arm_aapcscc i32 @fprintf(%struct.FILE* noalias nocapture, i8* noalias nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
new file mode 100644
index 0000000000000..319d29b790e80
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
@@ -0,0 +1,153 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+neon -arm-use-neon-fp -relocation-model=pic -disable-fp-elim
+
+	type { %struct.GAP }		; type %0
+	type { i16, i8, i8 }		; type %1
+	type { [2 x i32], [2 x i32] }		; type %2
+	type { %struct.rec* }		; type %3
+	type { i8, i8, i16, i8, i8, i8, i8 }		; type %4
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+	%struct.FILE_POS = type { i8, i8, i16, i32 }
+	%struct.FIRST_UNION = type { %struct.FILE_POS }
+	%struct.FOURTH_UNION = type { %struct.STYLE }
+	%struct.GAP = type { i8, i8, i16 }
+	%struct.LIST = type { %struct.rec*, %struct.rec* }
+	%struct.SECOND_UNION = type { %1 }
+	%struct.STYLE = type { %0, %0, i16, i16, i32 }
+	%struct.THIRD_UNION = type { %2 }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+	%struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+	%struct.rec = type { %struct.head_type }
+@.str24239 = external constant [20 x i8], align 1		; <[20 x i8]*> [#uses=1]
+@no_file_pos = external global %4		; <%4*> [#uses=1]
+@zz_tmp = external global %struct.rec*		; <%struct.rec**> [#uses=1]
+@.str81872 = external constant [10 x i8], align 1		; <[10 x i8]*> [#uses=1]
+@out_fp = external global %struct.FILE*		; <%struct.FILE**> [#uses=2]
+@cpexists = external global i32		; <i32*> [#uses=2]
+@.str212784 = external constant [17 x i8], align 1		; <[17 x i8]*> [#uses=1]
+@.str1822946 = external constant [8 x i8], align 1		; <[8 x i8]*> [#uses=1]
+@.str1842948 = external constant [11 x i8], align 1		; <[11 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @fprintf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
+
+declare arm_apcscc i32 @"\01_fwrite"(i8*, i32, i32, i8*)
+
+declare arm_apcscc %struct.FILE* @OpenIncGraphicFile(i8*, i8 zeroext, %struct.rec** nocapture, %struct.FILE_POS*, i32* nocapture) nounwind
+
+declare arm_apcscc void @Error(i32, i32, i8*, i32, %struct.FILE_POS*, ...) nounwind
+
+declare arm_apcscc i8* @fgets(i8*, i32, %struct.FILE* nocapture) nounwind
+
+define arm_apcscc void @PS_PrintGraphicInclude(%struct.rec* %x, i32 %colmark, i32 %rowmark) nounwind {
+entry:
+	br label %bb5
+
+bb5:		; preds = %bb5, %entry
+	%.pn = phi %struct.rec* [ %y.0, %bb5 ], [ undef, %entry ]		; <%struct.rec*> [#uses=1]
+	%y.0.in = getelementptr %struct.rec* %.pn, i32 0, i32 0, i32 0, i32 1, i32 0		; <%struct.rec**> [#uses=1]
+	%y.0 = load %struct.rec** %y.0.in		; <%struct.rec*> [#uses=2]
+	br i1 undef, label %bb5, label %bb6
+
+bb6:		; preds = %bb5
+	%0 = call arm_apcscc  %struct.FILE* @OpenIncGraphicFile(i8* undef, i8 zeroext 0, %struct.rec** undef, %struct.FILE_POS* null, i32* undef) nounwind		; <%struct.FILE*> [#uses=1]
+	br i1 false, label %bb.i, label %FontHalfXHeight.exit
+
+bb.i:		; preds = %bb6
+	br label %FontHalfXHeight.exit
+
+FontHalfXHeight.exit:		; preds = %bb.i, %bb6
+	br i1 undef, label %bb.i1, label %FontSize.exit
+
+bb.i1:		; preds = %FontHalfXHeight.exit
+	br label %FontSize.exit
+
+FontSize.exit:		; preds = %bb.i1, %FontHalfXHeight.exit
+	%1 = load i32* undef, align 4		; <i32> [#uses=1]
+	%2 = icmp ult i32 0, undef		; <i1> [#uses=1]
+	br i1 %2, label %bb.i5, label %FontName.exit
+
+bb.i5:		; preds = %FontSize.exit
+	call arm_apcscc  void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8]* @.str81872, i32 0, i32 0)) nounwind
+	br label %FontName.exit
+
+FontName.exit:		; preds = %bb.i5, %FontSize.exit
+	%3 = call arm_apcscc  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8]* @.str1822946, i32 0, i32 0), i32 %1, i8* undef) nounwind		; <i32> [#uses=0]
+	%4 = call arm_apcscc  i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind		; <i32> [#uses=0]
+	%5 = sub i32 %colmark, undef		; <i32> [#uses=1]
+	%6 = sub i32 %rowmark, undef		; <i32> [#uses=1]
+	%7 = load %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
+	%8 = call arm_apcscc  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %7, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %5, i32 %6) nounwind		; <i32> [#uses=0]
+	store i32 0, i32* @cpexists, align 4
+	%9 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 1		; <i32*> [#uses=1]
+	%10 = load i32* %9, align 4		; <i32> [#uses=1]
+	%11 = sub i32 0, %10		; <i32> [#uses=1]
+	%12 = load %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
+	%13 = call arm_apcscc  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %12, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %11) nounwind		; <i32> [#uses=0]
+	store i32 0, i32* @cpexists, align 4
+	br label %bb100.outer.outer
+
+bb100.outer.outer:		; preds = %bb79.critedge, %bb1.i3, %FontName.exit
+	%x_addr.0.ph.ph = phi %struct.rec* [ %x, %FontName.exit ], [ null, %bb79.critedge ], [ null, %bb1.i3 ]		; <%struct.rec*> [#uses=1]
+	%14 = getelementptr %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0		; <%struct.FILE_POS*> [#uses=0]
+	br label %bb100.outer
+
+bb.i80:		; preds = %bb3.i85
+	br i1 undef, label %bb2.i84, label %bb2.i51
+
+bb2.i84:		; preds = %bb100.outer, %bb.i80
+	br i1 undef, label %bb3.i77, label %bb3.i85
+
+bb3.i85:		; preds = %bb2.i84
+	br i1 false, label %StringBeginsWith.exit88, label %bb.i80
+
+StringBeginsWith.exit88:		; preds = %bb3.i85
+	br i1 undef, label %bb3.i77, label %bb2.i51
+
+bb2.i.i68:		; preds = %bb3.i77
+	br label %bb3.i77
+
+bb3.i77:		; preds = %bb2.i.i68, %StringBeginsWith.exit88, %bb2.i84
+	br i1 false, label %bb1.i58, label %bb2.i.i68
+
+bb1.i58:		; preds = %bb3.i77
+	unreachable
+
+bb.i47:		; preds = %bb3.i52
+	br i1 undef, label %bb2.i51, label %bb2.i.i15.critedge
+
+bb2.i51:		; preds = %bb.i47, %StringBeginsWith.exit88, %bb.i80
+	%15 = load i8* undef, align 1		; <i8> [#uses=0]
+	br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52
+
+bb3.i52:		; preds = %bb2.i51
+	br i1 false, label %StringBeginsWith.exit55, label %bb.i47
+
+StringBeginsWith.exit55thread-split:		; preds = %bb2.i51
+	br label %StringBeginsWith.exit55
+
+StringBeginsWith.exit55:		; preds = %StringBeginsWith.exit55thread-split, %bb3.i52
+	br label %bb2.i41
+
+bb2.i41:		; preds = %bb2.i41, %StringBeginsWith.exit55
+	br label %bb2.i41
+
+bb2.i.i15.critedge:		; preds = %bb.i47
+	%16 = call arm_apcscc  i8* @fgets(i8* undef, i32 512, %struct.FILE* %0) nounwind		; <i8*> [#uses=0]
+	%iftmp.560.0 = select i1 undef, i32 2, i32 0		; <i32> [#uses=1]
+	br label %bb100.outer
+
+bb2.i8:		; preds = %bb100.outer
+	br i1 undef, label %bb1.i3, label %bb79.critedge
+
+bb1.i3:		; preds = %bb2.i8
+	br label %bb100.outer.outer
+
+bb79.critedge:		; preds = %bb2.i8
+	store %struct.rec* null, %struct.rec** @zz_tmp, align 4
+	br label %bb100.outer.outer
+
+bb100.outer:		; preds = %bb2.i.i15.critedge, %bb100.outer.outer
+	%state.0.ph = phi i32 [ 0, %bb100.outer.outer ], [ %iftmp.560.0, %bb2.i.i15.critedge ]		; <i32> [#uses=1]
+	%cond = icmp eq i32 %state.0.ph, 1		; <i1> [#uses=1]
+	br i1 %cond, label %bb2.i8, label %bb2.i84
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
new file mode 100644
index 0000000000000..a62b61290a5a1
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
@@ -0,0 +1,508 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+neon -arm-use-neon-fp -relocation-model=pic -disable-fp-elim -O3
+
+	type { i16, i8, i8 }		; type %0
+	type { [2 x i32], [2 x i32] }		; type %1
+	type { %struct.GAP }		; type %2
+	type { %struct.rec* }		; type %3
+	type { i8, i8, i16, i8, i8, i8, i8 }		; type %4
+	type { i8, i8, i8, i8 }		; type %5
+	%struct.COMPOSITE = type { i8, i16, i16 }
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+	%struct.FILE_POS = type { i8, i8, i16, i32 }
+	%struct.FIRST_UNION = type { %struct.FILE_POS }
+	%struct.FONT_INFO = type { %struct.metrics*, i8*, i16*, %struct.COMPOSITE*, i32, %struct.rec*, %struct.rec*, i16, i16, i16*, i8*, i8*, i16* }
+	%struct.FOURTH_UNION = type { %struct.STYLE }
+	%struct.GAP = type { i8, i8, i16 }
+	%struct.LIST = type { %struct.rec*, %struct.rec* }
+	%struct.SECOND_UNION = type { %0 }
+	%struct.STYLE = type { %2, %2, i16, i16, i32 }
+	%struct.THIRD_UNION = type { %1 }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+	%struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+	%struct.metrics = type { i16, i16, i16, i16, i16 }
+	%struct.rec = type { %struct.head_type }
+@.str24239 = external constant [20 x i8], align 1		; <[20 x i8]*> [#uses=1]
+@no_file_pos = external global %4		; <%4*> [#uses=1]
+@.str19294 = external constant [9 x i8], align 1		; <[9 x i8]*> [#uses=1]
+@zz_lengths = external global [150 x i8]		; <[150 x i8]*> [#uses=1]
+@next_free.4772 = external global i8**		; <i8***> [#uses=3]
+@top_free.4773 = external global i8**		; <i8***> [#uses=2]
+@.str1575 = external constant [32 x i8], align 1		; <[32 x i8]*> [#uses=1]
+@zz_free = external global [524 x %struct.rec*]		; <[524 x %struct.rec*]*> [#uses=2]
+@zz_hold = external global %struct.rec*		; <%struct.rec**> [#uses=5]
+@zz_tmp = external global %struct.rec*		; <%struct.rec**> [#uses=2]
+@zz_res = external global %struct.rec*		; <%struct.rec**> [#uses=2]
+@xx_link = external global %struct.rec*		; <%struct.rec**> [#uses=2]
+@font_count = external global i32		; <i32*> [#uses=1]
+@.str81872 = external constant [10 x i8], align 1		; <[10 x i8]*> [#uses=1]
+@.str101874 = external constant [30 x i8], align 1		; <[30 x i8]*> [#uses=1]
+@.str111875 = external constant [17 x i8], align 1		; <[17 x i8]*> [#uses=1]
+@.str141878 = external constant [27 x i8], align 1		; <[27 x i8]*> [#uses=1]
+@out_fp = external global %struct.FILE*		; <%struct.FILE**> [#uses=3]
+@.str192782 = external constant [17 x i8], align 1		; <[17 x i8]*> [#uses=1]
+@cpexists = external global i32		; <i32*> [#uses=2]
+@.str212784 = external constant [17 x i8], align 1		; <[17 x i8]*> [#uses=1]
+@currentfont = external global i32		; <i32*> [#uses=3]
+@wordcount = external global i32		; <i32*> [#uses=1]
+@needs = external global %struct.rec*		; <%struct.rec**> [#uses=1]
+@.str742838 = external constant [6 x i8], align 1		; <[6 x i8]*> [#uses=1]
+@.str752839 = external constant [10 x i8], align 1		; <[10 x i8]*> [#uses=1]
+@.str1802944 = external constant [40 x i8], align 1		; <[40 x i8]*> [#uses=1]
+@.str1822946 = external constant [8 x i8], align 1		; <[8 x i8]*> [#uses=1]
+@.str1842948 = external constant [11 x i8], align 1		; <[11 x i8]*> [#uses=1]
+@.str1852949 = external constant [23 x i8], align 1		; <[23 x i8]*> [#uses=1]
+@.str1872951 = external constant [17 x i8], align 1		; <[17 x i8]*> [#uses=1]
+@.str1932957 = external constant [26 x i8], align 1		; <[26 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @fprintf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
+
+declare arm_apcscc i32 @"\01_fwrite"(i8*, i32, i32, i8*)
+
+declare arm_apcscc i32 @remove(i8* nocapture) nounwind
+
+declare arm_apcscc %struct.FILE* @OpenIncGraphicFile(i8*, i8 zeroext, %struct.rec** nocapture, %struct.FILE_POS*, i32* nocapture) nounwind
+
+declare arm_apcscc %struct.rec* @MakeWord(i32, i8* nocapture, %struct.FILE_POS*) nounwind
+
+declare arm_apcscc void @Error(i32, i32, i8*, i32, %struct.FILE_POS*, ...) nounwind
+
+declare arm_apcscc i32 @"\01_fputs"(i8*, %struct.FILE*)
+
+declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
+
+declare arm_apcscc i8* @fgets(i8*, i32, %struct.FILE* nocapture) nounwind
+
+define arm_apcscc void @PS_PrintGraphicInclude(%struct.rec* %x, i32 %colmark, i32 %rowmark) nounwind {
+entry:
+	%buff = alloca [512 x i8], align 4		; <[512 x i8]*> [#uses=5]
+	%0 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 1, i32 0, i32 0		; <i8*> [#uses=2]
+	%1 = load i8* %0, align 4		; <i8> [#uses=1]
+	%2 = add i8 %1, -94		; <i8> [#uses=1]
+	%3 = icmp ugt i8 %2, 1		; <i1> [#uses=1]
+	br i1 %3, label %bb, label %bb1
+
+bb:		; preds = %entry
+	br label %bb1
+
+bb1:		; preds = %bb, %entry
+	%4 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 2		; <%struct.SECOND_UNION*> [#uses=1]
+	%5 = bitcast %struct.SECOND_UNION* %4 to %5*		; <%5*> [#uses=1]
+	%6 = getelementptr %5* %5, i32 0, i32 1		; <i8*> [#uses=1]
+	%7 = load i8* %6, align 1		; <i8> [#uses=1]
+	%8 = icmp eq i8 %7, 0		; <i1> [#uses=1]
+	br i1 %8, label %bb2, label %bb3
+
+bb2:		; preds = %bb1
+	call arm_apcscc  void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([40 x i8]* @.str1802944, i32 0, i32 0)) nounwind
+	br label %bb3
+
+bb3:		; preds = %bb2, %bb1
+	%9 = load %struct.rec** undef, align 4		; <%struct.rec*> [#uses=0]
+	br label %bb5
+
+bb5:		; preds = %bb5, %bb3
+	%y.0 = load %struct.rec** null		; <%struct.rec*> [#uses=2]
+	br i1 false, label %bb5, label %bb6
+
+bb6:		; preds = %bb5
+	%10 = load i8* %0, align 4		; <i8> [#uses=1]
+	%11 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 1, i32 0		; <%struct.FILE_POS*> [#uses=1]
+	%12 = call arm_apcscc  %struct.FILE* @OpenIncGraphicFile(i8* undef, i8 zeroext %10, %struct.rec** null, %struct.FILE_POS* %11, i32* undef) nounwind		; <%struct.FILE*> [#uses=4]
+	br i1 false, label %bb7, label %bb8
+
+bb7:		; preds = %bb6
+	unreachable
+
+bb8:		; preds = %bb6
+	%13 = and i32 undef, 4095		; <i32> [#uses=2]
+	%14 = load i32* @currentfont, align 4		; <i32> [#uses=0]
+	br i1 false, label %bb10, label %bb9
+
+bb9:		; preds = %bb8
+	%15 = icmp ult i32 0, %13		; <i1> [#uses=1]
+	br i1 %15, label %bb.i, label %FontHalfXHeight.exit
+
+bb.i:		; preds = %bb9
+	call arm_apcscc  void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([17 x i8]* @.str111875, i32 0, i32 0)) nounwind
+	%.pre186 = load i32* @currentfont, align 4		; <i32> [#uses=1]
+	br label %FontHalfXHeight.exit
+
+FontHalfXHeight.exit:		; preds = %bb.i, %bb9
+	%16 = phi i32 [ %.pre186, %bb.i ], [ %13, %bb9 ]		; <i32> [#uses=1]
+	br i1 false, label %bb.i1, label %bb1.i
+
+bb.i1:		; preds = %FontHalfXHeight.exit
+	br label %bb1.i
+
+bb1.i:		; preds = %bb.i1, %FontHalfXHeight.exit
+	br i1 undef, label %bb2.i, label %FontSize.exit
+
+bb2.i:		; preds = %bb1.i
+	call arm_apcscc  void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 37, i32 61, i8* getelementptr ([30 x i8]* @.str101874, i32 0, i32 0), i32 1, %struct.FILE_POS* null) nounwind
+	unreachable
+
+FontSize.exit:		; preds = %bb1.i
+	%17 = getelementptr %struct.FONT_INFO* undef, i32 %16, i32 5		; <%struct.rec**> [#uses=0]
+	%18 = load i32* undef, align 4		; <i32> [#uses=1]
+	%19 = load i32* @currentfont, align 4		; <i32> [#uses=2]
+	%20 = load i32* @font_count, align 4		; <i32> [#uses=1]
+	%21 = icmp ult i32 %20, %19		; <i1> [#uses=1]
+	br i1 %21, label %bb.i5, label %FontName.exit
+
+bb.i5:		; preds = %FontSize.exit
+	call arm_apcscc  void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8]* @.str81872, i32 0, i32 0)) nounwind
+	br label %FontName.exit
+
+FontName.exit:		; preds = %bb.i5, %FontSize.exit
+	%22 = phi %struct.FONT_INFO* [ undef, %bb.i5 ], [ undef, %FontSize.exit ]		; <%struct.FONT_INFO*> [#uses=1]
+	%23 = getelementptr %struct.FONT_INFO* %22, i32 %19, i32 5		; <%struct.rec**> [#uses=0]
+	%24 = call arm_apcscc  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8]* @.str1822946, i32 0, i32 0), i32 %18, i8* null) nounwind		; <i32> [#uses=0]
+	br label %bb10
+
+bb10:		; preds = %FontName.exit, %bb8
+	%25 = call arm_apcscc  i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind		; <i32> [#uses=0]
+	%26 = sub i32 %rowmark, undef		; <i32> [#uses=1]
+	%27 = load %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
+	%28 = call arm_apcscc  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %27, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %26) nounwind		; <i32> [#uses=0]
+	store i32 0, i32* @cpexists, align 4
+	%29 = call arm_apcscc  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([17 x i8]* @.str192782, i32 0, i32 0), double 2.000000e+01, double 2.000000e+01) nounwind		; <i32> [#uses=0]
+	%30 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
+	%31 = load i32* %30, align 4		; <i32> [#uses=1]
+	%32 = sub i32 0, %31		; <i32> [#uses=1]
+	%33 = load i32* undef, align 4		; <i32> [#uses=1]
+	%34 = sub i32 0, %33		; <i32> [#uses=1]
+	%35 = load %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
+	%36 = call arm_apcscc  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %35, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %32, i32 %34) nounwind		; <i32> [#uses=0]
+	store i32 0, i32* @cpexists, align 4
+	%37 = load %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
+	%38 = getelementptr %struct.rec* %37, i32 0, i32 0, i32 4		; <%struct.FOURTH_UNION*> [#uses=1]
+	%39 = call arm_apcscc  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([23 x i8]* @.str1852949, i32 0, i32 0), %struct.FOURTH_UNION* %38) nounwind		; <i32> [#uses=0]
+	%buff14 = getelementptr [512 x i8]* %buff, i32 0, i32 0		; <i8*> [#uses=5]
+	%40 = call arm_apcscc  i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind		; <i8*> [#uses=0]
+	%iftmp.506.0 = select i1 undef, i32 2, i32 0		; <i32> [#uses=1]
+	%41 = getelementptr [512 x i8]* %buff, i32 0, i32 26		; <i8*> [#uses=1]
+	br label %bb100.outer.outer
+
+bb100.outer.outer:		; preds = %bb83, %bb10
+	%state.0.ph.ph = phi i32 [ %iftmp.506.0, %bb10 ], [ undef, %bb83 ]		; <i32> [#uses=1]
+	%x_addr.0.ph.ph = phi %struct.rec* [ %x, %bb10 ], [ %71, %bb83 ]		; <%struct.rec*> [#uses=1]
+	%42 = getelementptr %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0		; <%struct.FILE_POS*> [#uses=0]
+	br label %bb100.outer
+
+bb.i80:		; preds = %bb3.i85
+	%43 = icmp eq i8 %44, %46		; <i1> [#uses=1]
+	%indvar.next.i79 = add i32 %indvar.i81, 1		; <i32> [#uses=1]
+	br i1 %43, label %bb2.i84, label %bb2.i51
+
+bb2.i84:		; preds = %bb100.outer, %bb.i80
+	%indvar.i81 = phi i32 [ %indvar.next.i79, %bb.i80 ], [ 0, %bb100.outer ]		; <i32> [#uses=3]
+	%pp.0.i82 = getelementptr [27 x i8]* @.str141878, i32 0, i32 %indvar.i81		; <i8*> [#uses=2]
+	%sp.0.i83 = getelementptr [512 x i8]* %buff, i32 0, i32 %indvar.i81		; <i8*> [#uses=1]
+	%44 = load i8* %sp.0.i83, align 1		; <i8> [#uses=2]
+	%45 = icmp eq i8 %44, 0		; <i1> [#uses=1]
+	br i1 %45, label %StringBeginsWith.exit88thread-split, label %bb3.i85
+
+bb3.i85:		; preds = %bb2.i84
+	%46 = load i8* %pp.0.i82, align 1		; <i8> [#uses=3]
+	%47 = icmp eq i8 %46, 0		; <i1> [#uses=1]
+	br i1 %47, label %StringBeginsWith.exit88, label %bb.i80
+
+StringBeginsWith.exit88thread-split:		; preds = %bb2.i84
+	%.pr = load i8* %pp.0.i82		; <i8> [#uses=1]
+	br label %StringBeginsWith.exit88
+
+StringBeginsWith.exit88:		; preds = %StringBeginsWith.exit88thread-split, %bb3.i85
+	%48 = phi i8 [ %.pr, %StringBeginsWith.exit88thread-split ], [ %46, %bb3.i85 ]		; <i8> [#uses=1]
+	%phitmp91 = icmp eq i8 %48, 0		; <i1> [#uses=1]
+	br i1 %phitmp91, label %bb3.i77, label %bb2.i51
+
+bb2.i.i68:		; preds = %bb3.i77
+	br i1 false, label %bb2.i51, label %bb2.i75
+
+bb2.i75:		; preds = %bb2.i.i68
+	br label %bb3.i77
+
+bb3.i77:		; preds = %bb2.i75, %StringBeginsWith.exit88
+	%sp.0.i76 = getelementptr [512 x i8]* %buff, i32 0, i32 undef		; <i8*> [#uses=1]
+	%49 = load i8* %sp.0.i76, align 1		; <i8> [#uses=1]
+	%50 = icmp eq i8 %49, 0		; <i1> [#uses=1]
+	br i1 %50, label %bb24, label %bb2.i.i68
+
+bb24:		; preds = %bb3.i77
+	%51 = call arm_apcscc  %struct.rec* @MakeWord(i32 11, i8* %41, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind		; <%struct.rec*> [#uses=0]
+	%52 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4		; <i8> [#uses=1]
+	%53 = zext i8 %52 to i32		; <i32> [#uses=2]
+	%54 = getelementptr [524 x %struct.rec*]* @zz_free, i32 0, i32 %53		; <%struct.rec**> [#uses=2]
+	%55 = load %struct.rec** %54, align 4		; <%struct.rec*> [#uses=3]
+	%56 = icmp eq %struct.rec* %55, null		; <i1> [#uses=1]
+	br i1 %56, label %bb27, label %bb28
+
+bb27:		; preds = %bb24
+	br i1 undef, label %bb.i56, label %GetMemory.exit62
+
+bb.i56:		; preds = %bb27
+	br i1 undef, label %bb1.i58, label %bb2.i60
+
+bb1.i58:		; preds = %bb.i56
+	call arm_apcscc  void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
+	br label %bb2.i60
+
+bb2.i60:		; preds = %bb1.i58, %bb.i56
+	%.pre1.i59 = phi i8** [ undef, %bb1.i58 ], [ undef, %bb.i56 ]		; <i8**> [#uses=1]
+	store i8** undef, i8*** @top_free.4773, align 4
+	br label %GetMemory.exit62
+
+GetMemory.exit62:		; preds = %bb2.i60, %bb27
+	%57 = phi i8** [ %.pre1.i59, %bb2.i60 ], [ undef, %bb27 ]		; <i8**> [#uses=1]
+	%58 = getelementptr i8** %57, i32 %53		; <i8**> [#uses=1]
+	store i8** %58, i8*** @next_free.4772, align 4
+	store %struct.rec* undef, %struct.rec** @zz_hold, align 4
+	br label %bb29
+
+bb28:		; preds = %bb24
+	store %struct.rec* %55, %struct.rec** @zz_hold, align 4
+	%59 = load %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
+	store %struct.rec* %59, %struct.rec** %54, align 4
+	br label %bb29
+
+bb29:		; preds = %bb28, %GetMemory.exit62
+	%.pre184 = phi %struct.rec* [ %55, %bb28 ], [ undef, %GetMemory.exit62 ]		; <%struct.rec*> [#uses=3]
+	store i8 0, i8* undef
+	store %struct.rec* %.pre184, %struct.rec** @xx_link, align 4
+	br i1 undef, label %bb35, label %bb31
+
+bb31:		; preds = %bb29
+	store %struct.rec* %.pre184, %struct.rec** undef
+	br label %bb35
+
+bb35:		; preds = %bb31, %bb29
+	br i1 undef, label %bb41, label %bb37
+
+bb37:		; preds = %bb35
+	%60 = load %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
+	store %struct.rec* %60, %struct.rec** undef
+	store %struct.rec* undef, %struct.rec** null
+	store %struct.rec* %.pre184, %struct.rec** null, align 4
+	br label %bb41
+
+bb41:		; preds = %bb37, %bb35
+	%61 = call arm_apcscc  i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind		; <i8*> [#uses=1]
+	%62 = icmp eq i8* %61, null		; <i1> [#uses=1]
+	%iftmp.554.0 = select i1 %62, i32 2, i32 1		; <i32> [#uses=1]
+	br label %bb100.outer
+
+bb.i47:		; preds = %bb3.i52
+	%63 = icmp eq i8 %64, %65		; <i1> [#uses=1]
+	br i1 %63, label %bb2.i51, label %bb2.i41
+
+bb2.i51:		; preds = %bb.i47, %bb2.i.i68, %StringBeginsWith.exit88, %bb.i80
+	%pp.0.i49 = getelementptr [17 x i8]* @.str1872951, i32 0, i32 0		; <i8*> [#uses=1]
+	%64 = load i8* null, align 1		; <i8> [#uses=1]
+	br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52
+
+bb3.i52:		; preds = %bb2.i51
+	%65 = load i8* %pp.0.i49, align 1		; <i8> [#uses=1]
+	br i1 false, label %StringBeginsWith.exit55, label %bb.i47
+
+StringBeginsWith.exit55thread-split:		; preds = %bb2.i51
+	br label %StringBeginsWith.exit55
+
+StringBeginsWith.exit55:		; preds = %StringBeginsWith.exit55thread-split, %bb3.i52
+	br i1 false, label %bb49, label %bb2.i41
+
+bb49:		; preds = %StringBeginsWith.exit55
+	br label %bb2.i41
+
+bb2.i41:		; preds = %bb2.i41, %bb49, %StringBeginsWith.exit55, %bb.i47
+	br i1 false, label %bb2.i41, label %bb2.i.i15
+
+bb2.i.i15:		; preds = %bb2.i41
+	%pp.0.i.i13 = getelementptr [6 x i8]* @.str742838, i32 0, i32 0		; <i8*> [#uses=1]
+	br i1 false, label %StringBeginsWith.exitthread-split.i18, label %bb3.i.i16
+
+bb3.i.i16:		; preds = %bb2.i.i15
+	%66 = load i8* %pp.0.i.i13, align 1		; <i8> [#uses=1]
+	br label %StringBeginsWith.exit.i20
+
+StringBeginsWith.exitthread-split.i18:		; preds = %bb2.i.i15
+	br label %StringBeginsWith.exit.i20
+
+StringBeginsWith.exit.i20:		; preds = %StringBeginsWith.exitthread-split.i18, %bb3.i.i16
+	%67 = phi i8 [ undef, %StringBeginsWith.exitthread-split.i18 ], [ %66, %bb3.i.i16 ]		; <i8> [#uses=1]
+	%phitmp.i19 = icmp eq i8 %67, 0		; <i1> [#uses=1]
+	br i1 %phitmp.i19, label %bb58, label %bb2.i6.i26
+
+bb2.i6.i26:		; preds = %bb2.i6.i26, %StringBeginsWith.exit.i20
+	%indvar.i3.i23 = phi i32 [ %indvar.next.i1.i21, %bb2.i6.i26 ], [ 0, %StringBeginsWith.exit.i20 ]		; <i32> [#uses=3]
+	%sp.0.i5.i25 = getelementptr [512 x i8]* %buff, i32 0, i32 %indvar.i3.i23		; <i8*> [#uses=0]
+	%pp.0.i4.i24 = getelementptr [10 x i8]* @.str752839, i32 0, i32 %indvar.i3.i23		; <i8*> [#uses=1]
+	%68 = load i8* %pp.0.i4.i24, align 1		; <i8> [#uses=0]
+	%indvar.next.i1.i21 = add i32 %indvar.i3.i23, 1		; <i32> [#uses=1]
+	br i1 undef, label %bb2.i6.i26, label %bb55
+
+bb55:		; preds = %bb2.i6.i26
+	%69 = call arm_apcscc  i32 @"\01_fputs"(i8* %buff14, %struct.FILE* undef) nounwind		; <i32> [#uses=0]
+	unreachable
+
+bb58:		; preds = %StringBeginsWith.exit.i20
+	%70 = call arm_apcscc  i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind		; <i8*> [#uses=0]
+	%iftmp.560.0 = select i1 undef, i32 2, i32 0		; <i32> [#uses=1]
+	br label %bb100.outer
+
+bb.i7:		; preds = %bb3.i
+	br i1 false, label %bb2.i8, label %bb2.i.i
+
+bb2.i8:		; preds = %bb100.outer, %bb.i7
+	br i1 undef, label %StringBeginsWith.exitthread-split, label %bb3.i
+
+bb3.i:		; preds = %bb2.i8
+	br i1 undef, label %StringBeginsWith.exit, label %bb.i7
+
+StringBeginsWith.exitthread-split:		; preds = %bb2.i8
+	br label %StringBeginsWith.exit
+
+StringBeginsWith.exit:		; preds = %StringBeginsWith.exitthread-split, %bb3.i
+	%phitmp93 = icmp eq i8 undef, 0		; <i1> [#uses=1]
+	br i1 %phitmp93, label %bb66, label %bb2.i.i
+
+bb66:		; preds = %StringBeginsWith.exit
+	%71 = call arm_apcscc  %struct.rec* @MakeWord(i32 11, i8* undef, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind		; <%struct.rec*> [#uses=4]
+	%72 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4		; <i8> [#uses=1]
+	%73 = zext i8 %72 to i32		; <i32> [#uses=2]
+	%74 = getelementptr [524 x %struct.rec*]* @zz_free, i32 0, i32 %73		; <%struct.rec**> [#uses=2]
+	%75 = load %struct.rec** %74, align 4		; <%struct.rec*> [#uses=3]
+	%76 = icmp eq %struct.rec* %75, null		; <i1> [#uses=1]
+	br i1 %76, label %bb69, label %bb70
+
+bb69:		; preds = %bb66
+	br i1 undef, label %bb.i2, label %GetMemory.exit
+
+bb.i2:		; preds = %bb69
+	%77 = call arm_apcscc  noalias i8* @calloc(i32 1020, i32 4) nounwind		; <i8*> [#uses=1]
+	%78 = bitcast i8* %77 to i8**		; <i8**> [#uses=3]
+	store i8** %78, i8*** @next_free.4772, align 4
+	br i1 undef, label %bb1.i3, label %bb2.i4
+
+bb1.i3:		; preds = %bb.i2
+	call arm_apcscc  void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
+	br label %bb2.i4
+
+bb2.i4:		; preds = %bb1.i3, %bb.i2
+	%.pre1.i = phi i8** [ undef, %bb1.i3 ], [ %78, %bb.i2 ]		; <i8**> [#uses=1]
+	%79 = phi i8** [ undef, %bb1.i3 ], [ %78, %bb.i2 ]		; <i8**> [#uses=1]
+	%80 = getelementptr i8** %79, i32 1020		; <i8**> [#uses=1]
+	store i8** %80, i8*** @top_free.4773, align 4
+	br label %GetMemory.exit
+
+GetMemory.exit:		; preds = %bb2.i4, %bb69
+	%81 = phi i8** [ %.pre1.i, %bb2.i4 ], [ undef, %bb69 ]		; <i8**> [#uses=2]
+	%82 = bitcast i8** %81 to %struct.rec*		; <%struct.rec*> [#uses=3]
+	%83 = getelementptr i8** %81, i32 %73		; <i8**> [#uses=1]
+	store i8** %83, i8*** @next_free.4772, align 4
+	store %struct.rec* %82, %struct.rec** @zz_hold, align 4
+	br label %bb71
+
+bb70:		; preds = %bb66
+	%84 = load %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
+	store %struct.rec* %84, %struct.rec** %74, align 4
+	br label %bb71
+
+bb71:		; preds = %bb70, %GetMemory.exit
+	%.pre185 = phi %struct.rec* [ %75, %bb70 ], [ %82, %GetMemory.exit ]		; <%struct.rec*> [#uses=8]
+	%85 = phi %struct.rec* [ %75, %bb70 ], [ %82, %GetMemory.exit ]		; <%struct.rec*> [#uses=1]
+	%86 = getelementptr %struct.rec* %85, i32 0, i32 0, i32 1, i32 0, i32 0		; <i8*> [#uses=0]
+	%87 = getelementptr %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 1		; <%struct.rec**> [#uses=0]
+	%88 = getelementptr %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 0		; <%struct.rec**> [#uses=1]
+	store %struct.rec* %.pre185, %struct.rec** @xx_link, align 4
+	store %struct.rec* %.pre185, %struct.rec** @zz_res, align 4
+	%89 = load %struct.rec** @needs, align 4		; <%struct.rec*> [#uses=2]
+	store %struct.rec* %89, %struct.rec** @zz_hold, align 4
+	br i1 false, label %bb77, label %bb73
+
+bb73:		; preds = %bb71
+	%90 = getelementptr %struct.rec* %89, i32 0, i32 0, i32 0, i32 0, i32 0		; <%struct.rec**> [#uses=1]
+	store %struct.rec* null, %struct.rec** @zz_tmp, align 4
+	store %struct.rec* %.pre185, %struct.rec** %90
+	store %struct.rec* %.pre185, %struct.rec** undef, align 4
+	br label %bb77
+
+bb77:		; preds = %bb73, %bb71
+	store %struct.rec* %.pre185, %struct.rec** @zz_res, align 4
+	store %struct.rec* %71, %struct.rec** @zz_hold, align 4
+	br i1 undef, label %bb83, label %bb79
+
+bb79:		; preds = %bb77
+	%91 = getelementptr %struct.rec* %71, i32 0, i32 0, i32 0, i32 1, i32 0		; <%struct.rec**> [#uses=1]
+	store %struct.rec* null, %struct.rec** @zz_tmp, align 4
+	%92 = load %struct.rec** %88, align 4		; <%struct.rec*> [#uses=1]
+	store %struct.rec* %92, %struct.rec** %91
+	%93 = getelementptr %struct.rec* undef, i32 0, i32 0, i32 0, i32 1, i32 1		; <%struct.rec**> [#uses=1]
+	store %struct.rec* %71, %struct.rec** %93, align 4
+	store %struct.rec* %.pre185, %struct.rec** undef, align 4
+	br label %bb83
+
+bb83:		; preds = %bb79, %bb77
+	br label %bb100.outer.outer
+
+bb.i.i:		; preds = %bb3.i.i
+	br i1 undef, label %bb2.i.i, label %bb2.i6.i
+
+bb2.i.i:		; preds = %bb.i.i, %StringBeginsWith.exit, %bb.i7
+	br i1 undef, label %StringBeginsWith.exitthread-split.i, label %bb3.i.i
+
+bb3.i.i:		; preds = %bb2.i.i
+	br i1 undef, label %StringBeginsWith.exit.i, label %bb.i.i
+
+StringBeginsWith.exitthread-split.i:		; preds = %bb2.i.i
+	br label %StringBeginsWith.exit.i
+
+StringBeginsWith.exit.i:		; preds = %StringBeginsWith.exitthread-split.i, %bb3.i.i
+	br i1 false, label %bb94, label %bb2.i6.i
+
+bb.i2.i:		; preds = %bb3.i7.i
+	br i1 false, label %bb2.i6.i, label %bb91
+
+bb2.i6.i:		; preds = %bb.i2.i, %StringBeginsWith.exit.i, %bb.i.i
+	br i1 undef, label %strip_out.exitthread-split, label %bb3.i7.i
+
+bb3.i7.i:		; preds = %bb2.i6.i
+	%94 = load i8* undef, align 1		; <i8> [#uses=1]
+	br i1 undef, label %strip_out.exit, label %bb.i2.i
+
+strip_out.exitthread-split:		; preds = %bb2.i6.i
+	%.pr100 = load i8* undef		; <i8> [#uses=1]
+	br label %strip_out.exit
+
+strip_out.exit:		; preds = %strip_out.exitthread-split, %bb3.i7.i
+	%95 = phi i8 [ %.pr100, %strip_out.exitthread-split ], [ %94, %bb3.i7.i ]		; <i8> [#uses=0]
+	br i1 undef, label %bb94, label %bb91
+
+bb91:		; preds = %strip_out.exit, %bb.i2.i
+	unreachable
+
+bb94:		; preds = %strip_out.exit, %StringBeginsWith.exit.i
+	%96 = call arm_apcscc  i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind		; <i8*> [#uses=0]
+	unreachable
+
+bb100.outer:		; preds = %bb58, %bb41, %bb100.outer.outer
+	%state.0.ph = phi i32 [ %state.0.ph.ph, %bb100.outer.outer ], [ %iftmp.560.0, %bb58 ], [ %iftmp.554.0, %bb41 ]		; <i32> [#uses=1]
+	switch i32 %state.0.ph, label %bb2.i84 [
+		i32 2, label %bb101.split
+		i32 1, label %bb2.i8
+	]
+
+bb101.split:		; preds = %bb100.outer
+	%97 = icmp eq i32 undef, 0		; <i1> [#uses=1]
+	br i1 %97, label %bb103, label %bb102
+
+bb102:		; preds = %bb101.split
+	%98 = call arm_apcscc  i32 @remove(i8* getelementptr ([9 x i8]* @.str19294, i32 0, i32 0)) nounwind		; <i32> [#uses=0]
+	unreachable
+
+bb103:		; preds = %bb101.split
+	%99 = load %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
+	%100 = call arm_apcscc  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %99, i8* getelementptr ([26 x i8]* @.str1932957, i32 0, i32 0)) nounwind		; <i32> [#uses=0]
+	store i32 0, i32* @wordcount, align 4
+	ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
new file mode 100644
index 0000000000000..3cbb212b628bc
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp 
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp | grep fcpys | count 1
+; rdar://7117307
+
+	%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+	%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+	%struct.Patient = type { i32, i32, i32, %struct.Village* }
+	%struct.Results = type { float, float, float }
+	%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc void @get_results(%struct.Results* noalias nocapture sret %agg.result, %struct.Village* %village) nounwind {
+entry:
+	br i1 undef, label %bb, label %bb6.preheader
+
+bb6.preheader:		; preds = %entry
+	call void @llvm.memcpy.i32(i8* undef, i8* undef, i32 12, i32 4)
+	br i1 undef, label %bb15, label %bb13
+
+bb:		; preds = %entry
+	ret void
+
+bb13:		; preds = %bb13, %bb6.preheader
+	%0 = fadd float undef, undef		; <float> [#uses=1]
+	%1 = fadd float undef, 1.000000e+00		; <float> [#uses=1]
+	br i1 undef, label %bb15, label %bb13
+
+bb15:		; preds = %bb13, %bb6.preheader
+	%r1.0.0.lcssa = phi float [ 0.000000e+00, %bb6.preheader ], [ %1, %bb13 ]		; <float> [#uses=1]
+	%r1.1.0.lcssa = phi float [ undef, %bb6.preheader ], [ %0, %bb13 ]		; <float> [#uses=0]
+	store float %r1.0.0.lcssa, float* undef, align 4
+	ret void
+}
+
+declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll
new file mode 100644
index 0000000000000..acf562c74a2a8
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp
+; rdar://7117307
+
+	%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+	%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+	%struct.Patient = type { i32, i32, i32, %struct.Village* }
+	%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc %struct.List* @sim(%struct.Village* %village) nounwind {
+entry:
+	br i1 undef, label %bb14, label %bb3.preheader
+
+bb3.preheader:		; preds = %entry
+	br label %bb5
+
+bb5:		; preds = %bb5, %bb3.preheader
+	br i1 undef, label %bb11, label %bb5
+
+bb11:		; preds = %bb5
+	%0 = fmul float undef, 0x41E0000000000000		; <float> [#uses=1]
+	%1 = fptosi float %0 to i32		; <i32> [#uses=1]
+	store i32 %1, i32* undef, align 4
+	br i1 undef, label %generate_patient.exit, label %generate_patient.exit.thread
+
+generate_patient.exit.thread:		; preds = %bb11
+	ret %struct.List* null
+
+generate_patient.exit:		; preds = %bb11
+	br i1 undef, label %bb14, label %bb12
+
+bb12:		; preds = %generate_patient.exit
+	br i1 undef, label %bb.i, label %bb1.i
+
+bb.i:		; preds = %bb12
+	ret %struct.List* null
+
+bb1.i:		; preds = %bb12
+	ret %struct.List* null
+
+bb14:		; preds = %generate_patient.exit, %entry
+	ret %struct.List* undef
+}
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
new file mode 100644
index 0000000000000..3ada02676bfc3
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp
+; rdar://7117307
+
+	%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+	%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+	%struct.Patient = type { i32, i32, i32, %struct.Village* }
+	%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc %struct.List* @sim(%struct.Village* %village) nounwind {
+entry:
+	br i1 undef, label %bb14, label %bb3.preheader
+
+bb3.preheader:		; preds = %entry
+	br label %bb5
+
+bb5:		; preds = %bb5, %bb3.preheader
+	br i1 undef, label %bb11, label %bb5
+
+bb11:		; preds = %bb5
+	%0 = load i32* undef, align 4		; <i32> [#uses=1]
+	%1 = xor i32 %0, 123459876		; <i32> [#uses=1]
+	%2 = sdiv i32 %1, 127773		; <i32> [#uses=1]
+	%3 = mul i32 %2, 2836		; <i32> [#uses=1]
+	%4 = sub i32 0, %3		; <i32> [#uses=1]
+	%5 = xor i32 %4, 123459876		; <i32> [#uses=1]
+	%idum_addr.0.i.i = select i1 undef, i32 undef, i32 %5		; <i32> [#uses=1]
+	%6 = sitofp i32 %idum_addr.0.i.i to double		; <double> [#uses=1]
+	%7 = fmul double %6, 0x3E00000000200000		; <double> [#uses=1]
+	%8 = fptrunc double %7 to float		; <float> [#uses=2]
+	%9 = fmul float %8, 0x41E0000000000000		; <float> [#uses=1]
+	%10 = fptosi float %9 to i32		; <i32> [#uses=1]
+	store i32 %10, i32* undef, align 4
+	%11 = fpext float %8 to double		; <double> [#uses=1]
+	%12 = fcmp ogt double %11, 6.660000e-01		; <i1> [#uses=1]
+	br i1 %12, label %generate_patient.exit, label %generate_patient.exit.thread
+
+generate_patient.exit.thread:		; preds = %bb11
+	ret %struct.List* null
+
+generate_patient.exit:		; preds = %bb11
+	br i1 undef, label %bb14, label %bb12
+
+bb12:		; preds = %generate_patient.exit
+	br i1 undef, label %bb.i, label %bb1.i
+
+bb.i:		; preds = %bb12
+	ret %struct.List* null
+
+bb1.i:		; preds = %bb12
+	ret %struct.List* null
+
+bb14:		; preds = %generate_patient.exit, %entry
+	ret %struct.List* undef
+}
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
new file mode 100644
index 0000000000000..03f9facfa9557
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s
+; PR4659
+; PR4682
+
+define hidden arm_aapcscc i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind {
+entry:
+; CHECK: __gcov_execlp:
+; CHECK: mov sp, r7
+; CHECK: sub sp, #1 * 4
+	call arm_aapcscc  void @__gcov_flush() nounwind
+	br i1 undef, label %bb5, label %bb
+
+bb:		; preds = %bb, %entry
+	br i1 undef, label %bb5, label %bb
+
+bb5:		; preds = %bb, %entry
+	%0 = alloca i8*, i32 undef, align 4		; <i8**> [#uses=1]
+	%1 = call arm_aapcscc  i32 @execvp(i8* %path, i8** %0) nounwind		; <i32> [#uses=1]
+	ret i32 %1
+}
+
+declare hidden arm_aapcscc void @__gcov_flush()
+
+declare arm_aapcscc i32 @execvp(i8*, i8**) nounwind
diff --git a/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll
new file mode 100644
index 0000000000000..93f5a0f6c41fd
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+vfp2
+; PR4686
+
+	%a = type { i32 (...)** }
+	%b = type { %a }
+	%c = type { float, float, float, float }
+
+declare arm_aapcs_vfpcc float @bar(%c*)
+
+define arm_aapcs_vfpcc void @foo(%b* %x, %c* %y) {
+entry:
+	%0 = call arm_aapcs_vfpcc  float @bar(%c* %y)		; <float> [#uses=0]
+	%1 = fadd float undef, undef		; <float> [#uses=1]
+	store float %1, float* undef, align 8
+	ret void
+}
diff --git a/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll b/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
new file mode 100644
index 0000000000000..090ed2d81f60f
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -arm-use-neon-fp
+
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+	%struct.JHUFF_TBL = type { [17 x i8], [256 x i8], i32 }
+	%struct.JQUANT_TBL = type { [64 x i16], i32 }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+	%struct.anon = type { [8 x i32], [48 x i8] }
+	%struct.backing_store_info = type { void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*)*, %struct.FILE*, [64 x i8] }
+	%struct.jpeg_color_deconverter = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32, i8**, i32)* }
+	%struct.jpeg_color_quantizer = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i8**, i32)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)* }
+	%struct.jpeg_common_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32 }
+	%struct.jpeg_component_info = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.JQUANT_TBL*, i8* }
+	%struct.jpeg_d_coef_controller = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, i8***)*, %struct.jvirt_barray_control** }
+	%struct.jpeg_d_main_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i32*, i32)* }
+	%struct.jpeg_d_post_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)* }
+	%struct.jpeg_decomp_master = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 }
+	%struct.jpeg_decompress_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32, %struct.jpeg_source_mgr*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %struct.JQUANT_TBL*], [4 x %struct.JHUFF_TBL*], [4 x %struct.JHUFF_TBL*], i32, %struct.jpeg_component_info*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i16, i16, i32, i8, i32, i32, i32, i32, i32, i8*, i32, [4 x %struct.jpeg_component_info*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %struct.jpeg_decomp_master*, %struct.jpeg_d_main_controller*, %struct.jpeg_d_coef_controller*, %struct.jpeg_d_post_controller*, %struct.jpeg_input_controller*, %struct.jpeg_marker_reader*, %struct.jpeg_entropy_decoder*, %struct.jpeg_inverse_dct*, %struct.jpeg_upsampler*, %struct.jpeg_color_deconverter*, %struct.jpeg_color_quantizer* }
+	%struct.jpeg_entropy_decoder = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, [64 x i16]**)* }
+	%struct.jpeg_error_mgr = type { void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i8*)*, void (%struct.jpeg_common_struct*)*, i32, %struct.anon, i32, i32, i8**, i32, i8**, i32, i32 }
+	%struct.jpeg_input_controller = type { i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32, i32 }
+	%struct.jpeg_inverse_dct = type { void (%struct.jpeg_decompress_struct*)*, [10 x void (%struct.jpeg_decompress_struct*, %struct.jpeg_component_info*, i16*, i8**, i32)*] }
+	%struct.jpeg_marker_reader = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, [16 x i32 (%struct.jpeg_decompress_struct*)*], i32, i32, i32, i32 }
+	%struct.jpeg_memory_mgr = type { i8* (%struct.jpeg_common_struct*, i32, i32)*, i8* (%struct.jpeg_common_struct*, i32, i32)*, i8** (%struct.jpeg_common_struct*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, i32, i32, i32)*, %struct.jvirt_sarray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, %struct.jvirt_barray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, void (%struct.jpeg_common_struct*)*, i8** (%struct.jpeg_common_struct*, %struct.jvirt_sarray_control*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, %struct.jvirt_barray_control*, i32, i32, i32)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, i32 }
+	%struct.jpeg_progress_mgr = type { void (%struct.jpeg_common_struct*)*, i32, i32, i32, i32 }
+	%struct.jpeg_source_mgr = type { i8*, i32, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i32)*, i32 (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*)* }
+	%struct.jpeg_upsampler = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
+	%struct.jvirt_barray_control = type { [64 x i16]**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_barray_control*, %struct.backing_store_info }
+	%struct.jvirt_sarray_control = type { i8**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_sarray_control*, %struct.backing_store_info }
+
+define arm_apcscc void @jpeg_idct_float(%struct.jpeg_decompress_struct* nocapture %cinfo, %struct.jpeg_component_info* nocapture %compptr, i16* nocapture %coef_block, i8** nocapture %output_buf, i32 %output_col) nounwind {
+entry:
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%0 = load float* undef, align 4		; <float> [#uses=1]
+	%1 = fmul float undef, %0		; <float> [#uses=2]
+	%tmp73 = add i32 0, 224		; <i32> [#uses=1]
+	%scevgep74 = getelementptr i8* null, i32 %tmp73		; <i8*> [#uses=1]
+	%scevgep7475 = bitcast i8* %scevgep74 to float*		; <float*> [#uses=1]
+	%2 = load float* null, align 4		; <float> [#uses=1]
+	%3 = fmul float 0.000000e+00, %2		; <float> [#uses=2]
+	%4 = fadd float %1, %3		; <float> [#uses=1]
+	%5 = fsub float %1, %3		; <float> [#uses=2]
+	%6 = fadd float undef, 0.000000e+00		; <float> [#uses=2]
+	%7 = fmul float undef, 0x3FF6A09E60000000		; <float> [#uses=1]
+	%8 = fsub float %7, %6		; <float> [#uses=2]
+	%9 = fsub float %4, %6		; <float> [#uses=1]
+	%10 = fadd float %5, %8		; <float> [#uses=2]
+	%11 = fsub float %5, %8		; <float> [#uses=1]
+	%12 = sitofp i16 undef to float		; <float> [#uses=1]
+	%13 = fmul float %12, 0.000000e+00		; <float> [#uses=2]
+	%14 = sitofp i16 undef to float		; <float> [#uses=1]
+	%15 = load float* %scevgep7475, align 4		; <float> [#uses=1]
+	%16 = fmul float %14, %15		; <float> [#uses=2]
+	%17 = fadd float undef, undef		; <float> [#uses=2]
+	%18 = fadd float %13, %16		; <float> [#uses=2]
+	%19 = fsub float %13, %16		; <float> [#uses=1]
+	%20 = fadd float %18, %17		; <float> [#uses=2]
+	%21 = fsub float %18, %17		; <float> [#uses=1]
+	%22 = fmul float %21, 0x3FF6A09E60000000		; <float> [#uses=1]
+	%23 = fmul float undef, 0x3FFD906BC0000000		; <float> [#uses=2]
+	%24 = fmul float %19, 0x3FF1517A80000000		; <float> [#uses=1]
+	%25 = fsub float %24, %23		; <float> [#uses=1]
+	%26 = fadd float undef, %23		; <float> [#uses=1]
+	%27 = fsub float %26, %20		; <float> [#uses=3]
+	%28 = fsub float %22, %27		; <float> [#uses=2]
+	%29 = fadd float %25, %28		; <float> [#uses=1]
+	%30 = fadd float undef, %20		; <float> [#uses=1]
+	store float %30, float* undef, align 4
+	%31 = fadd float %10, %27		; <float> [#uses=1]
+	store float %31, float* undef, align 4
+	%32 = fsub float %10, %27		; <float> [#uses=1]
+	store float %32, float* undef, align 4
+	%33 = fadd float %11, %28		; <float> [#uses=1]
+	store float %33, float* undef, align 4
+	%34 = fsub float %9, %29		; <float> [#uses=1]
+	store float %34, float* undef, align 4
+	br label %bb
+}
diff --git a/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll
new file mode 100644
index 0000000000000..a0f99187a4a64
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+vfp2
+; PR4686
+
+@g_d = external global double		; <double*> [#uses=1]
+
+define arm_aapcscc void @foo(float %yIncr) {
+entry:
+	br i1 undef, label %bb, label %bb4
+
+bb:		; preds = %entry
+	%0 = call arm_aapcs_vfpcc  float @bar()		; <float> [#uses=1]
+	%1 = fpext float %0 to double		; <double> [#uses=1]
+	store double %1, double* @g_d, align 8
+	br label %bb4
+
+bb4:		; preds = %bb, %entry
+	unreachable
+}
+
+declare arm_aapcs_vfpcc float @bar()
diff --git a/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll b/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
new file mode 100644
index 0000000000000..cbe250b6df7a4
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+vfp2
+
+define arm_apcscc float @t1(i32 %v0) nounwind {
+entry:
+	store i32 undef, i32* undef, align 4
+	%0 = load [4 x i8]** undef, align 4		; <[4 x i8]*> [#uses=1]
+	%1 = load i8* undef, align 1		; <i8> [#uses=1]
+	%2 = zext i8 %1 to i32		; <i32> [#uses=1]
+	%3 = getelementptr [4 x i8]* %0, i32 %v0, i32 0		; <i8*> [#uses=1]
+	%4 = load i8* %3, align 1		; <i8> [#uses=1]
+	%5 = zext i8 %4 to i32		; <i32> [#uses=1]
+	%6 = sub i32 %5, %2		; <i32> [#uses=1]
+	%7 = sitofp i32 %6 to float		; <float> [#uses=1]
+	ret float %7
+}
diff --git a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
new file mode 100644
index 0000000000000..e84e86702493d
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
@@ -0,0 +1,154 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+
+%struct.pix_pos = type { i32, i32, i32, i32, i32, i32 }
+
+@getNeighbour = external global void (i32, i32, i32, i32, %struct.pix_pos*)*, align 4 ; <void (i32, i32, i32, i32, %struct.pix_pos*)**> [#uses=2]
+
+define arm_apcscc void @t() nounwind {
+; CHECK: t:
+; CHECK:      ittt eq
+; CHECK-NEXT: addeq
+; CHECK-NEXT: movweq
+; CHECK-NEXT: movteq
+entry:
+  %pix_a.i294 = alloca [4 x %struct.pix_pos], align 4 ; <[4 x %struct.pix_pos]*> [#uses=2]
+  br i1 undef, label %land.rhs, label %lor.end
+
+land.rhs:                                         ; preds = %entry
+  br label %lor.end
+
+lor.end:                                          ; preds = %land.rhs, %entry
+  switch i32 0, label %if.end371 [
+    i32 10, label %if.then366
+    i32 14, label %if.then366
+  ]
+
+if.then366:                                       ; preds = %lor.end, %lor.end
+  unreachable
+
+if.end371:                                        ; preds = %lor.end
+  %arrayidx56.2.i = getelementptr [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 2 ; <%struct.pix_pos*> [#uses=1]
+  %arrayidx56.3.i = getelementptr [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 3 ; <%struct.pix_pos*> [#uses=1]
+  br i1 undef, label %for.body1857, label %for.end4557
+
+for.body1857:                                     ; preds = %if.end371
+  br i1 undef, label %if.then1867, label %for.cond1933
+
+if.then1867:                                      ; preds = %for.body1857
+  unreachable
+
+for.cond1933:                                     ; preds = %for.body1857
+  br i1 undef, label %for.body1940, label %if.then4493
+
+for.body1940:                                     ; preds = %for.cond1933
+  %shl = shl i32 undef, 2                         ; <i32> [#uses=1]
+  %shl1959 = shl i32 undef, 2                     ; <i32> [#uses=4]
+  br i1 undef, label %if.then1992, label %if.else2003
+
+if.then1992:                                      ; preds = %for.body1940
+  %tmp14.i302 = load i32* undef                   ; <i32> [#uses=4]
+  %add.i307452 = or i32 %shl1959, 1               ; <i32> [#uses=1]
+  %sub.i308 = add i32 %shl, -1                    ; <i32> [#uses=4]
+  call arm_apcscc  void undef(i32 %tmp14.i302, i32 %sub.i308, i32 %shl1959, i32 0, %struct.pix_pos* undef) nounwind
+  %tmp49.i309 = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
+  call arm_apcscc  void %tmp49.i309(i32 %tmp14.i302, i32 %sub.i308, i32 %add.i307452, i32 0, %struct.pix_pos* null) nounwind
+  %tmp49.1.i = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
+  call arm_apcscc  void %tmp49.1.i(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.2.i) nounwind
+  call arm_apcscc  void undef(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.3.i) nounwind
+  unreachable
+
+if.else2003:                                      ; preds = %for.body1940
+  switch i32 undef, label %if.then2015 [
+    i32 10, label %if.then4382
+    i32 14, label %if.then4382
+  ]
+
+if.then2015:                                      ; preds = %if.else2003
+  br i1 undef, label %if.else2298, label %if.then2019
+
+if.then2019:                                      ; preds = %if.then2015
+  br i1 undef, label %if.then2065, label %if.else2081
+
+if.then2065:                                      ; preds = %if.then2019
+  br label %if.end2128
+
+if.else2081:                                      ; preds = %if.then2019
+  br label %if.end2128
+
+if.end2128:                                       ; preds = %if.else2081, %if.then2065
+  unreachable
+
+if.else2298:                                      ; preds = %if.then2015
+  br i1 undef, label %land.lhs.true2813, label %cond.end2841
+
+land.lhs.true2813:                                ; preds = %if.else2298
+  br i1 undef, label %cond.end2841, label %cond.true2824
+
+cond.true2824:                                    ; preds = %land.lhs.true2813
+  br label %cond.end2841
+
+cond.end2841:                                     ; preds = %cond.true2824, %land.lhs.true2813, %if.else2298
+  br i1 undef, label %for.cond2882.preheader, label %for.cond2940.preheader
+
+for.cond2882.preheader:                           ; preds = %cond.end2841
+  %mul3693 = shl i32 undef, 1                     ; <i32> [#uses=2]
+  br i1 undef, label %if.then3689, label %if.else3728
+
+for.cond2940.preheader:                           ; preds = %cond.end2841
+  br label %for.inc3040
+
+for.inc3040:                                      ; preds = %for.inc3040, %for.cond2940.preheader
+  br label %for.inc3040
+
+if.then3689:                                      ; preds = %for.cond2882.preheader
+  %add3695 = add nsw i32 %mul3693, %shl1959       ; <i32> [#uses=1]
+  %mul3697 = shl i32 %add3695, 2                  ; <i32> [#uses=2]
+  %arrayidx3705 = getelementptr inbounds i16* undef, i32 1 ; <i16*> [#uses=1]
+  %tmp3706 = load i16* %arrayidx3705              ; <i16> [#uses=1]
+  %conv3707 = sext i16 %tmp3706 to i32            ; <i32> [#uses=1]
+  %add3708 = add nsw i32 %conv3707, %mul3697      ; <i32> [#uses=1]
+  %arrayidx3724 = getelementptr inbounds i16* null, i32 1 ; <i16*> [#uses=1]
+  %tmp3725 = load i16* %arrayidx3724              ; <i16> [#uses=1]
+  %conv3726 = sext i16 %tmp3725 to i32            ; <i32> [#uses=1]
+  %add3727 = add nsw i32 %conv3726, %mul3697      ; <i32> [#uses=1]
+  br label %if.end3770
+
+if.else3728:                                      ; preds = %for.cond2882.preheader
+  %mul3733 = add i32 %shl1959, 1073741816         ; <i32> [#uses=1]
+  %add3735 = add nsw i32 %mul3733, %mul3693       ; <i32> [#uses=1]
+  %mul3737 = shl i32 %add3735, 2                  ; <i32> [#uses=2]
+  %tmp3746 = load i16* undef                      ; <i16> [#uses=1]
+  %conv3747 = sext i16 %tmp3746 to i32            ; <i32> [#uses=1]
+  %add3748 = add nsw i32 %conv3747, %mul3737      ; <i32> [#uses=1]
+  %arrayidx3765 = getelementptr inbounds i16* null, i32 1 ; <i16*> [#uses=1]
+  %tmp3766 = load i16* %arrayidx3765              ; <i16> [#uses=1]
+  %conv3767 = sext i16 %tmp3766 to i32            ; <i32> [#uses=1]
+  %add3768 = add nsw i32 %conv3767, %mul3737      ; <i32> [#uses=1]
+  br label %if.end3770
+
+if.end3770:                                       ; preds = %if.else3728, %if.then3689
+  %vec2_y.1 = phi i32 [ %add3727, %if.then3689 ], [ %add3768, %if.else3728 ] ; <i32> [#uses=0]
+  %vec1_y.2 = phi i32 [ %add3708, %if.then3689 ], [ %add3748, %if.else3728 ] ; <i32> [#uses=0]
+  unreachable
+
+if.then4382:                                      ; preds = %if.else2003, %if.else2003
+  switch i32 undef, label %if.then4394 [
+    i32 10, label %if.else4400
+    i32 14, label %if.else4400
+  ]
+
+if.then4394:                                      ; preds = %if.then4382
+  unreachable
+
+if.else4400:                                      ; preds = %if.then4382, %if.then4382
+  br label %for.cond4451.preheader
+
+for.cond4451.preheader:                           ; preds = %for.cond4451.preheader, %if.else4400
+  br label %for.cond4451.preheader
+
+if.then4493:                                      ; preds = %for.cond1933
+  unreachable
+
+for.end4557:                                      ; preds = %if.end371
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/carry.ll b/test/CodeGen/Thumb2/carry.ll
index 3450c5aea4051..de6f6e260de3d 100644
--- a/test/CodeGen/Thumb2/carry.ll
+++ b/test/CodeGen/Thumb2/carry.ll
@@ -1,15 +1,21 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "subs r" | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "adc r"
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "sbc r"  | count 2
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i64 @f1(i64 %a, i64 %b) {
 entry:
+; CHECK: f1:
+; CHECK: subs r0, r0, r2
+; CHECK: sbcs r1, r3
 	%tmp = sub i64 %a, %b
 	ret i64 %tmp
 }
 
 define i64 @f2(i64 %a, i64 %b) {
 entry:
+; CHECK: f2:
+; CHECK: adds r0, r0, r0
+; CHECK: adcs r1, r1
+; CHECK: subs r0, r0, r2
+; CHECK: sbcs r1, r3
         %tmp1 = shl i64 %a, 1
 	%tmp2 = sub i64 %tmp1, %b
 	ret i64 %tmp2
diff --git a/test/CodeGen/Thumb2/frameless.ll b/test/CodeGen/Thumb2/frameless.ll
new file mode 100644
index 0000000000000..c3c8cf1dd141f
--- /dev/null
+++ b/test/CodeGen/Thumb2/frameless.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-fp-elim | not grep mov
+; RUN: llc < %s -mtriple=thumbv7-linux -disable-fp-elim | not grep mov
+
+define arm_apcscc void @t() nounwind readnone {
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/frameless2.ll b/test/CodeGen/Thumb2/frameless2.ll
new file mode 100644
index 0000000000000..7cc7b19142874
--- /dev/null
+++ b/test/CodeGen/Thumb2/frameless2.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-fp-elim | not grep r7
+
+%struct.noise3 = type { [3 x [17 x i32]] }
+%struct.noiseguard = type { i32, i32, i32 }
+
+define arm_apcscc void @vorbis_encode_noisebias_setup(i8* nocapture %vi.0.7.val, double %s, i32 %block, i32* nocapture %suppress, %struct.noise3* nocapture %in, %struct.noiseguard* nocapture %guard, double %userbias) nounwind {
+entry:
+  %0 = getelementptr %struct.noiseguard* %guard, i32 %block, i32 2; <i32*> [#uses=1]
+  %1 = load i32* %0, align 4                      ; <i32> [#uses=1]
+  store i32 %1, i32* undef, align 4
+  unreachable
+}
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
new file mode 100644
index 0000000000000..865b17b7f1f48
--- /dev/null
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define void @test1() {
+; CHECK: test1:
+; CHECK: sub sp, #64 * 4
+    %tmp = alloca [ 64 x i32 ] , align 4
+    ret void
+}
+
+define void @test2() {
+; CHECK: test2:
+; CHECK: sub.w sp, sp, #4160
+; CHECK: sub sp, #2 * 4
+    %tmp = alloca [ 4168 x i8 ] , align 4
+    ret void
+}
+
+define i32 @test3() {
+; CHECK: test3:
+; CHECK: sub.w sp, sp, #805306368
+; CHECK: sub sp, #4 * 4
+    %retval = alloca i32, align 4
+    %tmp = alloca i32, align 4
+    %a = alloca [805306369 x i8], align 16
+    store i32 0, i32* %tmp
+    %tmp1 = load i32* %tmp
+    ret i32 %tmp1
+}
diff --git a/test/CodeGen/Thumb2/load-global.ll b/test/CodeGen/Thumb2/load-global.ll
index 1b1fe7b1b5fc5..4fd4525b04555 100644
--- a/test/CodeGen/Thumb2/load-global.ll
+++ b/test/CodeGen/Thumb2/load-global.ll
@@ -1,19 +1,23 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=thumbv7-apple-darwin -relocation-model=static | \
-; RUN:   not grep {L_G\$non_lazy_ptr}
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=thumbv7-apple-darwin -relocation-model=dynamic-no-pic | \
-; RUN:   grep {L_G\$non_lazy_ptr} | count 2
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=thumbv7-apple-darwin -relocation-model=pic | \
-; RUN:   grep {ldr.*pc} | count 1
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | \
-; RUN:   grep {GOT} | count 1
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX
 
 @G = external global i32
 
 define i32 @test1() {
+; STATIC: _test1:
+; STATIC: .long _G
+
+; DYNAMIC: _test1:
+; DYNAMIC: .long L_G$non_lazy_ptr
+
+; PIC: _test1
+; PIC: add r0, pc
+; PIC: .long L_G$non_lazy_ptr-(LPC0+4)
+
+; LINUX: test1
+; LINUX: .long G(GOT)
 	%tmp = load i32* @G
 	ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/mul_const.ll b/test/CodeGen/Thumb2/mul_const.ll
new file mode 100644
index 0000000000000..9a2ec93a5adc6
--- /dev/null
+++ b/test/CodeGen/Thumb2/mul_const.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; rdar://7069502
+
+define i32 @t1(i32 %v) nounwind readnone {
+entry:
+; CHECK: t1:
+; CHECK: add.w r0, r0, r0, lsl #3
+	%0 = mul i32 %v, 9
+	ret i32 %0
+}
+
+define i32 @t2(i32 %v) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK: rsb r0, r0, r0, lsl #3
+	%0 = mul i32 %v, 7
+	ret i32 %0
+}
diff --git a/test/CodeGen/Thumb2/pic-load.ll b/test/CodeGen/Thumb2/pic-load.ll
new file mode 100644
index 0000000000000..1f8aea912f6f2
--- /dev/null
+++ b/test/CodeGen/Thumb2/pic-load.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -relocation-model=pic | FileCheck %s
+
+	%struct.anon = type { void ()* }
+	%struct.one_atexit_routine = type { %struct.anon, i32, i8* }
+@__dso_handle = external global { }		; <{ }*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (void ()*)* @atexit to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define hidden arm_apcscc i32 @atexit(void ()* %func) nounwind {
+entry:
+; CHECK: atexit:
+; CHECK: add r0, pc
+	%r = alloca %struct.one_atexit_routine, align 4		; <%struct.one_atexit_routine*> [#uses=3]
+	%0 = getelementptr %struct.one_atexit_routine* %r, i32 0, i32 0, i32 0		; <void ()**> [#uses=1]
+	store void ()* %func, void ()** %0, align 4
+	%1 = getelementptr %struct.one_atexit_routine* %r, i32 0, i32 1		; <i32*> [#uses=1]
+	store i32 0, i32* %1, align 4
+	%2 = call arm_apcscc  i32 @atexit_common(%struct.one_atexit_routine* %r, i8* bitcast ({ }* @__dso_handle to i8*)) nounwind		; <i32> [#uses=1]
+	ret i32 %2
+}
+
+declare arm_apcscc i32 @atexit_common(%struct.one_atexit_routine*, i8*) nounwind
diff --git a/test/CodeGen/Thumb2/thumb2-adc.ll b/test/CodeGen/Thumb2/thumb2-adc.ll
index c1565b3009600..702df91c85953 100644
--- a/test/CodeGen/Thumb2/thumb2-adc.ll
+++ b/test/CodeGen/Thumb2/thumb2-adc.ll
@@ -1,32 +1,48 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adc\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 ; 734439407618 = 0x000000ab00000002
 define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: adds r0, #2
     %tmp = add i64 %a, 734439407618
     ret i64 %tmp
 }
 
 ; 5066626890203138 = 0x0012001200000002
 define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: adds r0, #2
     %tmp = add i64 %a, 5066626890203138
     ret i64 %tmp
 }
 
 ; 3747052064576897026 = 0x3400340000000002
 define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: adds r0, #2
     %tmp = add i64 %a, 3747052064576897026
     ret i64 %tmp
 }
 
 ; 6221254862626095106 = 0x5656565600000002
 define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK: adds r0, #2
     %tmp = add i64 %a, 6221254862626095106 
     ret i64 %tmp
 }
 
 ; 287104476244869122 = 0x03fc000000000002
 define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK: adds r0, #2
     %tmp = add i64 %a, 287104476244869122
     ret i64 %tmp
 }
 
+define i64 @f6(i64 %a, i64 %b) {
+; CHECK: f6:
+; CHECK: adds r0, r0, r2
+    %tmp = add i64 %a, %b
+    ret i64 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-add.ll b/test/CodeGen/Thumb2/thumb2-add.ll
index d4f408ff76e7a..d42ea7138e465 100644
--- a/test/CodeGen/Thumb2/thumb2-add.ll
+++ b/test/CodeGen/Thumb2/thumb2-add.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #255
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #256
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #257
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4094
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4095
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4096
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep lsl | grep #8
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #255
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #256
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #257
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4094
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4095
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4096
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep lsl | grep #8
 
 define i32 @t2ADDrc_255(i32 %lhs) {
     %Rd = add i32 %lhs, 255;
diff --git a/test/CodeGen/Thumb2/thumb2-add2.ll b/test/CodeGen/Thumb2/thumb2-add2.ll
index be89508c7ef23..e496654706ec2 100644
--- a/test/CodeGen/Thumb2/thumb2-add2.ll
+++ b/test/CodeGen/Thumb2/thumb2-add2.ll
@@ -1,31 +1,41 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#510} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 ; 171 = 0x000000ab
 define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: adds r0, #171
     %tmp = add i32 %a, 171
     ret i32 %tmp
 }
 
 ; 1179666 = 0x00120012
 define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: add.w r0, r0, #1179666
     %tmp = add i32 %a, 1179666
     ret i32 %tmp
 }
 
 ; 872428544 = 0x34003400
 define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: add.w r0, r0, #872428544
     %tmp = add i32 %a, 872428544
     ret i32 %tmp
 }
 
 ; 1448498774 = 0x56565656
 define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: add.w r0, r0, #1448498774
     %tmp = add i32 %a, 1448498774
     ret i32 %tmp
 }
 
 ; 510 = 0x000001fe
 define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: add.w r0, r0, #510
     %tmp = add i32 %a, 510
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-add3.ll b/test/CodeGen/Thumb2/thumb2-add3.ll
index 1e6341e882fdf..8d472cb110b84 100644
--- a/test/CodeGen/Thumb2/thumb2-add3.ll
+++ b/test/CodeGen/Thumb2/thumb2-add3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {addw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {addw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
 
 define i32 @f1(i32 %a) {
     %tmp = add i32 %a, 4095
diff --git a/test/CodeGen/Thumb2/thumb2-add4.ll b/test/CodeGen/Thumb2/thumb2-add4.ll
index b74a33c90a102..b94e84daee1b7 100644
--- a/test/CodeGen/Thumb2/thumb2-add4.ll
+++ b/test/CodeGen/Thumb2/thumb2-add4.ll
@@ -1,31 +1,46 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adds\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 ; 171 = 0x000000ab
 define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: adds r0, #171
+; CHECK: adc r1, r1, #0
     %tmp = add i64 %a, 171
     ret i64 %tmp
 }
 
 ; 1179666 = 0x00120012
 define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: adds.w r0, r0, #1179666
+; CHECK: adc r1, r1, #0
     %tmp = add i64 %a, 1179666
     ret i64 %tmp
 }
 
 ; 872428544 = 0x34003400
 define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: adds.w r0, r0, #872428544
+; CHECK: adc r1, r1, #0
     %tmp = add i64 %a, 872428544
     ret i64 %tmp
 }
 
 ; 1448498774 = 0x56565656
 define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK: adds.w r0, r0, #1448498774
+; CHECK: adc r1, r1, #0
     %tmp = add i64 %a, 1448498774
     ret i64 %tmp
 }
 
 ; 66846720 = 0x03fc0000
 define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK: adds.w r0, r0, #66846720
+; CHECK: adc r1, r1, #0
     %tmp = add i64 %a, 66846720
     ret i64 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-add5.ll b/test/CodeGen/Thumb2/thumb2-add5.ll
index 22452143d9581..8b3a4f6d12a87 100644
--- a/test/CodeGen/Thumb2/thumb2-add5.ll
+++ b/test/CodeGen/Thumb2/thumb2-add5.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: add r0, r1
     %tmp = add i32 %a, %b
     ret i32 %tmp
 }
 
 define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: add.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = add i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: add.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = add i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: add.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = add i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: add.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-add6.ll b/test/CodeGen/Thumb2/thumb2-add6.ll
index 9dd3efcacd580..0ecaa793909f7 100644
--- a/test/CodeGen/Thumb2/thumb2-add6.ll
+++ b/test/CodeGen/Thumb2/thumb2-add6.ll
@@ -1,6 +1,9 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adds\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: adds r0, r0, r2
+; CHECK: adcs r1, r3
     %tmp = add i64 %a, %b
     ret i64 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-and.ll b/test/CodeGen/Thumb2/thumb2-and.ll
index ab191d56843ae..8e2245a85926c 100644
--- a/test/CodeGen/Thumb2/thumb2-and.ll
+++ b/test/CodeGen/Thumb2/thumb2-and.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: ands r0, r1
     %tmp = and i32 %a, %b
     ret i32 %tmp
 }
 
 define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: and.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = and i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: and.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = and i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: and.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = and i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: and.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll
index 266d256fce511..1e2666f403682 100644
--- a/test/CodeGen/Thumb2/thumb2-and2.ll
+++ b/test/CodeGen/Thumb2/thumb2-and2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
 
 ; 171 = 0x000000ab
 define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-asr.ll b/test/CodeGen/Thumb2/thumb2-asr.ll
index 4edf92be1339e..a0a60e68989f9 100644
--- a/test/CodeGen/Thumb2/thumb2-asr.ll
+++ b/test/CodeGen/Thumb2/thumb2-asr.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {asr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: asrs r0, r1
     %tmp = ashr i32 %a, %b
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-asr2.ll b/test/CodeGen/Thumb2/thumb2-asr2.ll
index 700794873f3f3..9c8634f7097cc 100644
--- a/test/CodeGen/Thumb2/thumb2-asr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-asr2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {asr\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#17} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: asrs r0, r0, #17
     %tmp = ashr i32 %a, 17
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-bcc.ll b/test/CodeGen/Thumb2/thumb2-bcc.ll
new file mode 100644
index 0000000000000..e1f9cdbf8c643
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-bcc.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep it
+
+define i32 @t1(i32 %a, i32 %b, i32 %c) {
+; CHECK: t1
+; CHECK: beq
+	%tmp2 = icmp eq i32 %a, 0
+	br i1 %tmp2, label %cond_false, label %cond_true
+
+cond_true:
+	%tmp5 = add i32 %b, 1
+        %tmp6 = and i32 %tmp5, %c
+	ret i32 %tmp6
+
+cond_false:
+	%tmp7 = add i32 %b, -1
+        %tmp8 = xor i32 %tmp7, %c
+	ret i32 %tmp8
+}
diff --git a/test/CodeGen/Thumb2/thumb2-bfc.ll b/test/CodeGen/Thumb2/thumb2-bfc.ll
index 1e5016c91294b..d33cf7ebdb27f 100644
--- a/test/CodeGen/Thumb2/thumb2-bfc.ll
+++ b/test/CodeGen/Thumb2/thumb2-bfc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "bfc " | count 3
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "bfc " | count 3
 
 ; 4278190095 = 0xff00000f
 define i32 @f1(i32 %a) {
@@ -17,3 +17,9 @@ define i32 @f3(i32 %a) {
     %tmp = and i32 %a, 4095
     ret i32 %tmp
 }
+
+; 2147483646 = 0x7ffffffe   not implementable w/ BFC
+define i32 @f4(i32 %a) {
+    %tmp = and i32 %a, 2147483646
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-bic.ll b/test/CodeGen/Thumb2/thumb2-bic.ll
index f5a3d2038d071..4e35383997d94 100644
--- a/test/CodeGen/Thumb2/thumb2-bic.ll
+++ b/test/CodeGen/Thumb2/thumb2-bic.ll
@@ -1,34 +1,40 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: bics r0, r1
     %tmp = xor i32 %b, 4294967295
     %tmp1 = and i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: bics r0, r1
     %tmp = xor i32 %b, 4294967295
     %tmp1 = and i32 %tmp, %a
     ret i32 %tmp1
 }
 
 define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: bics r0, r1
     %tmp = xor i32 4294967295, %b
     %tmp1 = and i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: bics r0, r1
     %tmp = xor i32 4294967295, %b
     %tmp1 = and i32 %tmp, %a
     ret i32 %tmp1
 }
 
 define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: bic.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = xor i32 4294967295, %tmp
     %tmp2 = and i32 %a, %tmp1
@@ -36,6 +42,8 @@ define i32 @f5(i32 %a, i32 %b) {
 }
 
 define i32 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: bic.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = xor i32 %tmp, 4294967295
     %tmp2 = and i32 %tmp1, %a
@@ -43,6 +51,8 @@ define i32 @f6(i32 %a, i32 %b) {
 }
 
 define i32 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: bic.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = xor i32 %tmp, 4294967295
     %tmp2 = and i32 %a, %tmp1
@@ -50,6 +60,8 @@ define i32 @f7(i32 %a, i32 %b) {
 }
 
 define i32 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: bic.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
@@ -57,3 +69,37 @@ define i32 @f8(i32 %a, i32 %b) {
     %tmp2 = and i32 %tmp1, %a
     ret i32 %tmp2
 }
+
+; ~0x000000bb = 4294967108
+define i32 @f9(i32 %a) {
+    %tmp = and i32 %a, 4294967108
+    ret i32 %tmp
+    
+; CHECK: f9:
+; CHECK: bic r0, r0, #187
+}
+
+; ~0x00aa00aa = 4283826005
+define i32 @f10(i32 %a) {
+    %tmp = and i32 %a, 4283826005
+    ret i32 %tmp
+    
+; CHECK: f10:
+; CHECK: bic r0, r0, #11141290
+}
+
+; ~0xcc00cc00 = 872363007
+define i32 @f11(i32 %a) {
+    %tmp = and i32 %a, 872363007
+    ret i32 %tmp
+; CHECK: f11:
+; CHECK: bic r0, r0, #-872363008
+}
+
+; ~0x00110000 = 4293853183
+define i32 @f12(i32 %a) {
+    %tmp = and i32 %a, 4293853183
+    ret i32 %tmp
+; CHECK: f12:
+; CHECK: bic r0, r0, #1114112
+}
diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll
new file mode 100644
index 0000000000000..b46cb5f7c70e9
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-branch.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define void @f1(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f1:
+; CHECK bne LBB
+        %tmp = icmp eq i32 %a, %b               ; <i1> [#uses=1]
+        br i1 %tmp, label %cond_true, label %return
+
+cond_true:              ; preds = %entry
+        store i32 0, i32* %v
+        ret void
+
+return:         ; preds = %entry
+        ret void
+}
+
+define void @f2(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f2:
+; CHECK bge LBB
+        %tmp = icmp slt i32 %a, %b              ; <i1> [#uses=1]
+        br i1 %tmp, label %cond_true, label %return
+
+cond_true:              ; preds = %entry
+        store i32 0, i32* %v
+        ret void
+
+return:         ; preds = %entry
+        ret void
+}
+
+define void @f3(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f3:
+; CHECK bhs LBB
+        %tmp = icmp ult i32 %a, %b              ; <i1> [#uses=1]
+        br i1 %tmp, label %cond_true, label %return
+
+cond_true:              ; preds = %entry
+        store i32 0, i32* %v
+        ret void
+
+return:         ; preds = %entry
+        ret void
+}
+
+define void @f4(i32 %a, i32 %b, i32* %v) {
+entry:
+; CHECK: f4:
+; CHECK blo LBB
+        %tmp = icmp ult i32 %a, %b              ; <i1> [#uses=1]
+        br i1 %tmp, label %return, label %cond_true
+
+cond_true:              ; preds = %entry
+        store i32 0, i32* %v
+        ret void
+
+return:         ; preds = %entry
+        ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-call.ll b/test/CodeGen/Thumb2/thumb2-call.ll
new file mode 100644
index 0000000000000..7dc6b2601b206
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-call.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX
+
+@t = weak global i32 ()* null           ; <i32 ()**> [#uses=1]
+
+declare void @g(i32, i32, i32, i32)
+
+define void @f() {
+; DARWIN: f:
+; DARWIN: blx _g
+
+; LINUX: f:
+; LINUX: bl g
+        call void @g( i32 1, i32 2, i32 3, i32 4 )
+        ret void
+}
+
+define void @h() {
+; DARWIN: h:
+; DARWIN: blx r0
+
+; LINUX: h:
+; LINUX: blx r0
+        %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
+        %tmp.upgrd.2 = tail call i32 %tmp( )            ; <i32> [#uses=0]
+        ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
index e5f94a6c4929d..0bed0585b5d16 100644
--- a/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2,+v7a | grep "clz " | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a | grep "clz " | count 1
 
 define i32 @f1(i32 %a) {
     %tmp = tail call i32 @llvm.ctlz.i32(i32 %a)
diff --git a/test/CodeGen/Thumb2/thumb2-cmn.ll b/test/CodeGen/Thumb2/thumb2-cmn.ll
index ffe8b980e895c..401c56a721393 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmn\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
 
 define i1 @f1(i32 %a, i32 %b) {
     %nb = sub i32 0, %b
diff --git a/test/CodeGen/Thumb2/thumb2-cmn2.ll b/test/CodeGen/Thumb2/thumb2-cmn2.ll
index 9763dea045cf2..c1fcac00e643b 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "cmn "  | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "cmn\\.w "  | grep {#187\\|#11141290\\|#-872363008\\|#1114112} | count 4
 
 ; -0x000000bb = 4294967109
 define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
index 63f20cd98370a..d4773bb5809b0 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -1,31 +1,41 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*#\[0-9\]*$} | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 ; 0x000000bb = 187
 define i1 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: cmp r0, #187
     %tmp = icmp ne i32 %a, 187
     ret i1 %tmp
 }
 
 ; 0x00aa00aa = 11141290
 define i1 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: cmp.w r0, #11141290
     %tmp = icmp eq i32 %a, 11141290 
     ret i1 %tmp
 }
 
 ; 0xcc00cc00 = 3422604288
 define i1 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: cmp.w r0, #-872363008
     %tmp = icmp ne i32 %a, 3422604288
     ret i1 %tmp
 }
 
 ; 0xdddddddd = 3722304989
 define i1 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: cmp.w r0, #-572662307
     %tmp = icmp ne i32 %a, 3722304989
     ret i1 %tmp
 }
 
 ; 0x00110000 = 1114112
 define i1 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: cmp.w r0, #1114112
     %tmp = icmp eq i32 %a, 1114112
     ret i1 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-cmp2.ll b/test/CodeGen/Thumb2/thumb2-cmp2.ll
index 368a3b3fed149..55c321dc2b31e 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -1,38 +1,46 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i1 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: cmp r0, r1
     %tmp = icmp ne i32 %a, %b
     ret i1 %tmp
 }
 
 define i1 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: cmp r0, r1
     %tmp = icmp eq i32 %a, %b
     ret i1 %tmp
 }
 
 define i1 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: cmp.w r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = icmp eq i32 %tmp, %a
     ret i1 %tmp1
 }
 
 define i1 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: cmp.w r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = icmp ne i32 %tmp, %a
     ret i1 %tmp1
 }
 
 define i1 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: cmp.w r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = icmp eq i32 %a, %tmp
     ret i1 %tmp1
 }
 
 define i1 @f9(i32 %a, i32 %b) {
+; CHECK: f9:
+; CHECK: cmp.w r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-eor.ll b/test/CodeGen/Thumb2/thumb2-eor.ll
index 56bb46a5457f7..b7e276673c421 100644
--- a/test/CodeGen/Thumb2/thumb2-eor.ll
+++ b/test/CodeGen/Thumb2/thumb2-eor.ll
@@ -1,38 +1,46 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: eors r0, r1
     %tmp = xor i32 %a, %b
     ret i32 %tmp
 }
 
 define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: eor.w r0, r1, r0
     %tmp = xor i32 %b, %a
     ret i32 %tmp
 }
 
 define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: eor.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = xor i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: eor.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = xor i32 %tmp, %a
     ret i32 %tmp1
 }
 
 define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: eor.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = xor i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: eor.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-eor2.ll b/test/CodeGen/Thumb2/thumb2-eor2.ll
index 11784ca02c144..185634cdd6fc9 100644
--- a/test/CodeGen/Thumb2/thumb2-eor2.ll
+++ b/test/CodeGen/Thumb2/thumb2-eor2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "eor "  | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "eor "  | grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 5
 
 ; 0x000000bb = 187
 define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
new file mode 100644
index 0000000000000..71199abc57286
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: it ne
+; CHECK: cmpne
+	switch i32 %c, label %cond_next [
+		 i32 1, label %cond_true
+		 i32 7, label %cond_true
+	]
+
+cond_true:
+	%tmp12 = add i32 %a, 1
+	%tmp1518 = add i32 %tmp12, %b
+	ret i32 %tmp1518
+
+cond_next:
+	%tmp15 = add i32 %b, %a
+	ret i32 %tmp15
+}
+
+; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
+define i32 @t2(i32 %a, i32 %b) {
+entry:
+; CHECK: t2:
+; CHECK: ite le
+; CHECK: suble
+; CHECK: subgt
+	%tmp1434 = icmp eq i32 %a, %b		; <i1> [#uses=1]
+	br i1 %tmp1434, label %bb17, label %bb.outer
+
+bb.outer:		; preds = %cond_false, %entry
+	%b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ]		; <i32> [#uses=5]
+	%a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ]		; <i32> [#uses=1]
+	br label %bb
+
+bb:		; preds = %cond_true, %bb.outer
+	%indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ]		; <i32> [#uses=2]
+	%tmp. = sub i32 0, %b_addr.021.0.ph		; <i32> [#uses=1]
+	%tmp.40 = mul i32 %indvar, %tmp.		; <i32> [#uses=1]
+	%a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph		; <i32> [#uses=6]
+	%tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph		; <i1> [#uses=1]
+	br i1 %tmp3, label %cond_true, label %cond_false
+
+cond_true:		; preds = %bb
+	%tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph		; <i32> [#uses=2]
+	%tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph		; <i1> [#uses=1]
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br i1 %tmp1437, label %bb17, label %bb
+
+cond_false:		; preds = %bb
+	%tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0		; <i32> [#uses=2]
+	%tmp14 = icmp eq i32 %a_addr.026.0, %tmp10		; <i1> [#uses=1]
+	br i1 %tmp14, label %bb17, label %bb.outer
+
+bb17:		; preds = %cond_false, %cond_true, %entry
+	%a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ]		; <i32> [#uses=1]
+	ret i32 %a_addr.026.1
+}
+
+@x = external global i32*		; <i32**> [#uses=1]
+
+define void @foo(i32 %a) {
+entry:
+	%tmp = load i32** @x		; <i32*> [#uses=1]
+	store i32 %a, i32* %tmp
+	ret void
+}
+
+define void @t3(i32 %a, i32 %b) {
+entry:
+; CHECK: t3:
+; CHECK: it lt
+; CHECK: poplt {r7, pc}
+	%tmp1 = icmp sgt i32 %a, 10		; <i1> [#uses=1]
+	br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
+
+cond_true:		; preds = %entry
+	tail call void @foo( i32 %b )
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
new file mode 100644
index 0000000000000..d917ffe56bbcb
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+define void @foo(i32 %X, i32 %Y) {
+entry:
+; CHECK: foo:
+; CHECK: it ne
+; CHECK: cmpne
+; CHECK: it hi
+; CHECK: pophi {r7, pc}
+	%tmp1 = icmp ult i32 %X, 4		; <i1> [#uses=1]
+	%tmp4 = icmp eq i32 %Y, 0		; <i1> [#uses=1]
+	%tmp7 = or i1 %tmp4, %tmp1		; <i1> [#uses=1]
+	br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
+
+cond_true:		; preds = %entry
+	%tmp10 = tail call i32 (...)* @bar( )		; <i32> [#uses=0]
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+declare i32 @bar(...)
+
+; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
+
+	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
+
+define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
+entry:
+; CHECK: CountTree:
+; CHECK: it eq
+; CHECK: cmpeq
+; CHECK: bne
+; CHECK: itt eq
+; CHECK: moveq
+; CHECK: popeq
+	br label %tailrecurse
+
+tailrecurse:		; preds = %bb, %entry
+	%tmp6 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
+	%tmp9 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=2]
+	%tmp12 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
+	%tmp14 = icmp eq %struct.quad_struct* null, null		; <i1> [#uses=1]
+	%tmp17 = icmp eq %struct.quad_struct* %tmp6, null		; <i1> [#uses=1]
+	%tmp23 = icmp eq %struct.quad_struct* %tmp9, null		; <i1> [#uses=1]
+	%tmp29 = icmp eq %struct.quad_struct* %tmp12, null		; <i1> [#uses=1]
+	%bothcond = and i1 %tmp17, %tmp14		; <i1> [#uses=1]
+	%bothcond1 = and i1 %bothcond, %tmp23		; <i1> [#uses=1]
+	%bothcond2 = and i1 %bothcond1, %tmp29		; <i1> [#uses=1]
+	br i1 %bothcond2, label %return, label %bb
+
+bb:		; preds = %tailrecurse
+	%tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 )		; <i32> [#uses=0]
+	br label %tailrecurse
+
+return:		; preds = %tailrecurse
+	ret i32 0
+}
+
+	%struct.SString = type { i8*, i32, i32 }
+
+declare void @abort()
+
+define fastcc void @t1(%struct.SString* %word, i8 signext  %c) {
+entry:
+; CHECK: t1:
+; CHECK: it ne
+; CHECK: popne {r7, pc}
+	%tmp1 = icmp eq %struct.SString* %word, null		; <i1> [#uses=1]
+	br i1 %tmp1, label %cond_true, label %cond_false
+
+cond_true:		; preds = %entry
+	tail call void @abort( )
+	unreachable
+
+cond_false:		; preds = %entry
+	ret void
+}
+
+define fastcc void @t2() nounwind {
+entry:
+; CHECK: t2:
+; CHECK: cmp r0, #0
+; CHECK: beq
+	br i1 undef, label %bb.i.i3, label %growMapping.exit
+
+bb.i.i3:		; preds = %entry
+	unreachable
+
+growMapping.exit:		; preds = %entry
+	unreachable
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
new file mode 100644
index 0000000000000..1d45d3ce7fe8d
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+; There shouldn't be a unconditional branch at end of bb52.
+; rdar://7184787
+
+@posed = external global i64                      ; <i64*> [#uses=1]
+
+define i1 @ab_bb52(i64 %.reload78, i64* %.out, i64* %.out1) nounwind {
+newFuncRoot:
+  br label %bb52
+
+bb52.bb55_crit_edge.exitStub:                     ; preds = %bb52
+  store i64 %0, i64* %.out
+  store i64 %2, i64* %.out1
+  ret i1 true
+
+bb52.bb53_crit_edge.exitStub:                     ; preds = %bb52
+  store i64 %0, i64* %.out
+  store i64 %2, i64* %.out1
+  ret i1 false
+
+bb52:                                             ; preds = %newFuncRoot
+; CHECK: movne
+; CHECK: moveq
+; CHECK: pop
+; CHECK-NEXT: LBB1_2:
+  %0 = load i64* @posed, align 4                  ; <i64> [#uses=3]
+  %1 = sub i64 %0, %.reload78                     ; <i64> [#uses=1]
+  %2 = ashr i64 %1, 1                             ; <i64> [#uses=3]
+  %3 = icmp eq i64 %2, 0                          ; <i1> [#uses=1]
+  br i1 %3, label %bb52.bb55_crit_edge.exitStub, label %bb52.bb53_crit_edge.exitStub
+}
diff --git a/test/CodeGen/Thumb2/thumb2-jtb.ll b/test/CodeGen/Thumb2/thumb2-jtb.ll
new file mode 100644
index 0000000000000..7d093ecce201a
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-jtb.ll
@@ -0,0 +1,120 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep tbb
+
+; Do not use tbb / tbh if any destination is before the jumptable.
+; rdar://7102917
+
+define i16 @main__getopt_internal_2E_exit_2E_ce(i32) nounwind {
+newFuncRoot:
+	br label %_getopt_internal.exit.ce
+
+codeRepl127.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 0
+
+parse_options.exit.loopexit.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 1
+
+bb1.i.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 2
+
+bb90.i.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 3
+
+codeRepl104.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 4
+
+codeRepl113.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 5
+
+codeRepl51.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 6
+
+codeRepl70.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 7
+
+codeRepl119.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 8
+
+codeRepl93.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 9
+
+codeRepl101.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 10
+
+codeRepl120.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 11
+
+codeRepl89.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 12
+
+codeRepl45.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 13
+
+codeRepl58.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 14
+
+codeRepl46.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 15
+
+codeRepl50.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 16
+
+codeRepl52.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 17
+
+codeRepl53.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 18
+
+codeRepl61.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 19
+
+codeRepl85.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 20
+
+codeRepl97.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 21
+
+codeRepl79.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 22
+
+codeRepl102.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 23
+
+codeRepl54.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 24
+
+codeRepl57.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 25
+
+codeRepl103.exitStub:		; preds = %_getopt_internal.exit.ce
+	ret i16 26
+
+_getopt_internal.exit.ce:		; preds = %newFuncRoot
+	switch i32 %0, label %codeRepl127.exitStub [
+		i32 -1, label %parse_options.exit.loopexit.exitStub
+		i32 0, label %bb1.i.exitStub
+		i32 63, label %bb90.i.exitStub
+		i32 66, label %codeRepl104.exitStub
+		i32 67, label %codeRepl113.exitStub
+		i32 71, label %codeRepl51.exitStub
+		i32 77, label %codeRepl70.exitStub
+		i32 78, label %codeRepl119.exitStub
+		i32 80, label %codeRepl93.exitStub
+		i32 81, label %codeRepl101.exitStub
+		i32 82, label %codeRepl120.exitStub
+		i32 88, label %codeRepl89.exitStub
+		i32 97, label %codeRepl45.exitStub
+		i32 98, label %codeRepl58.exitStub
+		i32 99, label %codeRepl46.exitStub
+		i32 100, label %codeRepl50.exitStub
+		i32 104, label %codeRepl52.exitStub
+		i32 108, label %codeRepl53.exitStub
+		i32 109, label %codeRepl61.exitStub
+		i32 110, label %codeRepl85.exitStub
+		i32 111, label %codeRepl97.exitStub
+		i32 113, label %codeRepl79.exitStub
+		i32 114, label %codeRepl102.exitStub
+		i32 115, label %codeRepl54.exitStub
+		i32 116, label %codeRepl57.exitStub
+		i32 118, label %codeRepl103.exitStub
+	]
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
new file mode 100644
index 0000000000000..da2874d1e0c4c
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+
+@X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
+
+define i32 @t1() {
+; CHECK: t1:
+; CHECK: push {r7, lr}
+; CHECK: pop {r7, pc}
+        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
+        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
+        %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 )                ; <i32> [#uses=1]
+        ret i32 %tmp4
+}
+
+define i32 @t2() {
+; CHECK: t2:
+; CHECK: push {r7, lr}
+; CHECK: ldmia
+; CHECK: pop {r7, pc}
+        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
+        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
+        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
+        %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
+        ret i32 %tmp6
+}
+
+define i32 @t3() {
+; CHECK: t3:
+; CHECK: push {r7, lr}
+; CHECK: pop {r7, pc}
+        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
+        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
+        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
+        %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
+        ret i32 %tmp6
+}
+
+declare i32 @f1(i32, i32)
+
+declare i32 @f2(i32, i32, i32)
diff --git a/test/CodeGen/Thumb2/thumb2-ldr.ll b/test/CodeGen/Thumb2/thumb2-ldr.ll
index 19c75849e1107..94888fd94050b 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr.ll
@@ -1,17 +1,17 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ldr r0} | count 7
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mvn
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldr | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr | not grep ldr
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32* %v) {
 entry:
+; CHECK: f1:
+; CHECK: ldr r0, [r0]
         %tmp = load i32* %v
         ret i32 %tmp
 }
 
 define i32 @f2(i32* %v) {
 entry:
+; CHECK: f2:
+; CHECK: ldr.w r0, [r0, #+4092]
         %tmp2 = getelementptr i32* %v, i32 1023
         %tmp = load i32* %tmp2
         ret i32 %tmp
@@ -19,6 +19,9 @@ entry:
 
 define i32 @f3(i32* %v) {
 entry:
+; CHECK: f3:
+; CHECK: mov.w r1, #4096
+; CHECK: ldr r0, [r0, r1]
         %tmp2 = getelementptr i32* %v, i32 1024
         %tmp = load i32* %tmp2
         ret i32 %tmp
@@ -26,6 +29,8 @@ entry:
 
 define i32 @f4(i32 %base) {
 entry:
+; CHECK: f4:
+; CHECK: ldr r0, [r0, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i32*
         %tmp3 = load i32* %tmp2
@@ -34,6 +39,8 @@ entry:
 
 define i32 @f5(i32 %base, i32 %offset) {
 entry:
+; CHECK: f5:
+; CHECK: ldr r0, [r0, r1]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i32*
         %tmp3 = load i32* %tmp2
@@ -42,6 +49,8 @@ entry:
 
 define i32 @f6(i32 %base, i32 %offset) {
 entry:
+; CHECK: f6:
+; CHECK: ldr.w r0, [r0, r1, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i32*
@@ -51,6 +60,10 @@ entry:
 
 define i32 @f7(i32 %base, i32 %offset) {
 entry:
+; CHECK: f7:
+; CHECK: lsrs r1, r1, #2
+; CHECK: ldr r0, [r0, r1]
+
         %tmp1 = lshr i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i32*
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_ext.ll b/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
index d48ecef1c1131..9e6aef4e09742 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrb | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrh | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrsb | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrsh | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrb | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrh | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrsb | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrsh | count 1
 
 define i32 @test1(i8* %v.pntr.s0.u1) {
     %tmp.u = load i8* %v.pntr.s0.u1
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index 79ffa8293521d..d1af4ba47fe08 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
 ; RUN:   grep {ldr.*\\\[.*\],} | count 1
 
 define i32 @test(i32 %a, i32 %b, i32 %c) {
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
index f773e6331bfee..9cc3f4a2eda5e 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
 ; RUN:   grep {ldr.*\\!} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
 ; RUN:   grep {ldrsb.*\\!} | count 1
 
 define i32* @test1(i32* %X, i32* %dest) {
diff --git a/test/CodeGen/Thumb2/thumb2-ldrb.ll b/test/CodeGen/Thumb2/thumb2-ldrb.ll
index 5bacb8eb2b4c0..bf1009743afc9 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrb.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrb.ll
@@ -1,17 +1,17 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ldrb r0} | count 7
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mvn
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrb | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr | not grep ldrb
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i8 @f1(i8* %v) {
 entry:
+; CHECK: f1:
+; CHECK: ldrb r0, [r0]
         %tmp = load i8* %v
         ret i8 %tmp
 }
 
 define i8 @f2(i8* %v) {
 entry:
+; CHECK: f2:
+; CHECK: ldrb r0, [r0, #-1]
         %tmp2 = getelementptr i8* %v, i8 1023
         %tmp = load i8* %tmp2
         ret i8 %tmp
@@ -19,6 +19,9 @@ entry:
 
 define i8 @f3(i32 %base) {
 entry:
+; CHECK: f3:
+; CHECK: mov.w r1, #4096
+; CHECK: ldrb r0, [r0, r1]
         %tmp1 = add i32 %base, 4096
         %tmp2 = inttoptr i32 %tmp1 to i8*
         %tmp3 = load i8* %tmp2
@@ -27,6 +30,8 @@ entry:
 
 define i8 @f4(i32 %base) {
 entry:
+; CHECK: f4:
+; CHECK: ldrb r0, [r0, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i8*
         %tmp3 = load i8* %tmp2
@@ -35,6 +40,8 @@ entry:
 
 define i8 @f5(i32 %base, i32 %offset) {
 entry:
+; CHECK: f5:
+; CHECK: ldrb r0, [r0, r1]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i8*
         %tmp3 = load i8* %tmp2
@@ -43,6 +50,8 @@ entry:
 
 define i8 @f6(i32 %base, i32 %offset) {
 entry:
+; CHECK: f6:
+; CHECK: ldrb.w r0, [r0, r1, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i8*
@@ -52,6 +61,9 @@ entry:
 
 define i8 @f7(i32 %base, i32 %offset) {
 entry:
+; CHECK: f7:
+; CHECK: lsrs r1, r1, #2
+; CHECK: ldrb r0, [r0, r1]
         %tmp1 = lshr i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i8*
diff --git a/test/CodeGen/Thumb2/thumb2-ldrd.ll b/test/CodeGen/Thumb2/thumb2-ldrd.ll
new file mode 100644
index 0000000000000..22d4e88ed17d6
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+
+@b = external global i64*
+
+define i64 @t(i64 %a) nounwind readonly {
+entry:
+;CHECK: ldrd r2, [r2]
+	%0 = load i64** @b, align 4
+	%1 = load i64* %0, align 4
+	%2 = mul i64 %1, %a
+	ret i64 %2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ldrh.ll b/test/CodeGen/Thumb2/thumb2-ldrh.ll
index 15f803e11086f..f1fb79c35ed0b 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrh.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrh.ll
@@ -1,17 +1,17 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ldrh r0} | count 7
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mvn
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ldrh | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr | not grep ldrh
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i16 @f1(i16* %v) {
 entry:
+; CHECK: f1:
+; CHECK: ldrh r0, [r0]
         %tmp = load i16* %v
         ret i16 %tmp
 }
 
 define i16 @f2(i16* %v) {
 entry:
+; CHECK: f2:
+; CHECK: ldrh.w r0, [r0, #+2046]
         %tmp2 = getelementptr i16* %v, i16 1023
         %tmp = load i16* %tmp2
         ret i16 %tmp
@@ -19,6 +19,9 @@ entry:
 
 define i16 @f3(i16* %v) {
 entry:
+; CHECK: f3:
+; CHECK: mov.w r1, #4096
+; CHECK: ldrh r0, [r0, r1]
         %tmp2 = getelementptr i16* %v, i16 2048
         %tmp = load i16* %tmp2
         ret i16 %tmp
@@ -26,6 +29,8 @@ entry:
 
 define i16 @f4(i32 %base) {
 entry:
+; CHECK: f4:
+; CHECK: ldrh r0, [r0, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i16*
         %tmp3 = load i16* %tmp2
@@ -34,6 +39,8 @@ entry:
 
 define i16 @f5(i32 %base, i32 %offset) {
 entry:
+; CHECK: f5:
+; CHECK: ldrh r0, [r0, r1]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i16*
         %tmp3 = load i16* %tmp2
@@ -42,6 +49,8 @@ entry:
 
 define i16 @f6(i32 %base, i32 %offset) {
 entry:
+; CHECK: f6:
+; CHECK: ldrh.w r0, [r0, r1, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i16*
@@ -51,6 +60,9 @@ entry:
 
 define i16 @f7(i32 %base, i32 %offset) {
 entry:
+; CHECK: f7:
+; CHECK: lsrs r1, r1, #2
+; CHECK: ldrh r0, [r0, r1]
         %tmp1 = lshr i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i16*
diff --git a/test/CodeGen/Thumb2/thumb2-lsl.ll b/test/CodeGen/Thumb2/thumb2-lsl.ll
index 666963a4b4998..6b0818a34b9bf 100644
--- a/test/CodeGen/Thumb2/thumb2-lsl.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsl.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsl\\W*r\[0-9\],\\W*r\[0-9\],\\W*\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: lsls r0, r0, #5
     %tmp = shl i32 %a, 5
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-lsl2.ll b/test/CodeGen/Thumb2/thumb2-lsl2.ll
index eb7a2795343d6..f283eef89a37f 100644
--- a/test/CodeGen/Thumb2/thumb2-lsl2.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsl2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsl\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: lsls r0, r1
     %tmp = shl i32 %a, %b
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-lsr.ll b/test/CodeGen/Thumb2/thumb2-lsr.ll
index cf4d2f81c55d9..7cbee54f381f1 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsr\\W*r\[0-9\],\\W*r\[0-9\],\\W*\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: lsrs r0, r0, #13
     %tmp = lshr i32 %a, 13
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-lsr2.ll b/test/CodeGen/Thumb2/thumb2-lsr2.ll
index 01fd56d52c175..87800f9d73fb6 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: lsrs r0, r1
     %tmp = lshr i32 %a, %b
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-lsr3.ll b/test/CodeGen/Thumb2/thumb2-lsr3.ll
new file mode 100644
index 0000000000000..5cfd3f5198b74
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-lsr3.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2
+
+define i1 @test1(i64 %poscnt, i32 %work) {
+entry:
+; CHECK: rrx r0, r0
+; CHECK: lsrs.w r1, r1, #1
+	%0 = lshr i64 %poscnt, 1
+	%1 = icmp eq i64 %0, 0
+	ret i1 %1
+}
+
+define i1 @test2(i64 %poscnt, i32 %work) {
+entry:
+; CHECK: rrx r0, r0
+; CHECK: asrs.w r1, r1, #1
+	%0 = ashr i64 %poscnt, 1
+	%1 = icmp eq i64 %0, 0
+	ret i1 %1
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mla.ll b/test/CodeGen/Thumb2/thumb2-mla.ll
index 0772d7f69ad5d..be66425d7e66c 100644
--- a/test/CodeGen/Thumb2/thumb2-mla.ll
+++ b/test/CodeGen/Thumb2/thumb2-mla.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mla\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 2
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {mla\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 2
 
 define i32 @f1(i32 %a, i32 %b, i32 %c) {
     %tmp1 = mul i32 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll
index 6d1640f340ae5..782def966615d 100644
--- a/test/CodeGen/Thumb2/thumb2-mls.ll
+++ b/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
 
 define i32 @f1(i32 %a, i32 %b, i32 %c) {
     %tmp1 = mul i32 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll
index 0c4c59689b600..e9fdec8820eab 100644
--- a/test/CodeGen/Thumb2/thumb2-mov.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov.ll
@@ -1,127 +1,147 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #11206827
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #2868947712
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #2880154539
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #251658240
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #3948544
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #258
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #4026531840
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 ; Test #<const>
 
 ; var 2.1 - 0x00ab00ab
 define i32 @t2_const_var2_1_ok_1(i32 %lhs) {
+;CHECK: t2_const_var2_1_ok_1:
+;CHECK: #11206827
     %ret = add i32 %lhs, 11206827 ; 0x00ab00ab
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_1_fail_1(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_1:
+;CHECK: movt
     %ret = add i32 %lhs, 11206843 ; 0x00ab00bb
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_1_fail_2(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_2:
+;CHECK: movt
     %ret = add i32 %lhs, 27984043 ; 0x01ab00ab
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_1_fail_3(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_3:
+;CHECK: movt
     %ret = add i32 %lhs, 27984299 ; 0x01ab01ab
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_1_fail_4(i32 %lhs) {
+;CHECK: t2_const_var2_1_fail_4:
+;CHECK: movt
     %ret = add i32 %lhs, 28027649 ; 0x01abab01
     ret i32 %ret
 }
 
 ; var 2.2 - 0xab00ab00
 define i32 @t2_const_var2_2_ok_1(i32 %lhs) {
+;CHECK: t2_const_var2_2_ok_1:
+;CHECK: #-1426019584
     %ret = add i32 %lhs, 2868947712 ; 0xab00ab00
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_2_fail_1(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_1:
+;CHECK: movt
     %ret = add i32 %lhs, 2868951552 ; 0xab00ba00
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_2_fail_2(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_2:
+;CHECK: movt
     %ret = add i32 %lhs, 2868947728 ; 0xab00ab10
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_2_fail_3(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_3:
+;CHECK: movt
     %ret = add i32 %lhs, 2869996304 ; 0xab10ab10
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_2_fail_4(i32 %lhs) {
+;CHECK: t2_const_var2_2_fail_4:
+;CHECK: movt
     %ret = add i32 %lhs, 279685904 ; 0x10abab10
     ret i32 %ret
 }
 
 ; var 2.3 - 0xabababab
 define i32 @t2_const_var2_3_ok_1(i32 %lhs) {
+;CHECK: t2_const_var2_3_ok_1:
+;CHECK: #-1414812757
     %ret = add i32 %lhs, 2880154539 ; 0xabababab
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_3_fail_1(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_1:
+;CHECK: movt
     %ret = add i32 %lhs, 2880154554 ; 0xabababba
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_3_fail_2(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_2:
+;CHECK: movt
     %ret = add i32 %lhs, 2880158379 ; 0xababbaab
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_3_fail_3(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_3:
+;CHECK: movt
     %ret = add i32 %lhs, 2881137579 ; 0xabbaabab
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_3_fail_4(i32 %lhs) {
+;CHECK: t2_const_var2_3_fail_4:
+;CHECK: movt
     %ret = add i32 %lhs, 3131812779 ; 0xbaababab
     ret i32 %ret
 }
 
 ; var 3 - 0x0F000000
 define i32 @t2_const_var3_1_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_1_ok_1:
+;CHECK: #251658240
     %ret = add i32 %lhs, 251658240 ; 0x0F000000
     ret i32 %ret
 }
 
 define i32 @t2_const_var3_2_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_2_ok_1:
+;CHECK: #3948544
     %ret = add i32 %lhs, 3948544 ; 0b00000000001111000100000000000000
     ret i32 %ret
 }
 
 define i32 @t2_const_var3_2_fail_1(i32 %lhs) {
+;CHECK: t2_const_var3_2_fail_1:
+;CHECK: movt
     %ret = add i32 %lhs, 3940352 ; 0b00000000001111000010000000000000
     ret i32 %ret
 }
 
 define i32 @t2_const_var3_3_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_3_ok_1:
+;CHECK: #258
     %ret = add i32 %lhs, 258 ; 0b00000000000000000000000100000010
     ret i32 %ret
 }
 
 define i32 @t2_const_var3_4_ok_1(i32 %lhs) {
+;CHECK: t2_const_var3_4_ok_1:
+;CHECK: #-268435456
     %ret = add i32 %lhs, 4026531840 ; 0xF0000000
     ret i32 %ret
 }
-
diff --git a/test/CodeGen/Thumb2/thumb2-mov2.ll b/test/CodeGen/Thumb2/thumb2-mov2.ll
index d2f8c0b91a58c..a02f4f087365e 100644
--- a/test/CodeGen/Thumb2/thumb2-mov2.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov2.ll
@@ -1,10 +1,11 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov  | grep movt
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @t2MOVTi16_ok_1(i32 %a) {
+; CHECK: t2MOVTi16_ok_1:
+; CHECK:      movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK:      movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
     %1 = and i32 %a, 65535
     %2 = shl i32 1234, 16
     %3 = or  i32 %1, %2
@@ -13,6 +14,11 @@ define i32 @t2MOVTi16_ok_1(i32 %a) {
 }
 
 define i32 @t2MOVTi16_test_1(i32 %a) {
+; CHECK: t2MOVTi16_test_1:
+; CHECK:      movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK:      movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
     %1 = shl i32  255,   8
     %2 = shl i32 1234,   8
     %3 = or  i32   %1, 255  ; This give us 0xFFFF in %3
@@ -24,6 +30,11 @@ define i32 @t2MOVTi16_test_1(i32 %a) {
 }
 
 define i32 @t2MOVTi16_test_2(i32 %a) {
+; CHECK: t2MOVTi16_test_2:
+; CHECK:      movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK:      movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
     %1 = shl i32  255,   8
     %2 = shl i32 1234,   8
     %3 = or  i32   %1, 255  ; This give us 0xFFFF in %3
@@ -36,6 +47,11 @@ define i32 @t2MOVTi16_test_2(i32 %a) {
 }
 
 define i32 @t2MOVTi16_test_3(i32 %a) {
+; CHECK: t2MOVTi16_test_3:
+; CHECK:      movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK:      movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
     %1 = shl i32  255,   8
     %2 = shl i32 1234,   8
     %3 = or  i32   %1, 255  ; This give us 0xFFFF in %3
@@ -50,6 +66,11 @@ define i32 @t2MOVTi16_test_3(i32 %a) {
 }
 
 define i32 @t2MOVTi16_test_nomatch_1(i32 %a) {
+; CHECK: t2MOVTi16_test_nomatch_1:
+; CHECK:      movw r1, #16384
+; CHECK-NEXT: movt r1, #154
+; CHECK:      movw r1, #65535
+; CHECK-NEXT: movt r1, #154
     %1 = shl i32  255,   8
     %2 = shl i32 1234,   8
     %3 = or  i32   %1, 255  ; This give us 0xFFFF in %3
@@ -58,7 +79,6 @@ define i32 @t2MOVTi16_test_nomatch_1(i32 %a) {
     %6 = shl i32   %4,   2  ; This gives us (1234 << 16) in %6
     %7 = lshr i32  %6,   3
     %8 = or  i32   %5,  %7
-
     ret i32 %8
 }
 
diff --git a/test/CodeGen/Thumb2/thumb2-mov3.ll b/test/CodeGen/Thumb2/thumb2-mov3.ll
index 74418c1000c94..46af6fb16c499 100644
--- a/test/CodeGen/Thumb2/thumb2-mov3.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov3.ll
@@ -1,31 +1,41 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mov\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 ; 171 = 0x000000ab
 define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: movs r0, #171
     %tmp = add i32 0, 171
     ret i32 %tmp
 }
 
 ; 1179666 = 0x00120012
 define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: mov.w r0, #1179666
     %tmp = add i32 0, 1179666
     ret i32 %tmp
 }
 
 ; 872428544 = 0x34003400
 define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: mov.w r0, #872428544
     %tmp = add i32 0, 872428544
     ret i32 %tmp
 }
 
 ; 1448498774 = 0x56565656
 define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: mov.w r0, #1448498774
     %tmp = add i32 0, 1448498774
     ret i32 %tmp
 }
 
 ; 66846720 = 0x03fc0000
 define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: mov.w r0, #66846720
     %tmp = add i32 0, 66846720
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-mov4.ll b/test/CodeGen/Thumb2/thumb2-mov4.ll
index 74c522f94f07b..06fa238263ab0 100644
--- a/test/CodeGen/Thumb2/thumb2-mov4.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {movw\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#65535} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {movw\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#65535} | count 1
 
 define i32 @f6(i32 %a) {
     %tmp = add i32 0, 65535
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index e976e66c00137..b1515b5148202 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mul\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b, i32 %c) {
+; CHECK: f1:
+; CHECK: muls r0, r1
     %tmp = mul i32 %a, %b
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-mulhi.ll b/test/CodeGen/Thumb2/thumb2-mulhi.ll
new file mode 100644
index 0000000000000..5d47770aed3ec
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-mulhi.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep smmul | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep umull | count 1
+
+define i32 @smulhi(i32 %x, i32 %y) {
+        %tmp = sext i32 %x to i64               ; <i64> [#uses=1]
+        %tmp1 = sext i32 %y to i64              ; <i64> [#uses=1]
+        %tmp2 = mul i64 %tmp1, %tmp             ; <i64> [#uses=1]
+        %tmp3 = lshr i64 %tmp2, 32              ; <i64> [#uses=1]
+        %tmp3.upgrd.1 = trunc i64 %tmp3 to i32          ; <i32> [#uses=1]
+        ret i32 %tmp3.upgrd.1
+}
+
+define i32 @umulhi(i32 %x, i32 %y) {
+        %tmp = zext i32 %x to i64               ; <i64> [#uses=1]
+        %tmp1 = zext i32 %y to i64              ; <i64> [#uses=1]
+        %tmp2 = mul i64 %tmp1, %tmp             ; <i64> [#uses=1]
+        %tmp3 = lshr i64 %tmp2, 32              ; <i64> [#uses=1]
+        %tmp3.upgrd.2 = trunc i64 %tmp3 to i32          ; <i32> [#uses=1]
+        ret i32 %tmp3.upgrd.2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mvn.ll b/test/CodeGen/Thumb2/thumb2-mvn.ll
index 95694d67912e1..a8c8f831c75a8 100644
--- a/test/CodeGen/Thumb2/thumb2-mvn.ll
+++ b/test/CodeGen/Thumb2/thumb2-mvn.ll
@@ -1,27 +1,33 @@
-; RUN: llvm-as < %s | llc | grep {mvn\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
-
-target triple = "thumbv7-apple-darwin"
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
 
 ; 0x000000bb = 187
 define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: mvn r0, #187
     %tmp = xor i32 4294967295, 187
     ret i32 %tmp
 }
 
 ; 0x00aa00aa = 11141290
 define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: mvn r0, #11141290
     %tmp = xor i32 4294967295, 11141290 
     ret i32 %tmp
 }
 
 ; 0xcc00cc00 = 3422604288
 define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: mvn r0, #-872363008
     %tmp = xor i32 4294967295, 3422604288
     ret i32 %tmp
 }
 
 ; 0x00110000 = 1114112
 define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: mvn r0, #1114112
     %tmp = xor i32 4294967295, 1114112
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-mvn2.ll b/test/CodeGen/Thumb2/thumb2-mvn2.ll
index df9b11bed9174..375d0aad5021a 100644
--- a/test/CodeGen/Thumb2/thumb2-mvn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-mvn2.ll
@@ -1,38 +1,46 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: mvns r0, r0
     %tmp = xor i32 4294967295, %a
     ret i32 %tmp
 }
 
 define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: mvns r0, r0
     %tmp = xor i32 %a, 4294967295
     ret i32 %tmp
 }
 
 define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: mvn.w r0, r0, lsl #5
     %tmp = shl i32 %a, 5
     %tmp1 = xor i32 %tmp, 4294967295
     ret i32 %tmp1
 }
 
 define i32 @f6(i32 %a) {
+; CHECK: f6:
+; CHECK: mvn.w r0, r0, lsr #6
     %tmp = lshr i32 %a, 6
     %tmp1 = xor i32 %tmp, 4294967295
     ret i32 %tmp1
 }
 
 define i32 @f7(i32 %a) {
+; CHECK: f7:
+; CHECK: mvn.w r0, r0, asr #7
     %tmp = ashr i32 %a, 7
     %tmp1 = xor i32 %tmp, 4294967295
     ret i32 %tmp1
 }
 
 define i32 @f8(i32 %a) {
+; CHECK: f8:
+; CHECK: mvn.w r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-neg.ll b/test/CodeGen/Thumb2/thumb2-neg.ll
index 8f938d579b837..6bf11ec90621b 100644
--- a/test/CodeGen/Thumb2/thumb2-neg.ll
+++ b/test/CodeGen/Thumb2/thumb2-neg.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*#0} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: rsbs r0, r0, #0
     %tmp = sub i32 0, %a
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-orn.ll b/test/CodeGen/Thumb2/thumb2-orn.ll
index 92c4564841b9a..d4222c2b2dacc 100644
--- a/test/CodeGen/Thumb2/thumb2-orn.ll
+++ b/test/CodeGen/Thumb2/thumb2-orn.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
 
 define i32 @f1(i32 %a, i32 %b) {
     %tmp = xor i32 %b, 4294967295
diff --git a/test/CodeGen/Thumb2/thumb2-orn2.ll b/test/CodeGen/Thumb2/thumb2-orn2.ll
index 7758edd1d6938..7b018826a621c 100644
--- a/test/CodeGen/Thumb2/thumb2-orn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-orn2.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} |\
+; RUN:     grep {#187\\|#11141290\\|#-872363008\\|#1114112} | count 4
 
 ; 0x000000bb = 187
 define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-orr.ll b/test/CodeGen/Thumb2/thumb2-orr.ll
index 9891658049592..89ab7b1edf70b 100644
--- a/test/CodeGen/Thumb2/thumb2-orr.ll
+++ b/test/CodeGen/Thumb2/thumb2-orr.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: orrs r0, r1
     %tmp2 = or i32 %a, %b
     ret i32 %tmp2
 }
 
 define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: orr.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp2 = or i32 %a, %tmp
     ret i32 %tmp2
 }
 
 define i32 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: orr.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp2 = or i32 %a, %tmp
     ret i32 %tmp2
 }
 
 define i32 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: orr.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp2 = or i32 %a, %tmp
     ret i32 %tmp2
 }
 
 define i32 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: orr.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-orr2.ll b/test/CodeGen/Thumb2/thumb2-orr2.ll
index 6f2b62c00c6e3..759a5b8dd8944 100644
--- a/test/CodeGen/Thumb2/thumb2-orr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-orr2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1145324612\\|#1114112} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#-872363008\\|#1145324612\\|#1114112} | count 5
 
 ; 0x000000bb = 187
 define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-pack.ll b/test/CodeGen/Thumb2/thumb2-pack.ll
new file mode 100644
index 0000000000000..a9822498fe087
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-pack.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN:   grep pkhbt | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN:   grep pkhtb | count 4
+
+define i32 @test1(i32 %X, i32 %Y) {
+	%tmp1 = and i32 %X, 65535		; <i32> [#uses=1]
+	%tmp4 = shl i32 %Y, 16		; <i32> [#uses=1]
+	%tmp5 = or i32 %tmp4, %tmp1		; <i32> [#uses=1]
+	ret i32 %tmp5
+}
+
+define i32 @test1a(i32 %X, i32 %Y) {
+	%tmp19 = and i32 %X, 65535		; <i32> [#uses=1]
+	%tmp37 = shl i32 %Y, 16		; <i32> [#uses=1]
+	%tmp5 = or i32 %tmp37, %tmp19		; <i32> [#uses=1]
+	ret i32 %tmp5
+}
+
+define i32 @test2(i32 %X, i32 %Y) {
+	%tmp1 = and i32 %X, 65535		; <i32> [#uses=1]
+	%tmp3 = shl i32 %Y, 12		; <i32> [#uses=1]
+	%tmp4 = and i32 %tmp3, -65536		; <i32> [#uses=1]
+	%tmp57 = or i32 %tmp4, %tmp1		; <i32> [#uses=1]
+	ret i32 %tmp57
+}
+
+define i32 @test3(i32 %X, i32 %Y) {
+	%tmp19 = and i32 %X, 65535		; <i32> [#uses=1]
+	%tmp37 = shl i32 %Y, 18		; <i32> [#uses=1]
+	%tmp5 = or i32 %tmp37, %tmp19		; <i32> [#uses=1]
+	ret i32 %tmp5
+}
+
+define i32 @test4(i32 %X, i32 %Y) {
+	%tmp1 = and i32 %X, 65535		; <i32> [#uses=1]
+	%tmp3 = and i32 %Y, -65536		; <i32> [#uses=1]
+	%tmp46 = or i32 %tmp3, %tmp1		; <i32> [#uses=1]
+	ret i32 %tmp46
+}
+
+define i32 @test5(i32 %X, i32 %Y) {
+	%tmp17 = and i32 %X, -65536		; <i32> [#uses=1]
+	%tmp2 = bitcast i32 %Y to i32		; <i32> [#uses=1]
+	%tmp4 = lshr i32 %tmp2, 16		; <i32> [#uses=2]
+	%tmp5 = or i32 %tmp4, %tmp17		; <i32> [#uses=1]
+	ret i32 %tmp5
+}
+
+define i32 @test5a(i32 %X, i32 %Y) {
+	%tmp110 = and i32 %X, -65536		; <i32> [#uses=1]
+	%tmp37 = lshr i32 %Y, 16		; <i32> [#uses=1]
+	%tmp39 = bitcast i32 %tmp37 to i32		; <i32> [#uses=1]
+	%tmp5 = or i32 %tmp39, %tmp110		; <i32> [#uses=1]
+	ret i32 %tmp5
+}
+
+define i32 @test6(i32 %X, i32 %Y) {
+	%tmp1 = and i32 %X, -65536		; <i32> [#uses=1]
+	%tmp37 = lshr i32 %Y, 12		; <i32> [#uses=1]
+	%tmp38 = bitcast i32 %tmp37 to i32		; <i32> [#uses=1]
+	%tmp4 = and i32 %tmp38, 65535		; <i32> [#uses=1]
+	%tmp59 = or i32 %tmp4, %tmp1		; <i32> [#uses=1]
+	ret i32 %tmp59
+}
+
+define i32 @test7(i32 %X, i32 %Y) {
+	%tmp1 = and i32 %X, -65536		; <i32> [#uses=1]
+	%tmp3 = ashr i32 %Y, 18		; <i32> [#uses=1]
+	%tmp4 = and i32 %tmp3, 65535		; <i32> [#uses=1]
+	%tmp57 = or i32 %tmp4, %tmp1		; <i32> [#uses=1]
+	ret i32 %tmp57
+}
diff --git a/test/CodeGen/Thumb2/thumb2-rev.ll b/test/CodeGen/Thumb2/thumb2-rev.ll
index 4009da33b2607..27b1672e554a0 100644
--- a/test/CodeGen/Thumb2/thumb2-rev.ll
+++ b/test/CodeGen/Thumb2/thumb2-rev.ll
@@ -1,8 +1,23 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2,+v7a | grep {rev\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a | FileCheck %s
 
 define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: rev r0, r0
     %tmp = tail call i32 @llvm.bswap.i32(i32 %a)
     ret i32 %tmp
 }
 
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+define i32 @f2(i32 %X) {
+; CHECK: f2:
+; CHECK: revsh r0, r0
+        %tmp1 = lshr i32 %X, 8
+        %tmp1.upgrd.1 = trunc i32 %tmp1 to i16
+        %tmp3 = trunc i32 %X to i16
+        %tmp2 = and i16 %tmp1.upgrd.1, 255
+        %tmp4 = shl i16 %tmp3, 8
+        %tmp5 = or i16 %tmp2, %tmp4
+        %tmp5.upgrd.2 = sext i16 %tmp5 to i32
+        ret i32 %tmp5.upgrd.2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-rev16.ll b/test/CodeGen/Thumb2/thumb2-rev16.ll
new file mode 100644
index 0000000000000..39b6ac3f0027a
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-rev16.ll
@@ -0,0 +1,32 @@
+; XFAIL: *
+; fixme rev16 pattern is not matching
+
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rev16\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+
+; 0xff00ff00 = 4278255360
+; 0x00ff00ff = 16711935
+define i32 @f1(i32 %a) {
+    %l8 = shl i32 %a, 8
+    %r8 = lshr i32 %a, 8
+    %mask_l8 = and i32 %l8, 4278255360
+    %mask_r8 = and i32 %r8, 16711935
+    %tmp = or i32 %mask_l8, %mask_r8
+    ret i32 %tmp
+}
+
+; 0xff000000 = 4278190080
+; 0x00ff0000 = 16711680
+; 0x0000ff00 = 65280
+; 0x000000ff = 255
+define i32 @f2(i32 %a) {
+    %l8 = shl i32 %a, 8
+    %r8 = lshr i32 %a, 8
+    %masklo_l8 = and i32 %l8, 65280
+    %maskhi_l8 = and i32 %l8, 4278190080
+    %masklo_r8 = and i32 %r8, 255
+    %maskhi_r8 = and i32 %r8, 16711680
+    %tmp1 = or i32 %masklo_l8, %masklo_r8
+    %tmp2 = or i32 %maskhi_l8, %maskhi_r8
+    %tmp = or i32 %tmp1, %tmp2
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ror.ll b/test/CodeGen/Thumb2/thumb2-ror.ll
index 305ab994518dd..01adb528087be 100644
--- a/test/CodeGen/Thumb2/thumb2-ror.ll
+++ b/test/CodeGen/Thumb2/thumb2-ror.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ror\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep 22 | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {ror\\.w\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep 22 | count 1
 
 define i32 @f1(i32 %a) {
     %l8 = shl i32 %a, 10
diff --git a/test/CodeGen/Thumb2/thumb2-ror2.ll b/test/CodeGen/Thumb2/thumb2-ror2.ll
index dd19b0afb18ff..ffd1dd7dc6137 100644
--- a/test/CodeGen/Thumb2/thumb2-ror2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ror2.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ror\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: rors r0, r1
     %db = sub i32 32, %b
     %l8 = shl i32 %a, %b
     %r8 = lshr i32 %a, %db
diff --git a/test/CodeGen/Thumb2/thumb2-rsb.ll b/test/CodeGen/Thumb2/thumb2-rsb.ll
index 57796873b2d19..4611e94350343 100644
--- a/test/CodeGen/Thumb2/thumb2-rsb.ll
+++ b/test/CodeGen/Thumb2/thumb2-rsb.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
 
 define i32 @f2(i32 %a, i32 %b) {
     %tmp = shl i32 %b, 5
diff --git a/test/CodeGen/Thumb2/thumb2-rsb2.ll b/test/CodeGen/Thumb2/thumb2-rsb2.ll
index 957d1d0717e4f..84a379677ad4c 100644
--- a/test/CodeGen/Thumb2/thumb2-rsb2.ll
+++ b/test/CodeGen/Thumb2/thumb2-rsb2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
 
 ; 171 = 0x000000ab
 define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-sbc.ll b/test/CodeGen/Thumb2/thumb2-sbc.ll
new file mode 100644
index 0000000000000..ad962919edcee
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-sbc.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: subs r0, r0, r2
+    %tmp = sub i64 %a, %b
+    ret i64 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-select.ll b/test/CodeGen/Thumb2/thumb2-select.ll
new file mode 100644
index 0000000000000..2dcf8aaa24c5f
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-select.ll
@@ -0,0 +1,98 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+
+define i32 @f1(i32 %a.s) {
+entry:
+; CHECK: f1:
+; CHECK: it eq
+; CHECK: moveq
+
+    %tmp = icmp eq i32 %a.s, 4
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define i32 @f2(i32 %a.s) {
+entry:
+; CHECK: f2:
+; CHECK: it gt
+; CHECK: movgt
+    %tmp = icmp sgt i32 %a.s, 4
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define i32 @f3(i32 %a.s, i32 %b.s) {
+entry:
+; CHECK: f3:
+; CHECK: it lt
+; CHECK: movlt
+    %tmp = icmp slt i32 %a.s, %b.s
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define i32 @f4(i32 %a.s, i32 %b.s) {
+entry:
+; CHECK: f4:
+; CHECK: it le
+; CHECK: movle
+
+    %tmp = icmp sle i32 %a.s, %b.s
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define i32 @f5(i32 %a.u, i32 %b.u) {
+entry:
+; CHECK: f5:
+; CHECK: it ls
+; CHECK: movls
+    %tmp = icmp ule i32 %a.u, %b.u
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define i32 @f6(i32 %a.u, i32 %b.u) {
+entry:
+; CHECK: f6:
+; CHECK: it hi
+; CHECK: movhi
+    %tmp = icmp ugt i32 %a.u, %b.u
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define i32 @f7(i32 %a, i32 %b, i32 %c) {
+entry:
+; CHECK: f7:
+; CHECK: it hi
+; CHECK: lsrhi.w
+    %tmp1 = icmp ugt i32 %a, %b
+    %tmp2 = udiv i32 %c, 3
+    %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3
+    ret i32 %tmp3
+}
+
+define i32 @f8(i32 %a, i32 %b, i32 %c) {
+entry:
+; CHECK: f8:
+; CHECK: it lo
+; CHECK: lsllo.w
+    %tmp1 = icmp ult i32 %a, %b
+    %tmp2 = mul i32 %c, 4
+    %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3
+    ret i32 %tmp3
+}
+
+define i32 @f9(i32 %a, i32 %b, i32 %c) {
+entry:
+; CHECK: f9:
+; CHECK: it ge
+; CHECK: rorge.w
+    %tmp1 = icmp sge i32 %a, %b
+    %tmp2 = shl i32 %c, 10
+    %tmp3 = lshr i32 %c, 22
+    %tmp4 = or i32 %tmp2, %tmp3
+    %tmp5 = select i1 %tmp1, i32 %tmp4, i32 3
+    ret i32 %tmp5
+}
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
new file mode 100644
index 0000000000000..b4274adb58233
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mov | count 3
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mvn | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep it  | count 3
+
+define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
+        %tmp1 = icmp sgt i32 %c, 10
+        %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
+        %tmp3 = add i32 %tmp2, %b
+        ret i32 %tmp3
+}
+
+define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
+        %tmp1 = icmp sgt i32 %c, 10
+        %tmp2 = select i1 %tmp1, i32 0, i32 2147483648
+        %tmp3 = add i32 %tmp2, %b
+        ret i32 %tmp3
+}
+
+define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+        %tmp1 = icmp sgt i32 %c, 10
+        %tmp2 = select i1 %tmp1, i32 0, i32 10
+        %tmp3 = sub i32 %b, %tmp2
+        ret i32 %tmp3
+}
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index 9bd6e43101a89..7746cd3f584b3 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsl
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep asr
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ror
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mov
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep lsl
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep lsr
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep asr
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ror
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep mov
 
 define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
         %A = shl i32 %Y, 16
diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll
new file mode 100644
index 0000000000000..66cc88402fc58
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN:   grep smlabt | count 1
+
+define i32 @f3(i32 %a, i16 %x, i32 %y) {
+        %tmp = sext i16 %x to i32               ; <i32> [#uses=1]
+        %tmp2 = ashr i32 %y, 16         ; <i32> [#uses=1]
+        %tmp3 = mul i32 %tmp2, %tmp             ; <i32> [#uses=1]
+        %tmp5 = add i32 %tmp3, %a               ; <i32> [#uses=1]
+        ret i32 %tmp5
+}
diff --git a/test/CodeGen/Thumb2/thumb2-smul.ll b/test/CodeGen/Thumb2/thumb2-smul.ll
new file mode 100644
index 0000000000000..cdbf4ca7bf678
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN:   grep smulbt | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN:   grep smultt | count 1
+
+@x = weak global i16 0          ; <i16*> [#uses=1]
+@y = weak global i16 0          ; <i16*> [#uses=0]
+
+define i32 @f1(i32 %y) {
+        %tmp = load i16* @x             ; <i16> [#uses=1]
+        %tmp1 = add i16 %tmp, 2         ; <i16> [#uses=1]
+        %tmp2 = sext i16 %tmp1 to i32           ; <i32> [#uses=1]
+        %tmp3 = ashr i32 %y, 16         ; <i32> [#uses=1]
+        %tmp4 = mul i32 %tmp2, %tmp3            ; <i32> [#uses=1]
+        ret i32 %tmp4
+}
+
+define i32 @f2(i32 %x, i32 %y) {
+        %tmp1 = ashr i32 %x, 16         ; <i32> [#uses=1]
+        %tmp3 = ashr i32 %y, 16         ; <i32> [#uses=1]
+        %tmp4 = mul i32 %tmp3, %tmp1            ; <i32> [#uses=1]
+        ret i32 %tmp4
+}
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
new file mode 100644
index 0000000000000..0a7221c61749f
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon | FileCheck %s
+; PR4789
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+
+define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
+; CHECK: aaa:
+; CHECK: vstmia sp
+; CHECK: vldmia sp
+entry:
+  %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+  store float 6.300000e+01, float* undef, align 4
+  %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+  store float 0.000000e+00, float* undef, align 4
+  %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+  %val173 = load <4 x float>* undef               ; <<4 x float>> [#uses=1]
+  br label %bb4
+
+bb4:                                              ; preds = %bb193, %entry
+  %besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
+  %part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
+  %3 = fmul <4 x float> zeroinitializer, %0       ; <<4 x float>> [#uses=2]
+  %4 = fadd <4 x float> %3, %part0.0.0261         ; <<4 x float>> [#uses=1]
+  %5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+  %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+  %7 = fmul <4 x float> %1, undef                 ; <<4 x float>> [#uses=1]
+  %8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
+  %9 = fptosi <4 x float> %8 to <4 x i32>         ; <<4 x i32>> [#uses=1]
+  %10 = sitofp <4 x i32> %9 to <4 x float>        ; <<4 x float>> [#uses=1]
+  %11 = fmul <4 x float> %10, %2                  ; <<4 x float>> [#uses=1]
+  %12 = fmul <4 x float> undef, %6                ; <<4 x float>> [#uses=1]
+  %13 = fmul <4 x float> %11, %4                  ; <<4 x float>> [#uses=1]
+  %14 = fsub <4 x float> %12, %13                 ; <<4 x float>> [#uses=1]
+  %15 = fsub <4 x float> %14, undef               ; <<4 x float>> [#uses=1]
+  %16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
+  %17 = fadd <4 x float> %16, undef               ; <<4 x float>> [#uses=1]
+  %18 = fmul <4 x float> %17, %val173             ; <<4 x float>> [#uses=1]
+  %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+  %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %21 = fadd <4 x float> zeroinitializer, %20     ; <<4 x float>> [#uses=2]
+  %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
+  br i1 undef, label %bb193, label %bb186
+
+bb186:                                            ; preds = %bb4
+  br label %bb193
+
+bb193:                                            ; preds = %bb186, %bb4
+  %besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
+  %23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
+  br label %bb4
+}
diff --git a/test/CodeGen/Thumb2/thumb2-str.ll b/test/CodeGen/Thumb2/thumb2-str.ll
index 4097a6c1579a2..3eeec8c3850f2 100644
--- a/test/CodeGen/Thumb2/thumb2-str.ll
+++ b/test/CodeGen/Thumb2/thumb2-str.ll
@@ -1,28 +1,32 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4092\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#-128\\\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4096\\\]$}
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*\\\]$} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {str\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*,\\Wlsl #2\\\]$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32* %v) {
+; CHECK: f1:
+; CHECK: str r0, [r1]
         store i32 %a, i32* %v
         ret i32 %a
 }
 
 define i32 @f2(i32 %a, i32* %v) {
+; CHECK: f2:
+; CHECK: str.w r0, [r1, #+4092]
         %tmp2 = getelementptr i32* %v, i32 1023
         store i32 %a, i32* %tmp2
         ret i32 %a
 }
 
 define i32 @f2a(i32 %a, i32* %v) {
+; CHECK: f2a:
+; CHECK: str r0, [r1, #-128]
         %tmp2 = getelementptr i32* %v, i32 -32
         store i32 %a, i32* %tmp2
         ret i32 %a
 }
 
 define i32 @f3(i32 %a, i32* %v) {
+; CHECK: f3:
+; CHECK: mov.w r2, #4096
+; CHECK: str r0, [r1, r2]
         %tmp2 = getelementptr i32* %v, i32 1024
         store i32 %a, i32* %tmp2
         ret i32 %a
@@ -30,6 +34,8 @@ define i32 @f3(i32 %a, i32* %v) {
 
 define i32 @f4(i32 %a, i32 %base) {
 entry:
+; CHECK: f4:
+; CHECK: str r0, [r1, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i32*
         store i32 %a, i32* %tmp2
@@ -38,6 +44,8 @@ entry:
 
 define i32 @f5(i32 %a, i32 %base, i32 %offset) {
 entry:
+; CHECK: f5:
+; CHECK: str r0, [r1, r2]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i32*
         store i32 %a, i32* %tmp2
@@ -46,6 +54,8 @@ entry:
 
 define i32 @f6(i32 %a, i32 %base, i32 %offset) {
 entry:
+; CHECK: f6:
+; CHECK: str.w r0, [r1, r2, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i32*
@@ -55,6 +65,9 @@ entry:
 
 define i32 @f7(i32 %a, i32 %base, i32 %offset) {
 entry:
+; CHECK: f7:
+; CHECK: lsrs r2, r2, #2
+; CHECK: str r0, [r1, r2]
         %tmp1 = lshr i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i32*
diff --git a/test/CodeGen/Thumb2/thumb2-str_post.ll b/test/CodeGen/Thumb2/thumb2-str_post.ll
index 536011c4de7d7..bee58105daebb 100644
--- a/test/CodeGen/Thumb2/thumb2-str_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_post.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
 ; RUN:   grep {strh .*\\\[.*\], #-4} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
 ; RUN:   grep {str .*\\\[.*\],} | count 1
 
 define i16 @test1(i32* %X, i16* %A) {
diff --git a/test/CodeGen/Thumb2/thumb2-str_pre.ll b/test/CodeGen/Thumb2/thumb2-str_pre.ll
index 1e93b70df5acc..6c804eea634c6 100644
--- a/test/CodeGen/Thumb2/thumb2-str_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
 ; RUN:   grep {str.*\\!} | count 2
 
 define void @test1(i32* %X, i32* %A, i32** %dest) {
diff --git a/test/CodeGen/Thumb2/thumb2-strb.ll b/test/CodeGen/Thumb2/thumb2-strb.ll
index d8401cd68471b..1ebb938b1a885 100644
--- a/test/CodeGen/Thumb2/thumb2-strb.ll
+++ b/test/CodeGen/Thumb2/thumb2-strb.ll
@@ -1,28 +1,32 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4092\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#-128\\\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4096\\\]$}
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*\\\]$} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strb\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*,\\Wlsl #2\\\]$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i8 @f1(i8 %a, i8* %v) {
+; CHECK: f1:
+; CHECK: strb r0, [r1]
         store i8 %a, i8* %v
         ret i8 %a
 }
 
 define i8 @f2(i8 %a, i8* %v) {
+; CHECK: f2:
+; CHECK: strb.w r0, [r1, #+4092]
         %tmp2 = getelementptr i8* %v, i32 4092
         store i8 %a, i8* %tmp2
         ret i8 %a
 }
 
 define i8 @f2a(i8 %a, i8* %v) {
+; CHECK: f2a:
+; CHECK: strb r0, [r1, #-128]
         %tmp2 = getelementptr i8* %v, i32 -128
         store i8 %a, i8* %tmp2
         ret i8 %a
 }
 
 define i8 @f3(i8 %a, i8* %v) {
+; CHECK: f3:
+; CHECK: mov.w r2, #4096
+; CHECK: strb r0, [r1, r2]
         %tmp2 = getelementptr i8* %v, i32 4096
         store i8 %a, i8* %tmp2
         ret i8 %a
@@ -30,6 +34,8 @@ define i8 @f3(i8 %a, i8* %v) {
 
 define i8 @f4(i8 %a, i32 %base) {
 entry:
+; CHECK: f4:
+; CHECK: strb r0, [r1, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i8*
         store i8 %a, i8* %tmp2
@@ -38,6 +44,8 @@ entry:
 
 define i8 @f5(i8 %a, i32 %base, i32 %offset) {
 entry:
+; CHECK: f5:
+; CHECK: strb r0, [r1, r2]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i8*
         store i8 %a, i8* %tmp2
@@ -46,6 +54,8 @@ entry:
 
 define i8 @f6(i8 %a, i32 %base, i32 %offset) {
 entry:
+; CHECK: f6:
+; CHECK: strb.w r0, [r1, r2, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i8*
@@ -55,6 +65,9 @@ entry:
 
 define i8 @f7(i8 %a, i32 %base, i32 %offset) {
 entry:
+; CHECK: f7:
+; CHECK: lsrs r2, r2, #2
+; CHECK: strb r0, [r1, r2]
         %tmp1 = lshr i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i8*
diff --git a/test/CodeGen/Thumb2/thumb2-strh.ll b/test/CodeGen/Thumb2/thumb2-strh.ll
index 80dedf0c28ddb..b0eb8c12f5949 100644
--- a/test/CodeGen/Thumb2/thumb2-strh.ll
+++ b/test/CodeGen/Thumb2/thumb2-strh.ll
@@ -1,28 +1,32 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4092\\\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#-128\\\]$} | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*#+4096\\\]$}
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*\\\]$} | count 3
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {strh\\W*r\[0-9\],\\W*\\\[r\[0-9\]*,\\W*+r\[0-9\]*,\\Wlsl #2\\\]$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i16 @f1(i16 %a, i16* %v) {
+; CHECK: f1:
+; CHECK: strh r0, [r1]
         store i16 %a, i16* %v
         ret i16 %a
 }
 
 define i16 @f2(i16 %a, i16* %v) {
+; CHECK: f2:
+; CHECK: strh.w r0, [r1, #+4092]
         %tmp2 = getelementptr i16* %v, i32 2046
         store i16 %a, i16* %tmp2
         ret i16 %a
 }
 
 define i16 @f2a(i16 %a, i16* %v) {
+; CHECK: f2a:
+; CHECK: strh r0, [r1, #-128]
         %tmp2 = getelementptr i16* %v, i32 -64
         store i16 %a, i16* %tmp2
         ret i16 %a
 }
 
 define i16 @f3(i16 %a, i16* %v) {
+; CHECK: f3:
+; CHECK: mov.w r2, #4096
+; CHECK: strh r0, [r1, r2]
         %tmp2 = getelementptr i16* %v, i32 2048
         store i16 %a, i16* %tmp2
         ret i16 %a
@@ -30,6 +34,8 @@ define i16 @f3(i16 %a, i16* %v) {
 
 define i16 @f4(i16 %a, i32 %base) {
 entry:
+; CHECK: f4:
+; CHECK: strh r0, [r1, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i16*
         store i16 %a, i16* %tmp2
@@ -38,6 +44,8 @@ entry:
 
 define i16 @f5(i16 %a, i32 %base, i32 %offset) {
 entry:
+; CHECK: f5:
+; CHECK: strh r0, [r1, r2]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i16*
         store i16 %a, i16* %tmp2
@@ -46,6 +54,8 @@ entry:
 
 define i16 @f6(i16 %a, i32 %base, i32 %offset) {
 entry:
+; CHECK: f6:
+; CHECK: strh.w r0, [r1, r2, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i16*
@@ -55,6 +65,9 @@ entry:
 
 define i16 @f7(i16 %a, i32 %base, i32 %offset) {
 entry:
+; CHECK: f7:
+; CHECK: lsrs r2, r2, #2
+; CHECK: strh r0, [r1, r2]
         %tmp1 = lshr i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i16*
diff --git a/test/CodeGen/Thumb2/thumb2-sub.ll b/test/CodeGen/Thumb2/thumb2-sub.ll
index cf8270412d109..95335a2ee2ccc 100644
--- a/test/CodeGen/Thumb2/thumb2-sub.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub.ll
@@ -1,31 +1,49 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\[w\]\\?\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#510} | count 5
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 ; 171 = 0x000000ab
 define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: subs r0, #171
     %tmp = sub i32 %a, 171
     ret i32 %tmp
 }
 
 ; 1179666 = 0x00120012
 define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: sub.w r0, r0, #1179666
     %tmp = sub i32 %a, 1179666
     ret i32 %tmp
 }
 
 ; 872428544 = 0x34003400
 define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: sub.w r0, r0, #872428544
     %tmp = sub i32 %a, 872428544
     ret i32 %tmp
 }
 
 ; 1448498774 = 0x56565656
 define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: sub.w r0, r0, #1448498774
     %tmp = sub i32 %a, 1448498774
     ret i32 %tmp
 }
 
 ; 510 = 0x000001fe
 define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: sub.w r0, r0, #510
     %tmp = sub i32 %a, 510
     ret i32 %tmp
 }
+
+; Don't change this to an add.
+define i32 @f6(i32 %a) {
+; CHECK: f6:
+; CHECK: subs r0, #1
+    %tmp = sub i32 %a, 1
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-sub2.ll b/test/CodeGen/Thumb2/thumb2-sub2.ll
index c7ebd22a8a1fa..6813f76d89326 100644
--- a/test/CodeGen/Thumb2/thumb2-sub2.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {subw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {subw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
 
 define i32 @f1(i32 %a) {
     %tmp = sub i32 %a, 4095
diff --git a/test/CodeGen/Thumb2/thumb2-sub4.ll b/test/CodeGen/Thumb2/thumb2-sub4.ll
index fd283fdc8ef92..a040d170f9355 100644
--- a/test/CodeGen/Thumb2/thumb2-sub4.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub4.ll
@@ -1,33 +1,39 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {sub\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: subs r0, r0, r1
     %tmp = sub i32 %a, %b
     ret i32 %tmp
 }
 
 define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: sub.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = sub i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: sub.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = sub i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: sub.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = sub i32 %a, %tmp
     ret i32 %tmp1
 }
 
 define i32 @f5(i32 %a, i32 %b) {
+; CHECK: f5:
+; CHECK: sub.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-sub5.ll b/test/CodeGen/Thumb2/thumb2-sub5.ll
index 3e9ec2569738d..c3b56bc09c85b 100644
--- a/test/CodeGen/Thumb2/thumb2-sub5.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub5.ll
@@ -1,6 +1,9 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {subs\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: subs r0, r0, r2
+; CHECK: sbcs r1, r3
     %tmp = sub i64 %a, %b
     ret i64 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
index 4afe354028757..33ed543d6b6aa 100644
--- a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
 ; RUN:   grep sxtb | count 2
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
 ; RUN:   grep sxtb | grep ror | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
 ; RUN:   grep sxtab | count 1
 
 define i32 @test0(i8 %A) {
diff --git a/test/CodeGen/Thumb2/thumb2-tbb.ll b/test/CodeGen/Thumb2/thumb2-tbb.ll
new file mode 100644
index 0000000000000..5dc3cc3ce70a9
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-tbb.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s
+
+define void @bar(i32 %n.u) {
+entry:
+; CHECK: bar:
+; CHECK: tbb
+; CHECK: .align 1
+
+    switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
+bb:
+    tail call void(...)* @foo1()
+    ret void
+bb1:
+    tail call void(...)* @foo2()
+    ret void
+bb2:
+    tail call void(...)* @foo6()
+    ret void
+bb3:
+    tail call void(...)* @foo3()
+    ret void
+bb4:
+    tail call void(...)* @foo4()
+    ret void
+bb5:
+    tail call void(...)* @foo5()
+    ret void
+bb6:
+    tail call void(...)* @foo1()
+    ret void
+bb7:
+    tail call void(...)* @foo2()
+    ret void
+bb8:
+    tail call void(...)* @foo6()
+    ret void
+bb9:
+    tail call void(...)* @foo3()
+    ret void
+bb10:
+    tail call void(...)* @foo4()
+    ret void
+bb11:
+    tail call void(...)* @foo5()
+    ret void
+bb12:
+    tail call void(...)* @foo6()
+    ret void
+}
+
+declare void @foo1(...)
+declare void @foo2(...)
+declare void @foo6(...)
+declare void @foo3(...)
+declare void @foo4(...)
+declare void @foo5(...)
diff --git a/test/CodeGen/Thumb2/thumb2-tbh.ll b/test/CodeGen/Thumb2/thumb2-tbh.ll
new file mode 100644
index 0000000000000..c5cb6f33e2ed9
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-tbh.ll
@@ -0,0 +1,90 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s
+
+; Thumb2 target should reorder the bb's in order to use tbb / tbh.
+
+; XFAIL: *
+
+	%struct.R_flstr = type { i32, i32, i8* }
+	%struct._T_tstr = type { i32, %struct.R_flstr*, %struct._T_tstr* }
+@_C_nextcmd = external global i32		; <i32*> [#uses=3]
+@.str31 = external constant [28 x i8], align 1		; <[28 x i8]*> [#uses=1]
+@_T_gtol = external global %struct._T_tstr*		; <%struct._T_tstr**> [#uses=2]
+
+declare arm_apcscc i32 @strlen(i8* nocapture) nounwind readonly
+
+declare arm_apcscc void @Z_fatal(i8*) noreturn nounwind
+
+declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind
+
+define arm_apcscc i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+; CHECK: main:
+; CHECK: tbh
+entry:
+	br label %bb42.i
+
+bb1.i2:		; preds = %bb42.i
+	br label %bb40.i
+
+bb5.i:		; preds = %bb42.i
+	%0 = or i32 %_Y_flags.1, 32		; <i32> [#uses=1]
+	br label %bb40.i
+
+bb7.i:		; preds = %bb42.i
+	call arm_apcscc  void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 0, i8* null) nounwind
+	unreachable
+
+bb15.i:		; preds = %bb42.i
+	call arm_apcscc  void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 2, i8* null) nounwind
+	unreachable
+
+bb23.i:		; preds = %bb42.i
+	%1 = call arm_apcscc  i32 @strlen(i8* null) nounwind readonly		; <i32> [#uses=0]
+	unreachable
+
+bb33.i:		; preds = %bb42.i
+	store i32 0, i32* @_C_nextcmd, align 4
+	%2 = call arm_apcscc  noalias i8* @calloc(i32 21, i32 1) nounwind		; <i8*> [#uses=0]
+	unreachable
+
+bb34.i:		; preds = %bb42.i
+	%3 = load i32* @_C_nextcmd, align 4		; <i32> [#uses=1]
+	%4 = add i32 %3, 1		; <i32> [#uses=1]
+	store i32 %4, i32* @_C_nextcmd, align 4
+	%5 = call arm_apcscc  noalias i8* @calloc(i32 22, i32 1) nounwind		; <i8*> [#uses=0]
+	unreachable
+
+bb35.i:		; preds = %bb42.i
+	%6 = call arm_apcscc  noalias i8* @calloc(i32 20, i32 1) nounwind		; <i8*> [#uses=0]
+	unreachable
+
+bb37.i:		; preds = %bb42.i
+	%7 = call arm_apcscc  noalias i8* @calloc(i32 14, i32 1) nounwind		; <i8*> [#uses=0]
+	unreachable
+
+bb39.i:		; preds = %bb42.i
+	call arm_apcscc  void @Z_fatal(i8* getelementptr ([28 x i8]* @.str31, i32 0, i32 0)) nounwind
+	unreachable
+
+bb40.i:		; preds = %bb42.i, %bb5.i, %bb1.i2
+	%_Y_flags.0 = phi i32 [ 0, %bb1.i2 ], [ %0, %bb5.i ], [ %_Y_flags.1, %bb42.i ]		; <i32> [#uses=1]
+	%_Y_eflag.b.0 = phi i1 [ %_Y_eflag.b.1, %bb1.i2 ], [ %_Y_eflag.b.1, %bb5.i ], [ true, %bb42.i ]		; <i1> [#uses=1]
+	br label %bb42.i
+
+bb42.i:		; preds = %bb40.i, %entry
+	%_Y_eflag.b.1 = phi i1 [ false, %entry ], [ %_Y_eflag.b.0, %bb40.i ]		; <i1> [#uses=2]
+	%_Y_flags.1 = phi i32 [ 0, %entry ], [ %_Y_flags.0, %bb40.i ]		; <i32> [#uses=2]
+	switch i32 undef, label %bb39.i [
+		i32 67, label %bb33.i
+		i32 70, label %bb35.i
+		i32 77, label %bb37.i
+		i32 83, label %bb34.i
+		i32 97, label %bb7.i
+		i32 100, label %bb5.i
+		i32 101, label %bb40.i
+		i32 102, label %bb23.i
+		i32 105, label %bb15.i
+		i32 116, label %bb1.i2
+	]
+}
+
+declare arm_apcscc void @_T_addtol(%struct._T_tstr** nocapture, i32, i8*) nounwind
diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll
index c3c20943dda61..634d318c85c40 100644
--- a/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 10
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*#\[0-9\]*} | \
+; RUN:     grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 10
 
 ; 0x000000bb = 187
 define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index fe2b2c8b15d3c..c6867d99de76b 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {teq\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
 
 define i1 @f1(i32 %a, i32 %b) {
     %tmp = xor i32 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll
index 9e2d3e5ec1c9f..525a817fe37ef 100644
--- a/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 10
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {tst\\.w\\W*r\[0-9\],\\W*#\[0-9\]*} | \
+; RUN:     grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 10
 
 ; 0x000000bb = 187
 define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-tst2.ll b/test/CodeGen/Thumb2/thumb2-tst2.ll
index c0f404c89f6d5..db202dd2cbcd8 100644
--- a/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -1,34 +1,40 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {tst\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i1 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: tst r0, r1
     %tmp = and i32 %a, %b
     %tmp1 = icmp ne i32 %tmp, 0
     ret i1 %tmp1
 }
 
 define i1 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: tst r0, r1
     %tmp = and i32 %a, %b
     %tmp1 = icmp eq i32 %tmp, 0
     ret i1 %tmp1
 }
 
 define i1 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: tst r0, r1
     %tmp = and i32 %a, %b
     %tmp1 = icmp ne i32 0, %tmp
     ret i1 %tmp1
 }
 
 define i1 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: tst r0, r1
     %tmp = and i32 %a, %b
     %tmp1 = icmp eq i32 0, %tmp
     ret i1 %tmp1
 }
 
 define i1 @f6(i32 %a, i32 %b) {
+; CHECK: f6:
+; CHECK: tst.w r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = and i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -36,6 +42,8 @@ define i1 @f6(i32 %a, i32 %b) {
 }
 
 define i1 @f7(i32 %a, i32 %b) {
+; CHECK: f7:
+; CHECK: tst.w r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = and i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -43,6 +51,8 @@ define i1 @f7(i32 %a, i32 %b) {
 }
 
 define i1 @f8(i32 %a, i32 %b) {
+; CHECK: f8:
+; CHECK: tst.w r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = and i32 %a, %tmp
     %tmp2 = icmp eq i32 %tmp1, 0
@@ -50,6 +60,8 @@ define i1 @f8(i32 %a, i32 %b) {
 }
 
 define i1 @f9(i32 %a, i32 %b) {
+; CHECK: f9:
+; CHECK: tst.w r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
     %tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index 0d1cc183de32f..37919dde1dcc9 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep uxtb | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep uxtab | count 1
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep uxth | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxtb | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxtab | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep uxth | count 1
 
 define i8 @test1(i32 %A.u) zeroext {
     %B.u = trunc i32 %A.u to i8
diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 28a5fe4d2ee9a..4022d95ed4754 100644
--- a/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
 ; RUN:   grep uxt | count 10
 
 define i32 @test1(i32 %x) {
diff --git a/test/CodeGen/Thumb2/tls1.ll b/test/CodeGen/Thumb2/tls1.ll
index 6abb6eba630d4..1e555571c0542 100644
--- a/test/CodeGen/Thumb2/tls1.ll
+++ b/test/CodeGen/Thumb2/tls1.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
 ; RUN:     grep {i(tpoff)}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
 ; RUN:     grep {__aeabi_read_tp}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi \
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi \
 ; RUN:     -relocation-model=pic | grep {__tls_get_addr}
 
 
diff --git a/test/CodeGen/Thumb2/tls2.ll b/test/CodeGen/Thumb2/tls2.ll
index 3396b0ba43f3a..b8a0657c90691 100644
--- a/test/CodeGen/Thumb2/tls2.ll
+++ b/test/CodeGen/Thumb2/tls2.ll
@@ -1,19 +1,29 @@
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
-; RUN:     grep {i(gottpoff)}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi | \
-; RUN:     grep {ldr r., \[pc, r.\]}
-; RUN: llvm-as < %s | llc -mtriple=thumbv7-linux-gnueabi \
-; RUN:     -relocation-model=pic | grep {__tls_get_addr}
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | FileCheck %s -check-prefix=CHECK-NOT-PIC
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC
 
 @i = external thread_local global i32		; <i32*> [#uses=2]
 
 define i32 @f() {
 entry:
+; CHECK-NOT-PIC: f:
+; CHECK-NOT-PIC: add r0, pc
+; CHECK-NOT-PIC: ldr r1, [r0]
+; CHECK-NOT-PIC: i(gottpoff)
+
+; CHECK-PIC: f:
+; CHECK-PIC: bl __tls_get_addr(PLT)
 	%tmp1 = load i32* @i		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
 define i32* @g() {
 entry:
+; CHECK-NOT-PIC: g:
+; CHECK-NOT-PIC: add r0, pc
+; CHECK-NOT-PIC: ldr r1, [r0]
+; CHECK-NOT-PIC: i(gottpoff)
+
+; CHECK-PIC: g:
+; CHECK-PIC: bl __tls_get_addr(PLT)
 	ret i32* @i
 }
diff --git a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
index 2b4242aaa15e5..24848602baf84 100644
--- a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
+++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
@@ -3,7 +3,7 @@
 ; it makes a ton of annoying overlapping live ranges.  This code should not
 ; cause spills!
 ;
-; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep spilled
+; RUN: llc < %s -march=x86 -stats |& not grep spilled
 
 target datalayout = "e-p:32:32"
 
diff --git a/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll b/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
index a4d558949e307..5c40eeaa1eade 100644
--- a/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
+++ b/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define i32 @test() {
 entry:
diff --git a/test/CodeGen/X86/2003-11-03-GlobalBool.ll b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
index 4de3c79fdcbb6..8b0a18550da15 100644
--- a/test/CodeGen/X86/2003-11-03-GlobalBool.ll
+++ b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
 ; RUN:   not grep {.byte\[\[:space:\]\]*true}
 
 @X = global i1 true             ; <i1*> [#uses=0]
diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.ll b/test/CodeGen/X86/2004-02-12-Memcpy.ll
index 56bb21caf3ca9..f15a1b441816c 100644
--- a/test/CodeGen/X86/2004-02-12-Memcpy.ll
+++ b/test/CodeGen/X86/2004-02-12-Memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
 
 @A = global [32 x i32] zeroinitializer
 @B = global [32 x i32] zeroinitializer
diff --git a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
index f48b1d3adf015..fea2b54d76305 100644
--- a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
+++ b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
@@ -1,4 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {(%esp}
+; RUN: llc < %s -march=x86 | grep {(%esp}
+; RUN: llc < %s -march=x86 | grep {pushl	%ebp} | count 1
+; RUN: llc < %s -march=x86 | grep {popl	%ebp} | count 1
 
 declare i8* @llvm.returnaddress(i32)
 
diff --git a/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll b/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
index b25dfaf5d90e8..f986ebd35f85e 100644
--- a/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
+++ b/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep -i ESP | not grep sub
+; RUN: llc < %s -march=x86 | grep -i ESP | not grep sub
 
 define i32 @test(i32 %X) {
         ret i32 %X
diff --git a/test/CodeGen/X86/2004-02-22-Casts.ll b/test/CodeGen/X86/2004-02-22-Casts.ll
index 40d5f39df642f..dabf7d3c15b6c 100644
--- a/test/CodeGen/X86/2004-02-22-Casts.ll
+++ b/test/CodeGen/X86/2004-02-22-Casts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 define i1 @test1(double %X) {
         %V = fcmp one double %X, 0.000000e+00           ; <i1> [#uses=1]
         ret i1 %V
diff --git a/test/CodeGen/X86/2004-03-30-Select-Max.ll b/test/CodeGen/X86/2004-03-30-Select-Max.ll
index 5021fd89dfe4c..b6631b62118ab 100644
--- a/test/CodeGen/X86/2004-03-30-Select-Max.ll
+++ b/test/CodeGen/X86/2004-03-30-Select-Max.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep {j\[lgbe\]}
+; RUN: llc < %s -march=x86 | not grep {j\[lgbe\]}
 
 define i32 @max(i32 %A, i32 %B) {
         %gt = icmp sgt i32 %A, %B               ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll b/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
index 633a61564558c..c62fee1bd263e 100644
--- a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
+++ b/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
@@ -2,7 +2,7 @@
 ; overlapping live intervals. When two overlapping intervals have the same
 ; value, they can be joined though.
 ;
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=linearscan | \
+; RUN: llc < %s -march=x86 -regalloc=linearscan | \
 ; RUN:   not grep {mov %\[A-Z\]\\\{2,3\\\}, %\[A-Z\]\\\{2,3\\\}}
 
 define i64 @test(i64 %x) {
diff --git a/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll b/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
index 858605c231bc4..f8ed016f99b69 100644
--- a/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
+++ b/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define double @test(double %d) {
         %X = select i1 false, double %d, double %d              ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/2004-06-10-StackifierCrash.ll b/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
index 1a51bee404d06..036aa6a77f407 100644
--- a/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
+++ b/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define i1 @T(double %X) {
         %V = fcmp oeq double %X, %X             ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll b/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
index 9ee773c91a24f..db3af0139cee2 100644
--- a/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
+++ b/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define i1 @test(i1 %C, i1 %D, i32 %X, i32 %Y) {
         %E = icmp slt i32 %X, %Y                ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
index 37cff57f30e23..32fafc61e8de3 100644
--- a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
+++ b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
@@ -3,7 +3,7 @@
 ; is invalid code (there is no correct way to order the instruction).  Check
 ; that we do not fold the load into the sub.
 
-; RUN: llvm-as < %s | llc -march=x86 | not grep sub.*GLOBAL
+; RUN: llc < %s -march=x86 | not grep sub.*GLOBAL
 
 @GLOBAL = external global i32           ; <i32*> [#uses=1]
 
diff --git a/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll b/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
index 762047b7d8c43..30a6ac6fbdf1d 100644
--- a/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
+++ b/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep 18446744073709551612
+; RUN: llc < %s -march=x86 | not grep 18446744073709551612
 
 @A = external global i32                ; <i32*> [#uses=1]
 @Y = global i32* getelementptr (i32* @A, i32 -1)                ; <i32**> [#uses=0]
diff --git a/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
index 04035aca998fa..5266009c55a5d 100644
--- a/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
+++ b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=generic
+; RUN: llc < %s -march=x86 -mcpu=generic
 ; Make sure LLC doesn't crash in the stackifier due to FP PHI nodes.
 
 define void @radfg_() {
diff --git a/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll b/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
index 817b281243e76..d906da43fe11c 100644
--- a/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
+++ b/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
 ; RUN:   grep shld | count 1
 ;
 ; Check that the isel does not fold the shld, which already folds a load
diff --git a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
index 51d2fb2fe27bc..dc69ef83103f7 100644
--- a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
+++ b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep {subl.*%esp}
+; RUN: llc < %s -march=x86 | not grep {subl.*%esp}
 
 define i32 @f(i32 %a, i32 %b) {
         %tmp.2 = mul i32 %a, %a         ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
index c410c4668a9be..0421896922b9e 100644
--- a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
+++ b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86  -stats |& \
+; RUN: llc < %s -march=x86  -stats |& \
 ; RUN:   grep asm-printer | grep 7
 
 define i32 @g(i32 %a, i32 %b) nounwind {
diff --git a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
index 743790cad0339..c106f57e93843 100644
--- a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
+++ b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
 ; END.
 
 target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
index 4a0b5c37e2617..8783a11c060b1 100644
--- a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
+++ b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=x86 -mtriple=i686-apple-darwin8 -relocation-model=static > %t
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 -relocation-model=static > %t
 ; RUN: grep {movl	_last} %t | count 1
 ; RUN: grep {cmpl.*_last} %t | count 1
 
diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
index f28366699c3d4..49f3a95705ad0 100644
--- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
+++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -stats |& \
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& \
 ; RUN:   not grep {Number of register spills}
 ; END.
 
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
index 72dab39888f18..7d0a6ab0a04c2 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=x86 -relocation-model=static  -stats |& \
+; RUN: llc < %s -march=x86 -relocation-model=static -stats |& \
 ; RUN:   grep asm-printer | grep 14
 ;
 @size20 = external global i32		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
index 48ed2b9cb498d..23954d76a5d6a 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched2.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats  |& \
+; RUN: llc < %s -march=x86 -stats  |& \
 ; RUN:   grep asm-printer | grep 13
 
 define void @_ZN9__gnu_cxx9hashtableISt4pairIKPKciES3_NS_4hashIS3_EESt10_Select1stIS5_E5eqstrSaIiEE14find_or_insertERKS5__cond_true456.i(i8* %tmp435.i, i32* %tmp449.i.out) nounwind {
diff --git a/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll b/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
index 900abe55cd213..8421483ecb556 100644
--- a/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
+++ b/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
@@ -1,7 +1,7 @@
 ; Coalescing from R32 to a subset R32_. Once another register coalescer bug is
 ; fixed, the movb should go away as well.
 
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
 ; RUN:   grep movl
 
 @B = external global i32		; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/2006-05-08-InstrSched.ll b/test/CodeGen/X86/2006-05-08-InstrSched.ll
index c39b377cc733e..d58d638562c9a 100644
--- a/test/CodeGen/X86/2006-05-08-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-08-InstrSched.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=x86 -relocation-model=static | not grep {subl.*%esp}
+; RUN: llc < %s -march=x86 -relocation-model=static | not grep {subl.*%esp}
 
 @A = external global i16*		; <i16**> [#uses=1]
 @B = external global i32		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 6c0e76b34ade2..89b127cccf82a 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
 ; RUN:     grep {asm-printer} | grep 31
 
 target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2006-05-17-VectorArg.ll b/test/CodeGen/X86/2006-05-17-VectorArg.ll
index 217cbe1059f28..b36d61e0f31b1 100644
--- a/test/CodeGen/X86/2006-05-17-VectorArg.ll
+++ b/test/CodeGen/X86/2006-05-17-VectorArg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
 define <4 x float> @opRSQ(<4 x float> %a) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
index ae18c90d8c17c..083d06805f2f6 100644
--- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
+++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep setnp
-; RUN: llvm-as < %s | llc -march=x86 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 | grep setnp
+; RUN: llc < %s -march=x86 -enable-unsafe-fp-math | \
 ; RUN:   not grep setnp
 
 define i32 @test(float %f) {
diff --git a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
index 78838d1141a4d..0288278d626eb 100644
--- a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define i32 @test() {
 	br i1 false, label %cond_next33, label %cond_true12
diff --git a/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll b/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
index 760fe3650e902..4ea364d57e515 100644
--- a/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
+++ b/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR825
 
 define i64 @test() {
diff --git a/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll b/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
index 1db3921ecdb10..568fbbcc4f4ff 100644
--- a/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
+++ b/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR828
 
 target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2006-07-19-ATTAsm.ll b/test/CodeGen/X86/2006-07-19-ATTAsm.ll
index 78167f631e1bd..c8fd10f7009c0 100644
--- a/test/CodeGen/X86/2006-07-19-ATTAsm.ll
+++ b/test/CodeGen/X86/2006-07-19-ATTAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att
 ; PR834
 ; END.
 
diff --git a/test/CodeGen/X86/2006-07-20-InlineAsm.ll b/test/CodeGen/X86/2006-07-20-InlineAsm.ll
index 08510a8a65283..cac47cdab6dee 100644
--- a/test/CodeGen/X86/2006-07-20-InlineAsm.ll
+++ b/test/CodeGen/X86/2006-07-20-InlineAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR833
 
 @G = weak global i32 0		; <i32*> [#uses=3]
diff --git a/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll b/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
index a82612b5a62a0..deae086cf76c7 100644
--- a/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
+++ b/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep -- 4294967240
+; RUN: llc < %s -march=x86 | grep -- 4294967240
 ; PR853
 
 @X = global i32* inttoptr (i64 -56 to i32*)		; <i32**> [#uses=0]
diff --git a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
index 2a521ad73885b..3159cec8553e4 100644
--- a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
+++ b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
@@ -1,5 +1,5 @@
 ; PR850
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att > %t
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att > %t
 ; RUN: grep {movl 4(%eax),%ebp} %t
 ; RUN: grep {movl 0(%eax), %ebx} %t
 
diff --git a/test/CodeGen/X86/2006-08-07-CycleInDAG.ll b/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
index 194cd6681bfaa..aea707ee8fe49 100644
--- a/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 	%struct.foo = type opaque
 
 define fastcc i32 @test(%struct.foo* %v, %struct.foo* %vi) {
diff --git a/test/CodeGen/X86/2006-08-16-CycleInDAG.ll b/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
index f2a8855245cc3..5fee326d530d9 100644
--- a/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 	%struct.expr = type { %struct.rtx_def*, i32, %struct.expr*, %struct.occr*, %struct.occr*, %struct.rtx_def* }
 	%struct.hash_table = type { %struct.expr**, i32, i32, i32 }
 	%struct.occr = type { %struct.occr*, %struct.rtx_def*, i8, i8 }
diff --git a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
index c1d81d52b932b..a19d8f7092c34 100644
--- a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
+++ b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 | \
+; RUN: llc < %s -march=x86 -mcpu=i386 | \
 ; RUN:    not grep {movl %eax, %edx}
 
 define i32 @foo(i32 %t, i32 %C) {
diff --git a/test/CodeGen/X86/2006-09-01-CycleInDAG.ll b/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
index dd21c0455d6d3..1e890bbc02e54 100644
--- a/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin8"
 	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll b/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
index cc988f26618cf..795d4647a3f66 100644
--- a/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
+++ b/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s 
 ; PR933
 
 define fastcc i1 @test() {
diff --git a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
index e8055f5f901f5..bf9fa5782b06b 100644
--- a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
+++ b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse | grep movaps
+; RUN: llc < %s -march=x86 -mattr=sse | grep movaps
 ; Test that the load is NOT folded into the intrinsic, which would zero the top
 ; elts of the loaded vector.
 
diff --git a/test/CodeGen/X86/2006-10-09-CycleInDAG.ll b/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
index d627d1bf214ca..fbb14ee161512 100644
--- a/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define void @_ZN13QFSFileEngine4readEPcx() {
 	%tmp201 = load i32* null		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll b/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
index 5dc1cb3d9a2d3..b1f04518acaab 100644
--- a/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
+++ b/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep shrl
+; RUN: llc < %s -march=x86 | grep shrl
 ; Bug in FindModifiedNodeSlot cause tmp14 load to become a zextload and shr 31
 ; is then optimized away.
 @tree_code_type = external global [0 x i32]		; <[0 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-10-12-CycleInDAG.ll b/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
index 31eb070e85b9d..3b987ac79f946 100644
--- a/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 	%struct.function = type opaque
 	%struct.lang_decl = type opaque
 	%struct.location_t = type { i8*, i32 }
diff --git a/test/CodeGen/X86/2006-10-13-CycleInDAG.ll b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
index 2b53f26f578e4..6ed2e7bb57512 100644
--- a/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 @str = external global [18 x i8]		; <[18 x i8]*> [#uses=1]
 
 define void @test() {
diff --git a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
index 1ff687a1b8b79..88e8b4a4fd924 100644
--- a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
+++ b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
@@ -1,11 +1,14 @@
-; RUN: llvm-as < %s | llc -march=x86 -asm-verbose | %prcontext je 1 | \
-; RUN:   grep BB1_1:
+; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s
 
 @str = internal constant [14 x i8] c"Hello world!\0A\00"		; <[14 x i8]*> [#uses=1]
 @str.upgrd.1 = internal constant [13 x i8] c"Blah world!\0A\00"		; <[13 x i8]*> [#uses=1]
 
-define i32 @main(i32 %argc, i8** %argv) {
+define i32 @test(i32 %argc, i8** %argv) nounwind {
 entry:
+; CHECK: cmpl	$2
+; CHECK-NEXT: je
+; CHECK-NEXT: %entry
+
 	switch i32 %argc, label %UnifiedReturnBlock [
 		 i32 1, label %bb
 		 i32 2, label %bb2
diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
index 1a92852f06fe5..91210ea90c69d 100644
--- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll
+++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {subl	\$4, %esp}
+; RUN: llc < %s -march=x86 | grep {subl	\$4, %esp}
 
 target triple = "i686-pc-linux-gnu"
 @str = internal constant [9 x i8] c"%f+%f*i\0A\00"              ; <[9 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
index f0067c7e489ce..e839d7295adc6 100644
--- a/test/CodeGen/X86/2006-11-17-IllegalMove.ll
+++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
@@ -1,9 +1,9 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep movb %t | count 2
 ; RUN: grep {movzb\[wl\]} %t
 
 
-define void @handle_vector_size_attribute() {
+define void @handle_vector_size_attribute() nounwind {
 entry:
 	%tmp69 = load i32* null		; <i32> [#uses=1]
 	switch i32 %tmp69, label %bb84 [
diff --git a/test/CodeGen/X86/2006-11-27-SelectLegalize.ll b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
index 1222a37436862..ea2e6db61e1af 100644
--- a/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
+++ b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep test.*1
+; RUN: llc < %s -march=x86 | grep test.*1
 ; PR1016
 
 define i32 @test(i32 %A, i32 %B, i32 %C) {
diff --git a/test/CodeGen/X86/2006-11-28-Memcpy.ll b/test/CodeGen/X86/2006-11-28-Memcpy.ll
index a58bedc28d758..8c1573f130ba1 100644
--- a/test/CodeGen/X86/2006-11-28-Memcpy.ll
+++ b/test/CodeGen/X86/2006-11-28-Memcpy.ll
@@ -1,8 +1,6 @@
 ; PR1022, PR1023
-; RUN: llvm-as < %s | llc -march=x86 | \
-; RUN:   grep 3721182122 | count 2
-; RUN: llvm-as < %s | llc -march=x86 | \
-; RUN:   grep -E {movl	_?bytes2} | count 1
+; RUN: llc < %s -march=x86 | grep -- -573785174 | count 2
+; RUN: llc < %s -march=x86 | grep -E {movl	_?bytes2} | count 1
 
 @fmt = constant [4 x i8] c"%x\0A\00"            ; <[4 x i8]*> [#uses=2]
 @bytes = constant [4 x i8] c"\AA\BB\CC\DD"              ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-12-19-IntelSyntax.ll b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
index 17234b827e4c5..f81b303e3b80e 100644
--- a/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
+++ b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel
 ; PR1061
 target datalayout = "e-p:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 3b365f35cb221..e1bae3251a22f 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -1,8 +1,7 @@
 ; PR1075
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | \
-; RUN:   %prcontext {mulss	LCPI1_3} 1 | grep mulss | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 
-define float @foo(float %x) {
+define float @foo(float %x) nounwind {
     %tmp1 = fmul float %x, 3.000000e+00
     %tmp3 = fmul float %x, 5.000000e+00
     %tmp5 = fmul float %x, 7.000000e+00
@@ -11,4 +10,10 @@ define float @foo(float %x) {
     %tmp12 = fadd float %tmp10, %tmp5
     %tmp14 = fadd float %tmp12, %tmp7
     ret float %tmp14
+
+; CHECK:      mulss	LCPI1_2(%rip)
+; CHECK-NEXT: addss
+; CHECK-NEXT: mulss	LCPI1_3(%rip)
+; CHECK-NEXT: addss
+; CHECK-NEXT: ret
 }
diff --git a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index c03d982aeb15c..5e7c0a7ee2b73 100644
--- a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep leaq %t
 ; RUN: not grep {,%rsp)} %t
 ; PR1103
diff --git a/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll b/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
index b1c86f4138a3c..e83e2e54e4556 100644
--- a/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
+++ b/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; Test 'ri' constraint.
 
 define void @run_init_process() {
diff --git a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
index 26d3e367195c5..93e8808549857 100644
--- a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
+++ b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {orl	\$1, %eax}
-; RUN: llvm-as < %s | llc -march=x86 | grep {leal	3(,%eax,8)}
+; RUN: llc < %s -march=x86 | grep {orl	\$1, %eax}
+; RUN: llc < %s -march=x86 | grep {leal	3(,%eax,8)}
 
 ;; This example can't fold the or into an LEA.
 define i32 @test(float ** %tmp2, i32 %tmp12) {
diff --git a/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
index 365768afe794a..954c95d696117 100644
--- a/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu -relocation-model=pic
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu -relocation-model=pic
 ; PR1027
 
 	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
diff --git a/test/CodeGen/X86/2007-02-25-FastCCStack.ll b/test/CodeGen/X86/2007-02-25-FastCCStack.ll
index 3b1eb1fdb66b4..2e2b56d04a25c 100644
--- a/test/CodeGen/X86/2007-02-25-FastCCStack.ll
+++ b/test/CodeGen/X86/2007-02-25-FastCCStack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium3
+; RUN: llc < %s -march=x86 -mcpu=pentium3
 
 define internal fastcc double @ggc_rlimit_bound(double %limit) {
     ret double %limit
diff --git a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
index 721b6e7e20949..112d1ab65e7b8 100644
--- a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
+++ b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin8 -mattr=+sse2
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin8 -mattr=+sse2 | not grep movhlps
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2 | not grep movhlps
 
 define void @test() nounwind {
 test.exit:
diff --git a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
index 4c69ec733dd48..4cac9b4c4a216 100644
--- a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
+++ b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-darwin | \
+; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \
 ; RUN:   grep push | count 3
 
 define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) {
diff --git a/test/CodeGen/X86/2007-03-16-InlineAsm.ll b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
index c98c89a537a3e..9580726ce02a2 100644
--- a/test/CodeGen/X86/2007-03-16-InlineAsm.ll
+++ b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 ; ModuleID = 'a.bc'
 
diff --git a/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
index 6965849e32310..70936fbc92812 100644
--- a/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR1259
 
 define void @test() {
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll b/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
index babcf6a0e8059..44d68dd0493ef 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define i32 @test(i16 %tmp40414244) {
   %tmp48 = call i32 asm sideeffect "inl ${1:w}, $0", "={ax},N{dx},~{dirflag},~{fpsr},~{flags}"( i16 %tmp40414244 )
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
index 9bdb2493508da..3312e01b3d8ef 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {mov %gs:72, %eax}
+; RUN: llc < %s -march=x86 | grep {mov %gs:72, %eax}
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin9"
 
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
index 6e1adf8346243..c1b1ad1c730d8 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah -march=x86 | \
+; RUN: llc < %s -mcpu=yonah -march=x86 | \
 ; RUN:   grep {cmpltsd %xmm0, %xmm0}
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
index e440cdb6cfd7c..30453d5266b9f 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {psrlw \$8, %xmm0}
+; RUN: llc < %s -march=x86 | grep {psrlw \$8, %xmm0}
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin9"
 
diff --git a/test/CodeGen/X86/2007-03-26-CoalescerBug.ll b/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
index 7ce0584c5450e..9676f143bca6c 100644
--- a/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
+++ b/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 @data = external global [339 x i64]
 
diff --git a/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll b/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
index 840fc7d513a02..9f09e88664c69 100644
--- a/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
+++ b/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1314
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
 	%struct.bc_struct = type { i32, i32, i32, i32, %struct.bc_struct*, i8*, i8* }
 @_programStartTime = external global %struct.CycleCount		; <%struct.CycleCount*> [#uses=1]
 
-define fastcc i32 @bc_divide(%struct.bc_struct* %n1, %struct.bc_struct* %n2, %struct.bc_struct** %quot, i32 %scale) {
+define fastcc i32 @bc_divide(%struct.bc_struct* %n1, %struct.bc_struct* %n2, %struct.bc_struct** %quot, i32 %scale) nounwind {
 entry:
 	%tmp7.i46 = tail call i64 asm sideeffect ".byte 0x0f,0x31", "={dx},=*{ax},~{dirflag},~{fpsr},~{flags}"( i64* getelementptr (%struct.CycleCount* @_programStartTime, i32 0, i32 1) )		; <i64> [#uses=0]
 	%tmp221 = sdiv i32 10, 0		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
index 514d6656cd2aa..f48c13259c423 100644
--- a/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
+++ b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
 
diff --git a/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
index f9671a4daaed1..4604f46c533f1 100644
--- a/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -relocation-model=pic --disable-fp-elim
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic --disable-fp-elim
 
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 	%struct.__sFILEX = type opaque
diff --git a/test/CodeGen/X86/2007-04-24-Huge-Stack.ll b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
index 74e6e72a4aa68..7528129971ab1 100644
--- a/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
+++ b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep 4294967112
+; RUN: llc < %s -march=x86-64 | not grep 4294967112
 ; PR1348
 
 	%struct.md5_ctx = type { i32, i32, i32, i32, [2 x i32], i32, [128 x i8], [4294967288 x i8] }
diff --git a/test/CodeGen/X86/2007-04-24-VectorCrash.ll b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
index 3e08e50f09de0..e38992d8b3044 100644
--- a/test/CodeGen/X86/2007-04-24-VectorCrash.ll
+++ b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah
+; RUN: llc < %s -mcpu=yonah
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
 
diff --git a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
index ac85a9d72bbd1..113d0eb8647fc 100644
--- a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
+++ b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -o - -march=x86 -mattr=+mmx | grep paddq | count 2
-; RUN: llvm-as < %s | llc -o - -march=x86 -mattr=+mmx | grep movq | count 2
+; RUN: llc < %s -o - -march=x86 -mattr=+mmx | grep paddq | count 2
+; RUN: llc < %s -o - -march=x86 -mattr=+mmx | grep movq | count 2
 
 define <1 x i64> @unsigned_add3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) {
 entry:
diff --git a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
index cbd6a73dbee7f..85a2ecc959ab7 100644
--- a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
+++ b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep {bsrl.*10}
+; RUN: llc < %s | not grep {bsrl.*10}
 ; PR1356
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/2007-05-05-VecCastExpand.ll b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
index b0bcf5c155aaf..e58b1932197de 100644
--- a/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
+++ b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 -mattr=+sse
+; RUN: llc < %s -march=x86 -mcpu=i386 -mattr=+sse
 ; PR1371
 
 @str = external global [18 x i8]		; <[18 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
index ff7aac0239d8d..a3ff2f60c8d70 100644
--- a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
+++ b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -enable-eh -disable-fp-elim | not grep {addl .12, %esp}
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -disable-fp-elim | not grep {addl .12, %esp}
 ; PR1398
 
 	%struct.S = type { i32, i32 }
diff --git a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
index 61f8b2ce58f29..8ef253822bd9e 100644
--- a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 
 	%struct.XDesc = type <{ i32, %struct.OpaqueXDataStorageType** }>
 	%struct.OpaqueXDataStorageType = type opaque
diff --git a/test/CodeGen/X86/2007-05-15-maskmovq.ll b/test/CodeGen/X86/2007-05-15-maskmovq.ll
index d9836e4a8d5da..2093b8f687443 100644
--- a/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah
+; RUN: llc < %s -mcpu=yonah
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
index 64ccef3917a38..989dfc5bdb2cd 100644
--- a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
+++ b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpckhwd
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpckhwd
 
 declare <8 x i16> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
 
diff --git a/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll b/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
index 5d090759092ee..321e11651b60d 100644
--- a/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
+++ b/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep GOTPCREL
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep ".align.*3"
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep GOTPCREL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep ".align.*3"
 
 	%struct.A = type { [1024 x i8] }
 @_ZN1A1aE = global %struct.A zeroinitializer, align 32		; <%struct.A*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-06-04-tailmerge4.ll b/test/CodeGen/X86/2007-06-04-tailmerge4.ll
index 0ad539664c990..baf2377c5a02a 100644
--- a/test/CodeGen/X86/2007-06-04-tailmerge4.ll
+++ b/test/CodeGen/X86/2007-06-04-tailmerge4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-eh -asm-verbose | grep invcont131
+; RUN: llc < %s -enable-eh -asm-verbose | grep invcont131
 ; PR 1496:  tail merge was incorrectly removing this block
 
 ; ModuleID = 'report.1.bc'
diff --git a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll b/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
index 3e7776a62ab13..36a97ef9c3cf0 100644
--- a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
+++ b/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
@@ -1,5 +1,5 @@
 ; PR1495
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2007-06-14-branchfold.ll b/test/CodeGen/X86/2007-06-14-branchfold.ll
index 7756d060ff259..2680b1543fbb4 100644
--- a/test/CodeGen/X86/2007-06-14-branchfold.ll
+++ b/test/CodeGen/X86/2007-06-14-branchfold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i686 | not grep jmp
+; RUN: llc < %s -march=x86 -mcpu=i686 | not grep jmp
 ; check that branch folding understands FP_REG_KILL is not a branch
 
 target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
index e608ac3ecb97e..6128d8b92d11a 100644
--- a/test/CodeGen/X86/2007-06-15-IntToMMX.ll
+++ b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep paddusw
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep paddusw
 @R = external global <1 x i64>          ; <<1 x i64>*> [#uses=1]
 
 define void @foo(<1 x i64> %A, <1 x i64> %B) {
diff --git a/test/CodeGen/X86/2007-06-28-X86-64-isel.ll b/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
index af11f127cfb5e..9d42c49317fdd 100644
--- a/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
+++ b/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2
+; RUN: llc < %s -march=x86-64 -mattr=+sse2
 
 define void @test() {
 	%tmp1 = call <8 x i16> @llvm.x86.sse2.pmins.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 7, i32 7, i32 7, i32 7 > to <8 x i16>) )
diff --git a/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll b/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
index bcd265aeddaa2..d2d6388c07827 100644
--- a/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
+++ b/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
 define void @test() {
 entry:
diff --git a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
index 66a58c73e824a..dc11eec9c17f9 100644
--- a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
+++ b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
 define void @test(<4 x float>* %arg) {
 	%tmp89 = getelementptr <4 x float>* %arg, i64 3
diff --git a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
index 18850b135ccf5..2c513f17811a7 100644
--- a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
+++ b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd	%rsi, %mm0}
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd	%rdi, %mm1}
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw	%mm0, %mm1}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd	%rsi, %mm0}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd	%rdi, %mm1}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw	%mm0, %mm1}
 
 @R = external global <1 x i64>		; <<1 x i64>*> [#uses=1]
 
diff --git a/test/CodeGen/X86/2007-07-10-StackerAssert.ll b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
index 7f09b5275a050..d611677942c24 100644
--- a/test/CodeGen/X86/2007-07-10-StackerAssert.ll
+++ b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -mcpu=athlon -relocation-model=pic
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -mcpu=athlon -relocation-model=pic
 ; PR1545
 
 @.str97 = external constant [56 x i8]		; <[56 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
index c0bd282e01915..8625b27717382 100644
--- a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
+++ b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse | grep {movq	(%rdi), %rax}
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse | grep {movq	8(%rdi), %rax}
+; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq	(%rdi), %rax}
+; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq	8(%rdi), %rax}
 define i64 @foo_0(<2 x i64>* %val) {
 entry:
         %val12 = getelementptr <2 x i64>* %val, i32 0, i32 0            ; <i64*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
index 8eda0ab9bc4ea..3cd8052a732c7 100644
--- a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
+++ b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movl
+; RUN: llc < %s -march=x86 | not grep movl
 
 define i8 @t(i8 zeroext  %x, i8 zeroext  %y) zeroext  {
 	%tmp2 = add i8 %x, 2
diff --git a/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
index e9ea843ba3c36..7768f36efae5e 100644
--- a/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+++ b/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep "movb   %ah, %r"
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep "movb   %ah, %r"
 
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, [4 x i8], i64 }
 	%struct.PyBoolScalarObject = type { i64, %struct._typeobject*, i8 }
diff --git a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
index b62d2c61bba7a..e93092f355c5c 100644
--- a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
+++ b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {movsbl}
+; RUN: llc < %s -march=x86 | grep {movsbl}
 
 @X = global i32 0               ; <i32*> [#uses=1]
 
diff --git a/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll b/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
index f6ed0fe7a5ffe..c90a85f16949e 100644
--- a/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
+++ b/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep drectve
+; RUN: llc < %s -march=x86 | not grep drectve
 ; PR1607
 
 %hlvm_programs_element = type { i8*, i32 (i32, i8**)* }
diff --git a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
index edcb8232fde1a..d6ea5109d1fb6 100644
--- a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
+++ b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep "48(%esp)" | count 5
+; RUN: llc < %s -mtriple=i686-apple-darwin | grep "48(%esp)" | count 5
 
 	%struct..0anon = type { i32 }
 	%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
diff --git a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
index b6a5fc97b4bb2..5acb05134c7cb 100644
--- a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
+++ b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep {lea\[\[:space:\]\]R}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep {lea\[\[:space:\]\]R}
 
 	%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
 	%struct.AGenericManager = type <{ i8 }>
diff --git a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
index 4f95b7603bae6..c5d2a46f92c27 100644
--- a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
+++ b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep weak | count 2
+; RUN: llc < %s -march=x86 | grep weak | count 2
 @__gthrw_pthread_once = alias weak i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
 
 declare extern_weak i32 @pthread_once(i32*, void ()*)
diff --git a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
index 6a313be188851..56ee2a3149903 100644
--- a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
+++ b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin -enable-eh  | grep {isNullOrNil].eh"} | count 2
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -enable-eh  | grep {isNullOrNil].eh"} | count 2
 
 	%struct.NSString = type {  }
 	%struct._objc__method_prototype_list = type opaque
diff --git a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
index 835e4caf0aafc..0ae1897e60e98 100644
--- a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
+++ b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep 170
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -- -86
 
 define i16 @f(<4 x float>* %tmp116117.i1061.i) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
index be51c04a38371..4a56ee446a0fb 100644
--- a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
+++ b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep powixf2
-; RUN: llvm-as < %s | llc | grep fsqrt
+; RUN: llc < %s | grep powixf2
+; RUN: llc < %s | grep fsqrt
 ; ModuleID = 'yyy.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll b/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
index a733bb31646c7..6fc8ec907eacc 100644
--- a/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
+++ b/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep pushf
+; RUN: llc < %s -march=x86 | not grep pushf
 
 	%struct.gl_texture_image = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8* }
 	%struct.gl_texture_object = type { i32, i32, i32, float, [4 x i32], i32, i32, i32, i32, i32, float, [11 x %struct.gl_texture_image*], [1024 x i8], i32, i32, i32, i8, i8*, i8, void (%struct.gl_texture_object*, i32, float*, float*, float*, float*, i8*, i8*, i8*, i8*)*, %struct.gl_texture_object* }
diff --git a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
index e9fbe797f5bca..67323e87eff57 100644
--- a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
+++ b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | grep lea
 
 	%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
 	%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
diff --git a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
index e2fdbb32bde34..fc11347224bea 100644
--- a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
+++ b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movb
+; RUN: llc < %s -march=x86 | not grep movb
 
 define i16 @f(i32* %bp, i32* %ss) signext  {
 entry:
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
index fd914a1687b77..ea1bbc464693e 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep addss | not grep esp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep addss | not grep esp
 
 define fastcc void @fht(float* %fz, i16 signext  %n) {
 entry:
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
index 3016a013f2c90..a3872ad47e981 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep sarl | not grep esp
+; RUN: llc < %s -march=x86 | grep sarl | not grep esp
 
 define i16 @t(i16* %qmatrix, i16* %dct, i16* %acBaseTable, i16* %acExtTable, i16 signext  %acBaseRes, i16 signext  %acMaskRes, i16 signext  %acExtRes, i32* %bitptr, i32* %source, i32 %markerPrefix, i8** %byteptr, i32 %scale, i32 %round, i32 %bits) signext  {
 entry:
diff --git a/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
index 6cac558e427d0..8a55935cc1f88 100644
--- a/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
 
         %struct._Unwind_Context = type {  }
 
diff --git a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
index 4ea42440e1e2f..1e4ae84645869 100644
--- a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-linux-gnu
 ; PR1729
 
 	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
index a414ef0d8626b..fbcac50875c27 100644
--- a/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
 
 define i64 @__ashldi3(i64 %u, i64 %b) {
 entry:
diff --git a/test/CodeGen/X86/2007-10-16-IllegalAsm.ll b/test/CodeGen/X86/2007-10-16-IllegalAsm.ll
index 5332fa1007ed1..6d0cb475b1f18 100644
--- a/test/CodeGen/X86/2007-10-16-IllegalAsm.ll
+++ b/test/CodeGen/X86/2007-10-16-IllegalAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu | grep movb | not grep x
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep movb | not grep x
 ; PR1734
 
 	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2007-10-16-fp80_select.ll b/test/CodeGen/X86/2007-10-16-fp80_select.ll
index 2fcf76be5c78d..3f9845c3c3ecb 100644
--- a/test/CodeGen/X86/2007-10-16-fp80_select.ll
+++ b/test/CodeGen/X86/2007-10-16-fp80_select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; ModuleID = 'bugpoint-reduced-simplified.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-10-17-IllegalAsm.ll b/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
index f3cdfee7545f7..c0bb55ed14ef0 100644
--- a/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
+++ b/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu | grep addb | not grep x
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux-gnu | grep cmpb | not grep x
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep addb | not grep x
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep cmpb | not grep x
 ; PR1734
 
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
index e649999bb0a8d..600bd1f178497 100644
--- a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | grep inc | not grep PTR
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep inc | not grep PTR
 
 define i16 @t(i32* %bitptr, i32* %source, i8** %byteptr, i32 %scale, i32 %round) signext  {
 entry:
diff --git a/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll b/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
index 450911ae81991..984094d86a275 100644
--- a/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
+++ b/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1748
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
index 9013e9020efa4..86d3bbf4f4e3b 100644
--- a/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
+++ b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
+; RUN: llc < %s -march=x86 | grep mov | count 1
 
 define i16 @t() signext  {
 entry:
diff --git a/test/CodeGen/X86/2007-10-30-LSRCrash.ll b/test/CodeGen/X86/2007-10-30-LSRCrash.ll
index 1c912a014049e..42db98b447502 100644
--- a/test/CodeGen/X86/2007-10-30-LSRCrash.ll
+++ b/test/CodeGen/X86/2007-10-30-LSRCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define i32 @unique(i8* %full, i32 %p, i32 %len, i32 %mode, i32 %verbos, i32 %flags) {
 entry:
diff --git a/test/CodeGen/X86/2007-10-31-extractelement-i64.ll b/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
index f73a9105cef40..1b8e67dcc9b3e 100644
--- a/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
+++ b/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2
+; RUN: llc < %s -march=x86 -mattr=sse2
 ; ModuleID = 'yyy.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-11-01-ISelCrash.ll b/test/CodeGen/X86/2007-11-01-ISelCrash.ll
index 704efd0ef8008..019c6a8cc0d90 100644
--- a/test/CodeGen/X86/2007-11-01-ISelCrash.ll
+++ b/test/CodeGen/X86/2007-11-01-ISelCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
         %"struct.K::JL" = type <{ i8 }>
         %struct.jv = type { i64 }
diff --git a/test/CodeGen/X86/2007-11-02-BadAsm.ll b/test/CodeGen/X86/2007-11-02-BadAsm.ll
index 4ae4d2f9e8d90..4e11cda92e6d3 100644
--- a/test/CodeGen/X86/2007-11-02-BadAsm.ll
+++ b/test/CodeGen/X86/2007-11-02-BadAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movl | not grep rax
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl | not grep rax
 
 	%struct.color_sample = type { i64 }
 	%struct.gs_matrix = type { float, i64, float, i64, float, i64, float, i64, float, i64, float, i64 }
diff --git a/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll b/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
index ffa6e44d1cb66..27ec8260d06b7 100644
--- a/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
+++ b/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1763
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
index 889b122bb066b..404561848b711 100644
--- a/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
+++ b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
 ; PR1766
 
         %struct.dentry = type { %struct.dentry_operations* }
diff --git a/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll b/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
index 7e41f36790638..6b871aa3a4d4f 100644
--- a/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
+++ b/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
 ; PR1767
 
 define void @xor_sse_2(i64 %bytes, i64* %p1, i64* %p2) {
diff --git a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
index de33c617d050b..8e586a7059eb6 100644
--- a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
+++ b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static | grep {foo _str$}
+; RUN: llc < %s -relocation-model=static | grep {foo _str$}
 ; PR1761
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
index a4e44e1f4e1d1..f6db0d0379e76 100644
--- a/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep lea
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lea
 
 define float @foo(i32* %x, float* %y, i32 %c) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2007-11-07-MulBy4.ll b/test/CodeGen/X86/2007-11-07-MulBy4.ll
index d7fb684a6ba46..d5b630b59d9f5 100644
--- a/test/CodeGen/X86/2007-11-07-MulBy4.ll
+++ b/test/CodeGen/X86/2007-11-07-MulBy4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep imul
+; RUN: llc < %s -march=x86 | not grep imul
 
 	%struct.eebb = type { %struct.eebb*, i16* }
 	%struct.hf = type { %struct.hf*, i16*, i8*, i32, i32, %struct.eebb*, i32, i32, i8*, i8*, i8*, i8*, i16*, i8*, i16*, %struct.ri, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [30 x i32], %struct.eebb, i32, i8* }
diff --git a/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll b/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
index b5635b38cfc95..9c004f946b4af 100644
--- a/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
+++ b/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | grep movl | count 1
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att | grep movl | count 2
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att | not grep movb
 
 	%struct.double_int = type { i64, i64 }
 	%struct.tree_common = type <{ i8, [3 x i8] }>
@@ -6,7 +7,7 @@
 	%struct.tree_node = type { %struct.tree_int_cst }
 @tree_code_type = external constant [0 x i32]		; <[0 x i32]*> [#uses=1]
 
-define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) {
+define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) nounwind {
 entry:
 	%tmp2526 = bitcast %struct.tree_node* %t1 to i32*		; <i32*> [#uses=1]
 	br i1 false, label %UnifiedReturnBlock, label %bb21
diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 46422bcf2c506..0626d28eefee4 100644
--- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \
 ; RUN:   grep {1 .*folded into instructions}
 ; Increment in loop bb.128.i adjusted to 2, to prevent loop reversal from
 ; kicking in.
diff --git a/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll b/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll
index 0d43a6e73f873..debb46121698f 100644
--- a/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll
+++ b/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats |& \
 ; RUN:   grep {1 .*folded into instructions}
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 4
+; RUN: llc < %s -march=x86 | grep cmp | count 4
 
 	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
 
diff --git a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
index cb7a3dcd33cb3..ca995cc3f65e4 100644
--- a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
+++ b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | not grep IMPLICIT_DEF
+; RUN: llc < %s -mtriple=i686-apple-darwin | not grep IMPLICIT_DEF
 
 	%struct.__sbuf = type { i8*, i32 }
 	%struct.ggBRDF = type { i32 (...)** }
diff --git a/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll b/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
index 8ad77051beddb..455de91d30abf 100644
--- a/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
+++ b/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu
 ; PR1799
 
 	%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 6309f3c510526..265d968548516 100644
--- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {(%esp)} | count 2
+; RUN: llc < %s -march=x86 | grep {(%esp)} | count 2
 ; PR1872
 
 	%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-01-08-IllegalCMP.ll b/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
index fddfd4f3a4863..7aec613e2abbc 100644
--- a/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
+++ b/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
index 8a1520c1fe413..b040095195c8f 100644
--- a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
+++ b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep pushf
+; RUN: llc < %s -march=x86 | not grep pushf
 
 	%struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* }
 
diff --git a/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
index 962d6ecc24e9c..6997d535ff92b 100644
--- a/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
+++ b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -o - | grep sinl
+; RUN: llc < %s -o - | grep sinl
 
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
index 38020c1e3ea88..d795610607ee6 100644
--- a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
+++ b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -regalloc=local
+; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=local
 
 define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) {
 entry:
diff --git a/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll b/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
index 4feb078671fb4..e91f52ef05696 100644
--- a/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
+++ b/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep IMPLICIT_DEF
+; RUN: llc < %s -march=x86 | not grep IMPLICIT_DEF
 
 	%struct.node_t = type { double*, %struct.node_t*, %struct.node_t**, double**, double*, i32, i32 }
 
diff --git a/test/CodeGen/X86/2008-01-16-Trampoline.ll b/test/CodeGen/X86/2008-01-16-Trampoline.ll
index 4510edb9d7dbf..704b2bab4a266 100644
--- a/test/CodeGen/X86/2008-01-16-Trampoline.ll
+++ b/test/CodeGen/X86/2008-01-16-Trampoline.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
 
 	%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets = type { i32, i32, void (i32, i32)*, i8 (i32, i32)* }
 
diff --git a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
index ffb82ae7f2b86..b936686798f03 100644
--- a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
+++ b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep nop
+; RUN: llc < %s -march=x86 | grep nop
 target triple = "i686-apple-darwin8"
 
 
diff --git a/test/CodeGen/X86/2008-02-05-ISelCrash.ll b/test/CodeGen/X86/2008-02-05-ISelCrash.ll
index 6885cf14cf111..443a32de3b426 100644
--- a/test/CodeGen/X86/2008-02-05-ISelCrash.ll
+++ b/test/CodeGen/X86/2008-02-05-ISelCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR1975
 
 @nodes = external global i64		; <i64*> [#uses=2]
diff --git a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
index 6db6537aed26e..d2d5149de3aaa 100644
--- a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep xor | grep CPI
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xor | grep CPI
 
 define void @casin({ double, double }* sret  %agg.result, double %z.0, double %z.1) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
index 230af57fea8ca..b772d77f6405f 100644
--- a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep andpd | not grep esp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep andpd | not grep esp
 
 declare double @llvm.sqrt.f64(double) nounwind readnone 
 
diff --git a/test/CodeGen/X86/2008-02-14-BitMiscompile.ll b/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
index 5bf84560a37c5..1983f1d19c6f8 100644
--- a/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
+++ b/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep and
+; RUN: llc < %s -march=x86 | grep and
 define i32 @test(i1 %A) {
 	%B = zext i1 %A to i32		; <i32> [#uses=1]
 	%C = sub i32 0, %B		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index 47c8677d385b6..9b52c5c06990c 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 9
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 9
 ; PR1909
 
 @.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00"		; <[48 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
index 557d00c62937c..5115e48365fcc 100644
--- a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
+++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep {a:} | not grep ax
-; RUN: llvm-as < %s | llc | grep {b:} | not grep ax
+; RUN: llc < %s | grep {a:} | not grep ax
+; RUN: llc < %s | grep {b:} | not grep ax
 ; PR2078
 ; The clobber list says that "ax" is clobbered.  Make sure that eax isn't 
 ; allocated to the input/output register.
diff --git a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
index 8cf36425f22d6..6b1eefe5750a0 100644
--- a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -regalloc=local -march=x86 -mattr=+mmx | grep esi
+; RUN: llc < %s -regalloc=local -march=x86 -mattr=+mmx | grep esi
 ; PR2082
 ; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of
 ; registers.
diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
index f78d52651ded8..8d6bb0df1f6d0 100644
--- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 3
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of dead spill slots removed}
+; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 3
 ; rdar://5761454
 
 	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
index ff7cf5e94e258..1d31859f46cc2 100644
--- a/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
+++ b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -mattr=+sse2
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -mattr=+sse2
 ; PR2076
 
 define void @h264_h_loop_filter_luma_mmx2(i8* %pix, i32 %stride, i32 %alpha, i32 %beta, i8* %tc0) nounwind  {
diff --git a/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
index 5d60bde85614c..6615b8c620759 100644
--- a/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 
 	%struct.XX = type <{ i8 }>
 	%struct.YY = type { i64 }
diff --git a/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll b/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
index 3ba31f4ad9000..0b4eb3a3b9b20 100644
--- a/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
+++ b/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
index fe0ee8a8faaf6..ad7950ccd8e3c 100644
--- a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
+++ b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 	%struct.CompAtom = type <{ %struct.Position, float, i32 }>
 	%struct.Lattice = type { %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-02-27-PEICrash.ll b/test/CodeGen/X86/2008-02-27-PEICrash.ll
index 055eabb43a63c..d842967561abd 100644
--- a/test/CodeGen/X86/2008-02-27-PEICrash.ll
+++ b/test/CodeGen/X86/2008-02-27-PEICrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
 define i64 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone  {
 entry:
diff --git a/test/CodeGen/X86/2008-03-06-frem-fpstack.ll b/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
index 2d7182e733fb1..70a83b5c9f57b 100644
--- a/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
+++ b/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386
+; RUN: llc < %s -march=x86 -mcpu=i386
 ; PR2122
 define float @func(float %a, float %b) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/2008-03-07-APIntBug.ll b/test/CodeGen/X86/2008-03-07-APIntBug.ll
index 5d1ccad745ade..84e4827d04162 100644
--- a/test/CodeGen/X86/2008-03-07-APIntBug.ll
+++ b/test/CodeGen/X86/2008-03-07-APIntBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 | not grep 255
+; RUN: llc < %s -march=x86 -mcpu=i386 | not grep 255
 
 	%struct.CONSTRAINT = type { i32, i32, i32, i32 }
 	%struct.FIRST_UNION = type { %struct.anon }
diff --git a/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
index 10989885f0f1e..cd2d609b53560 100644
--- a/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
+++ b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim -schedule-livein-copies | not grep {Number of register spills}
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim -schedule-livein-copies | not grep {Number of register spills}
 ; PR2134
 
 declare fastcc i8* @w_addchar(i8*, i32*, i32*, i8 signext ) nounwind 
diff --git a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
index 0f83b399ad7c9..e673d315a4356 100644
--- a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
+++ b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep TLSGD | count 2
+; RUN: llc < %s -relocation-model=pic | grep TLSGD | count 2
 ; PR2137
 
 ; ModuleID = '1.c'
diff --git a/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll b/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
index 4a896e9f33e16..c6ba22ea3da60 100644
--- a/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
+++ b/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define i16 @t(i32 %depth) signext nounwind  {
 entry:
diff --git a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
index 544c9b5819ece..8946415108f4a 100644
--- a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
+++ b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu
 ; PR2138
 
 	%struct.__locale_struct = type { [13 x %struct.locale_data*], i16*, i32*, i32*, [13 x i8*] }
diff --git a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
index 4b6758d6833ca..ccc4d754c1f56 100644
--- a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep movss | count 1
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -stats |& grep {Number of re-materialization} | grep 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep movss | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -stats |& grep {Number of re-materialization} | grep 1
 
 	%struct..0objc_object = type opaque
 	%struct.OhBoy = type {  }
diff --git a/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll b/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
index 2fad32a36c3f6..eaa883c963f2b 100644
--- a/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
+++ b/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define i32 @t() nounwind  {
 entry:
diff --git a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
index 6cf731b0e9b75..4dc3a10f46479 100644
--- a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
+++ b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -asm-verbose | grep {#} | not grep -v {##}
+; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose | grep {#} | not grep -v {##}
 
 	%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
 	%struct.AGenericManager = type <{ i8 }>
diff --git a/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
index 53bb054795ec1..2d868e0f612a6 100644
--- a/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
+++ b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
 define void @t() {
 entry:
diff --git a/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll b/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
index 83e1d60fcbafd..305968ac37785 100644
--- a/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
+++ b/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | grep add | grep 12 | not grep non_lazy_ptr
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | grep add | grep 12 | not grep non_lazy_ptr
 ; Don't fold re-materialized load into a two address instruction
 
 	%"struct.Smarts::Runnable" = type { i32 (...)**, i32 }
diff --git a/test/CodeGen/X86/2008-04-02-unnamedEH.ll b/test/CodeGen/X86/2008-04-02-unnamedEH.ll
index fff75ff660a7b..a9f368b6eaa5b 100644
--- a/test/CodeGen/X86/2008-04-02-unnamedEH.ll
+++ b/test/CodeGen/X86/2008-04-02-unnamedEH.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc | grep unnamed_1_0.eh
-; ModuleID = '<stdin>'
+; RUN: llc < %s | grep unnamed_1.eh
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 
diff --git a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
index f5de113b9ea9f..dc8c097efc506 100644
--- a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx
+; RUN: llc < %s -march=x86 -mattr=+mmx
 
 define i32 @t2() nounwind  {
 entry:
diff --git a/test/CodeGen/X86/2008-04-09-BranchFolding.ll b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
index fea54c4e5ecff..41fbdd19f2b20 100644
--- a/test/CodeGen/X86/2008-04-09-BranchFolding.ll
+++ b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep jmp
+; RUN: llc < %s -march=x86 | not grep jmp
 
 	%struct..0anon = type { i32 }
 	%struct.binding_level = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.binding_level*, i8, i8, i8, i8, i8, i32, %struct.tree_node* }
diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
index 4bb8c6d27a71c..83eb61aed433c 100644
--- a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
+++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local
 
 	%struct.CGPoint = type { double, double }
 	%struct.NSArray = type { %struct.NSObject }
diff --git a/test/CodeGen/X86/2008-04-16-CoalescerBug.ll b/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
index 30accad5863b3..3ccc0fe163400 100644
--- a/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define void @Hubba(i8* %saveunder, i32 %firstBlob, i32 %select) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
index 3e0662aed88d7..6e8891bfd5b8b 100644
--- a/test/CodeGen/X86/2008-04-16-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep movw | not grep {, %e}
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep movw | not grep {, %e}
 
 	%struct.DBC_t = type { i32, i8*, i16, %struct.DBC_t*, i8*, i8*, i8*, i8*, i8*, %struct.DBC_t*, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, i32*, i8, i16, %struct.DRVOPT*, i16 }
 	%struct.DRVOPT = type { i16, i32, i8, %struct.DRVOPT* }
diff --git a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index c69ff332c2c5e..ac482850b8312 100644
--- a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep xorl | grep {%e}
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep {%e}
 ; Make sure xorl operands are 32-bit registers.
 
 	%struct.tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/X86/2008-04-24-MemCpyBug.ll b/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
index 09fdc707b854b..6389267aa4e88 100644
--- a/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
+++ b/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep 120
+; RUN: llc < %s -march=x86 | not grep 120
 ; Don't accidentally add the offset twice for trailing bytes.
 
 	%struct.S63 = type { [63 x i8] }
diff --git a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
index 838c2ea579873..4eaca17c88616 100644
--- a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
+++ b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mattr=+sse41
+; RUN: llc < %s -mattr=+sse41
 ; rdar://5886601
 ; gcc testsuite:  gcc.target/i386/sse4_1-pblendw.c
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
index 82721a53b8b49..38d6aa6d172ac 100644
--- a/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
+++ b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {1 \$2 3}
+; RUN: llc < %s | grep {1 \$2 3}
 ; rdar://5720231
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
index f93ad9ae7151f..5b97eb71cbfdf 100644
--- a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movl > %t
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl > %t
 ; RUN: not grep {r\[abcd\]x} %t
 ; RUN: not grep {r\[ds\]i} %t
 ; RUN: not grep {r\[bs\]p} %t
diff --git a/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll b/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
index 6613fafcce825..6e8e98d865bd2 100644
--- a/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
+++ b/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define i64 @t(i64 %maxIdleDuration) nounwind  {
 	call void asm sideeffect "wrmsr", "{cx},A,~{dirflag},~{fpsr},~{flags}"( i32 416, i64 0 ) nounwind 
diff --git a/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll b/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
index d7b5f25de6c9a..a708224dd0d99 100644
--- a/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
+++ b/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-unsafe-fp-math -march=x86 | grep jnp
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | grep jnp
 ; rdar://5902801
 
 declare void @test2()
diff --git a/test/CodeGen/X86/2008-05-09-PHIElimBug.ll b/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
index c0b196113137a..cea0076076d68 100644
--- a/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
+++ b/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 	%struct.V = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x i32>, float*, float*, float*, float*, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 }
 
diff --git a/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll b/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
index 9bcd1f374dd64..5ceb5464d2b02 100644
--- a/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
+++ b/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
 define fastcc void @glgVectorFloatConversion() nounwind  {
 	%tmp12745 = load <4 x float>* null, align 16		; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
index 8751328249d52..1f95a2409fe74 100644
--- a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
+++ b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep abort | count 1
+; RUN: llc < %s | grep abort | count 1
 ; Calls to abort should all be merged
 
 ; ModuleID = '5898899.c'
diff --git a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
index 9ecd5814de4f7..9cf50f4bfc586 100644
--- a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -O0 -fast-isel=false | grep mov | count 5
+; RUN: llc < %s -march=x86 -O0 -fast-isel=false | grep mov | count 5
 ; PR2343
 
 	%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
index c9e30d8f80a20..19a73543c65e6 100644
--- a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
+++ b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movups | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movups | count 2
 
 define void @a(<4 x float>* %x) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/2008-05-28-CoalescerBug.ll b/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
index 68f6ccea4ee66..32bf8d494165a 100644
--- a/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
 ; PR2289
 
 define void @_ada_ca11001() {
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index 02db2ed93cd29..f1a19ec147a89 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=local
 
 @_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*]		; <[5 x i32 (...)*]*> [#uses=1]
 
diff --git a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll b/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
index d28276141689f..236b7cd6121f2 100644
--- a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
+++ b/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep subl | grep 24
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep subl | grep 24
 
 	%struct.argument_t = type { i8*, %struct.argument_t*, i32, %struct.ipc_type_t*, i32, void (...)*, void (...)*, void (...)*, void (...)*, void (...)*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, %struct.routine*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, i32, i32, i32, i32, i32, i32 }
 	%struct.ipc_type_t = type { i8*, %struct.ipc_type_t*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, %struct.ipc_type_t*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
diff --git a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
index 0cde7cf269eaf..90af3870bd441 100644
--- a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movsd
-; RUN: llvm-as < %s | llc -march=x86 | grep movw
-; RUN: llvm-as < %s | llc -march=x86 | grep addw
+; RUN: llc < %s -march=x86 | not grep movsd
+; RUN: llc < %s -march=x86 | grep movw
+; RUN: llc < %s -march=x86 | grep addw
 ; These transforms are turned off for volatile loads and stores.
 ; Check that they weren't turned off for all loads and stores!
 
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 2b64212dfb879..500cd1f08cfa9 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd | count 5
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movl | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 5
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movl | count 2
 
 @atomic = global double 0.000000e+00		; <double*> [#uses=1]
 @atomic2 = global double 0.000000e+00		; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-06-16-SubregsBug.ll b/test/CodeGen/X86/2008-06-16-SubregsBug.ll
index 75513b665a0b6..4d4819ab05d59 100644
--- a/test/CodeGen/X86/2008-06-16-SubregsBug.ll
+++ b/test/CodeGen/X86/2008-06-16-SubregsBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep mov | count 4
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 4
 
 define i16 @test(i16* %tmp179) nounwind  {
 	%tmp180 = load i16* %tmp179, align 2		; <i16> [#uses=2]
diff --git a/test/CodeGen/X86/2008-06-18-BadShuffle.ll b/test/CodeGen/X86/2008-06-18-BadShuffle.ll
index ba0a1f90ab923..66f9065799e58 100644
--- a/test/CodeGen/X86/2008-06-18-BadShuffle.ll
+++ b/test/CodeGen/X86/2008-06-18-BadShuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 -mattr=+sse2 | grep pinsrw
+; RUN: llc < %s -march=x86 -mcpu=i386 -mattr=+sse2 | grep pinsrw
 
 ; Test to make sure we actually insert the bottom element of the vector
 define <8 x i16> @a(<8 x i16> %a) nounwind  {
diff --git a/test/CodeGen/X86/2008-06-25-VecISelBug.ll b/test/CodeGen/X86/2008-06-25-VecISelBug.ll
index f369986fbcea0..72d190758f8d9 100644
--- a/test/CodeGen/X86/2008-06-25-VecISelBug.ll
+++ b/test/CodeGen/X86/2008-06-25-VecISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep pslldq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep pslldq
 
 define void @t() nounwind  {
 entry:
diff --git a/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll b/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
index 3586f87776a36..46341fc871038 100644
--- a/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
+++ b/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
 
 	%struct.ogg_stream_state = type { i8*, i32, i32, i32, i32*, i64*, i32, i32, i32, i32, [282 x i8], i32, i32, i32, i32, i32, i64, i64 }
 	%struct.res_state = type { i32, i32, i32, i32, float*, float*, i32, i32 }
diff --git a/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll b/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
index 5fb3e5780b94e..1a786ef7a90fd 100644
--- a/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
+++ b/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep ax
+; RUN: llc < %s | grep ax
 ; PR2024
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2008-07-11-SHLBy1.ll b/test/CodeGen/X86/2008-07-11-SHLBy1.ll
index 5b94a351cff9d..ff2b05fb08ebd 100644
--- a/test/CodeGen/X86/2008-07-11-SHLBy1.ll
+++ b/test/CodeGen/X86/2008-07-11-SHLBy1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -o - | not grep shr
+; RUN: llc < %s -march=x86-64 -o - | not grep shr
 define i128 @sl(i128 %x) {
         %t = shl i128 %x, 1
         ret i128 %t
diff --git a/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
index 1d94638865019..f75e605168ec7 100644
--- a/test/CodeGen/X86/2008-07-11-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
@@ -1,7 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static -disable-fp-elim |\
-; RUN:   %prcontext 65534 1 | grep movl | count 1
+; RUN: llc < %s -march=x86 -relocation-model=static -disable-fp-elim | FileCheck %s
 ; PR2536
 
+
+; CHECK: movw %cx
+; CHECK-NEXT: andl    $65534, %
+; CHECK-NEXT: movl %
+; CHECK-NEXT: movl $17
+
 @g_5 = external global i16		; <i16*> [#uses=2]
 @g_107 = external global i16		; <i16*> [#uses=1]
 @g_229 = external global i32		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
index aa9ee507f80c0..f56604b75bd71 100644
--- a/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
 
 	%struct.SV = type { i8*, i64, i64 }
 @"\01LC25" = external constant [8 x i8]		; <[8 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
index ae30385e13e68..98919ee5221ab 100644
--- a/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1
 ; PR2539
 
 external global <4 x float>, align 1		; <<4 x float>*>:0 [#uses=2]
diff --git a/test/CodeGen/X86/2008-07-22-CombinerCrash.ll b/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
index a18564f4f979d..0f6714579bcce 100644
--- a/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
+++ b/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 ; PR2566
 
 external global i16		; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-23-VSetCC.ll b/test/CodeGen/X86/2008-07-23-VSetCC.ll
index da6c089c460fb..684ca5c89fd2e 100644
--- a/test/CodeGen/X86/2008-07-23-VSetCC.ll
+++ b/test/CodeGen/X86/2008-07-23-VSetCC.ll
@@ -1,11 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium
+; RUN: llc < %s -march=x86 -mcpu=pentium
 ; PR2575
 
 define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind  {
 	br i1 false, label %bb.nph, label %._crit_edge
 
 bb.nph:		; preds = %bb.nph, %0
-	vicmp sgt <4 x i32> zeroinitializer, < i32 -128, i32 -128, i32 -128, i32 -128 >		; <<4 x i32>>:1 [#uses=1]
+	%X = icmp sgt <4 x i32> zeroinitializer, < i32 -128, i32 -128, i32 -128, i32 -128 >		; <<4 x i32>>:1 [#uses=1]
+        sext <4 x i1> %X to <4 x i32>
 	extractelement <4 x i32> %1, i32 3		; <i32>:2 [#uses=1]
 	lshr i32 %2, 31		; <i32>:3 [#uses=1]
 	trunc i32 %3 to i1		; <i1>:4 [#uses=1]
@@ -27,4 +28,5 @@ bb.nph:		; preds = %bb.nph, %0
 	ret void
 }
 
+
 declare float @fmaxf(float, float)
diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
index 2ebbe6ea52260..1d166f4881586 100644
--- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 56
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 59
 ; PR2568
 
 @g_3 = external global i16		; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-06-RewriterBug.ll b/test/CodeGen/X86/2008-08-06-RewriterBug.ll
index 9371c2a6383b0..4428035cc8277 100644
--- a/test/CodeGen/X86/2008-08-06-RewriterBug.ll
+++ b/test/CodeGen/X86/2008-08-06-RewriterBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR2596
 
 @data = external global [400 x i64]		; <[400 x i64]*> [#uses=5]
diff --git a/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll b/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
index b09211d9efe0a..32f6ca0ce086b 100644
--- a/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
+++ b/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movzbl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movzbl
 
 define i32 @foo(<4 x float> %a, <4 x float> %b) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
index 00bcdf82e8ddd..8475e8d354e58 100644
--- a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
+++ b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
@@ -1,9 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep xadd
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 
 @var = external global i64		; <i64*> [#uses=1]
 
 define i32 @main() nounwind {
 entry:
+; CHECK: main:
+; CHECK: lock
+; CHECK: decq
 	tail call i64 @llvm.atomic.load.sub.i64.p0i64( i64* @var, i64 1 )		; <i64>:0 [#uses=0]
 	unreachable
 }
diff --git a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
index 2c6828bbd0aa9..c76dd7de12560 100644
--- a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
+++ b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 
 	%struct.DrawHelper = type { void (i32, %struct.QT_FT_Span*, i8*)*, void (i32, %struct.QT_FT_Span*, i8*)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i32, i32)* }
 	%struct.QBasicAtomic = type { i32 }
diff --git a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
index 4e3533287dbce..eacb4a51c2154 100644
--- a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
+++ b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
@@ -1,7 +1,8 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movd | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movq
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movd | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq
 ; PR2677
 
+
 	%struct.Bigint = type { %struct.Bigint*, i32, i32, i32, i32, [1 x i32] }
 
 define double @_Z7qstrtodPKcPS0_Pb(i8* %s00, i8** %se, i8* %ok) nounwind {
diff --git a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
index f793b524e61fc..101b3c5cfdbbd 100644
--- a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
+++ b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mcpu=core2 | grep pxor | count 2
-; RUN: llvm-as < %s | llc -mcpu=core2 | not grep movapd
+; RUN: llc < %s -mcpu=core2 | grep pxor | count 2
+; RUN: llc < %s -mcpu=core2 | not grep movapd
 ; PR2715
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
index e22b647a13f0a..b92c789a30c72 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
@@ -1,6 +1,6 @@
 ; Check that eh_return & unwind_init were properly lowered
-; RUN: llvm-as < %s | llc | grep %ebp | count 9
-; RUN: llvm-as < %s | llc | grep %ecx | count 5
+; RUN: llc < %s | grep %ebp | count 7
+; RUN: llc < %s | grep %ecx | count 5
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i386-pc-linux"
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
index 7d01824400c8e..00ab73569c4bb 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -1,6 +1,6 @@
 ; Check that eh_return & unwind_init were properly lowered
-; RUN: llvm-as < %s | llc | grep %rbp | count 7
-; RUN: llvm-as < %s | llc | grep %rcx | count 3
+; RUN: llc < %s | grep %rbp | count 5
+; RUN: llc < %s | grep %rcx | count 3
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index ffe10d439bc70..60be0d51e7e74 100644
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
 ; PR2687
 
 define <2 x double> @a(<2 x i32> %x) nounwind {
diff --git a/test/CodeGen/X86/2008-09-09-LinearScanBug.ll b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
index 30a2b15c8dea4..b3312d9464d14 100644
--- a/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
+++ b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
 ; PR2757
 
 @g_3 = external global i32		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
index 02dd04dc133c4..108f24307ea9c 100644
--- a/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR2783
 
 @g_15 = external global i16		; <i16*> [#uses=2]
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
index 94033449114f5..534f990333727 100644
--- a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR2748
 
 @g_73 = external global i32		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
index ed8d345aad3d0..74429c382e71e 100644
--- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
+++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl %eax, %eax"
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl %edx, %edx"
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl (%eax), %eax"
-; RUN: llvm-as < %s | llc -march=x86 | not grep "movl (%edx), %edx"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl %eax, %eax"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl %edx, %edx"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl (%eax), %eax"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl (%edx), %edx"
+; RUN: llc < %s -march=x86 | not grep "movl %eax, %eax"
+; RUN: llc < %s -march=x86 | not grep "movl %edx, %edx"
+; RUN: llc < %s -march=x86 | not grep "movl (%eax), %eax"
+; RUN: llc < %s -march=x86 | not grep "movl (%edx), %edx"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %eax, %eax"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %edx, %edx"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%eax), %eax"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%edx), %edx"
 
 ; %0 must not be put in EAX or EDX.
 ; In the first asm, $0 and $2 must not be put in EAX.
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index 62e3233f9b3a0..f5bd307139d60 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "#%ebp %edi %esi 8(%edx) %eax (%ebx)"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | grep "#%edi %edx %ebp 8(%ebx) %eax (%esi)"
+; RUN: llc < %s -march=x86 | grep "#%ebp %edi %esi 8(%edx) %eax (%ebx)"
+; RUN: llc < %s -march=x86 -regalloc=local | grep "#%edi %edx %ebp 8(%ebx) %eax (%esi)"
 ; The 1st, 2nd, 3rd and 5th registers above must all be different.  The registers
 ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th
 ; operand.  There are many combinations that work; this is what llc puts out now.
diff --git a/test/CodeGen/X86/2008-09-19-RegAllocBug.ll b/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
index 47feb83c92725..a8f2912a70af4 100644
--- a/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
+++ b/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
 ; PR2808
 
 @g_3 = external global i32		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-25-sseregparm-1.ll b/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
index d103f144e284f..c92a8f4635713 100644
--- a/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
+++ b/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movs | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep fld | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movs | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep fld | count 2
 ; check 'inreg' attribute for sse_regparm
 
 define double @foo1() inreg nounwind {
diff --git a/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll b/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
index b1f5ab5907176..f1ada28bcfcb1 100644
--- a/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
+++ b/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
 
 	%struct._Unwind_Context = type { [18 x i8*], i8*, i8*, i8*, %struct.dwarf_eh_bases, i32, i32, i32, [18 x i8] }
 	%struct._Unwind_Exception = type { i64, void (i32, %struct._Unwind_Exception*)*, i32, i32, [3 x i32] }
diff --git a/test/CodeGen/X86/2008-09-29-ReMatBug.ll b/test/CodeGen/X86/2008-09-29-ReMatBug.ll
index d4da01a508fd6..c36cf39fb3418 100644
--- a/test/CodeGen/X86/2008-09-29-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-09-29-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim
 
 	%struct..0objc_selector = type opaque
 	%struct.NSString = type opaque
diff --git a/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
index 4f6eb59773fb0..935c4c55f046d 100644
--- a/test/CodeGen/X86/2008-09-29-VolatileBug.ll
+++ b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep movz
+; RUN: llc < %s -march=x86 | not grep movz
 ; PR2835
 
 @g_407 = internal global i32 0		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-10-02-Atomics32-2.ll b/test/CodeGen/X86/2008-10-02-Atomics32-2.ll
index e74280cd3a178..b48c4adaa26cd 100644
--- a/test/CodeGen/X86/2008-10-02-Atomics32-2.ll
+++ b/test/CodeGen/X86/2008-10-02-Atomics32-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ;; This version includes 64-bit version of binary operators (in 32-bit mode).
 ;; Swap, cmp-and-swap not supported yet in this mode.
 ; ModuleID = 'Atomics.c'
diff --git a/test/CodeGen/X86/2008-10-06-MMXISelBug.ll b/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
index bd1ad59797aba..7f7b1a436d24a 100644
--- a/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
+++ b/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2
 ; PR2850
 
 @tmp_V2i = common global <2 x i32> zeroinitializer		; <<2 x i32>*> [#uses=2]
diff --git a/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll b/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
index 837aad5304075..a135cd4978761 100644
--- a/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
+++ b/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
@@ -1,7 +1,7 @@
 ; ModuleID = 'nan.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3,-sse | grep fldl
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fldl
 ; This NaN should be shortened to a double (not a float).
 
 declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f)
diff --git a/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll b/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
index d2e9b457517e3..bd48105f129ae 100644
--- a/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
+++ b/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
@@ -1,7 +1,7 @@
 ; ModuleID = 'nan.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3,-sse | grep fldt | count 3
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fldt | count 3
 ; it is not safe to shorten any of these NaNs.
 
 declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f)
diff --git a/test/CodeGen/X86/2008-10-07-SSEISelBug.ll b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
index 48089861bc321..bc5761288c9b5 100644
--- a/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
+++ b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2
 
 define <4 x float> @f(float %w) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2008-10-11-CallCrash.ll b/test/CodeGen/X86/2008-10-11-CallCrash.ll
index 979b7875fec67..efc6125cfc2d7 100644
--- a/test/CodeGen/X86/2008-10-11-CallCrash.ll
+++ b/test/CodeGen/X86/2008-10-11-CallCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2735
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
diff --git a/test/CodeGen/X86/2008-10-13-CoalescerBug.ll b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
index 608372e5a8905..4d3f8c2071b5b 100644
--- a/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR2775
 
 define i32 @func_77(i8 zeroext %p_79) nounwind {
diff --git a/test/CodeGen/X86/2008-10-16-SpillerBug.ll b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
index 4318f1d28c72b..b8ca364d1798d 100644
--- a/test/CodeGen/X86/2008-10-16-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edx}
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edi}
 
 	%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
 	%struct.XXDAlphaTest = type { float, i16, i8, i8 }
diff --git a/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll b/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
index e1dc7b6bb27c1..de4c1e70b8d85 100644
--- a/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
+++ b/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 ; PR2762
 define void @foo(<4 x i32>* %p, <4 x double>* %q) {
   %n = load <4 x i32>* %p
diff --git a/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll b/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
index eb2ec3760b9e5..b2e6061ff91cb 100644
--- a/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
+++ b/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
 
 define void @test(i64 %x) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll b/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
index 33e8c49277f47..353d1c75216b3 100644
--- a/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
+++ b/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
 
 ; from gcc.c-torture/compile/920520-1.c
 
diff --git a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
index d6ae05e3798e3..421b931ecd5a2 100644
--- a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
+++ b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o - | not grep {ucomiss\[^,\]*esp}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o - | not grep {ucomiss\[^,\]*esp}
 
 define void @f(float %wt) {
 entry:
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index ad13b8528372b..afeb358da5725 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
 
 define fastcc void @fourn(double* %data, i32 %isign) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
index d8b0e706d2ab4..784bc72f42e90 100644
--- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll
+++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -2,8 +2,8 @@
 ; Until it does, we shouldn't use movaps to access the stack.  On targets with
 ; sufficiently aligned stack (e.g. darwin) we should.
 
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
 
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll b/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
index 41776b2a38e9b..7ad94f149e1fd 100644
--- a/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
+++ b/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR2977
 define i8* @ap_php_conv_p2(){
 entry:
diff --git a/test/CodeGen/X86/2008-11-03-F80VAARG.ll b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
index 36a054a3e6f8e..507799b7304fb 100644
--- a/test/CodeGen/X86/2008-11-03-F80VAARG.ll
+++ b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o - | not grep 10
+; RUN: llc < %s -march=x86 -o - | not grep 10
 
 declare void @llvm.va_start(i8*) nounwind
 
diff --git a/test/CodeGen/X86/2008-11-06-testb.ll b/test/CodeGen/X86/2008-11-06-testb.ll
index 7acc7cad3cfdf..f8f317c2dd46e 100644
--- a/test/CodeGen/X86/2008-11-06-testb.ll
+++ b/test/CodeGen/X86/2008-11-06-testb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep testb
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep testb
 
 ; ModuleID = '<stdin>'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-11-13-inlineasm-3.ll b/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
index 7487548e820b8..1dc97fc52a46d 100644
--- a/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
+++ b/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
@@ -1,4 +1,4 @@
-; RUN:  llvm-as < %s | llc -mtriple=i686-pc-linux-gnu
+; RUN:  llc < %s -mtriple=i686-pc-linux-gnu
 ; PR 1779
 ; Using 'A' constraint and a tied constraint together used to crash.
 ; ModuleID = '<stdin>'
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
index fe1870e1d84c5..2e114ab5ae885 100644
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
+++ b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
@@ -1,5 +1,4 @@
-; RUN:  llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep 63551 | count 1
-; ModuleID = '<stdin>'
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985 | count 1
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
index faf7cd4b22040..7c811afa51d3e 100644
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
+++ b/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
@@ -1,5 +1,4 @@
-; RUN:  llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep 63551
-; ModuleID = '<stdin>'
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/2008-11-29-ULT-Sign.ll b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
index 6c26b6818e4f8..6dca141639e4a 100644
--- a/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
+++ b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
@@ -1,4 +1,4 @@
-; RUN:  llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep "jns" | count 1
+; RUN:  llc < %s -mtriple=i686-pc-linux-gnu | grep "jns" | count 1
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/2008-12-01-SpillerAssert.ll b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
index 81b25da8a8dea..d96d806388c9a 100644
--- a/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
+++ b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
 ; PR3124
 
         %struct.cpuinfo_x86 = type { i8, i8, i8, i8, i32, i8, i8, i8, i32, i32, [9 x i32], [16 x i8], [64 x i8], i32, i32, i32, i64, %struct.cpumask_t, i16, i16, i16, i16, i16, i16, i16, i16, i32 }
diff --git a/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll b/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
index ca5a80ccd82b5..1f8bd45da14d5 100644
--- a/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
+++ b/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep lea
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep lea
 ; The inner loop should use [reg] addressing, not [reg+reg] addressing.
 ; rdar://6403965
 
diff --git a/test/CodeGen/X86/2008-12-02-IllegalResultType.ll b/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
index 01e0f7eb81de2..4b72cb919ffa3 100644
--- a/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
+++ b/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR3117
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-1.ll b/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
index 48bb4e438328a..fe5bff3e3459b 100644
--- a/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.5"
 ; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-2.ll b/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
index ba7dfbbcecc1c..4cb1b42693b97 100644
--- a/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.5"
 ; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-3.ll b/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
index 5fb639d5fc33c..d5a676a7dbba6 100644
--- a/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep add | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep sub | grep -v subsections | count 1
+; RUN: llc < %s -march=x86 | grep add | count 2
+; RUN: llc < %s -march=x86 | grep sub | grep -v subsections | count 1
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.5"
 ; this should be rearranged to have two +s and one -
diff --git a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
index b6b5cbda4bd1c..7fd2e6f2948f6 100644
--- a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
+++ b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9.5 -mattr=+sse41 -relocation-model=pic
+; RUN: llc < %s -mtriple=i386-apple-darwin9.5 -mattr=+sse41 -relocation-model=pic
 
 	%struct.XXActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
 	%struct.XXAlphaTest = type { float, i16, i8, i8 }
diff --git a/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll b/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
new file mode 100644
index 0000000000000..e97b63db14d97
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | grep ^.L_Z1fv.eh
+; RUN: llc < %s -march=x86    -mtriple=i686-unknown-linux-gnu | grep ^.L_Z1fv.eh
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin9 | grep ^__Z1fv.eh
+; RUN: llc < %s -march=x86    -mtriple=i386-apple-darwin9 | grep ^__Z1fv.eh
+
+define void @_Z1fv() {
+entry:
+	br label %return
+
+return:
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-12-16-BadShift.ll b/test/CodeGen/X86/2008-12-16-BadShift.ll
index 46b70188c8fef..6c70c5ba53227 100644
--- a/test/CodeGen/X86/2008-12-16-BadShift.ll
+++ b/test/CodeGen/X86/2008-12-16-BadShift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep shrl
+; RUN: llc < %s | not grep shrl
 ; Note: this test is really trying to make sure that the shift
 ; returns the right result; shrl is most likely wrong,
 ; but if CodeGen starts legitimately using an shrl here,
diff --git a/test/CodeGen/X86/2008-12-16-dagcombine-4.ll b/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
index 193d290e33ffc..3080d08557274 100644
--- a/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
+++ b/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.5"
 ; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index c7fdfb269207d..13a9080c1401f 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -1,7 +1,12 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | %prcontext End 2 | grep mov
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
 ; PR3149
 ; Make sure the copy after inline asm is not coalesced away.
 
+; CHECK:         ## InlineAsm End
+; CHECK-NEXT: BB1_2:
+; CHECK-NEXT:    movl	%esi, %eax
+
+
 @"\01LC" = internal constant [7 x i8] c"n0=%d\0A\00"		; <[7 x i8]*> [#uses=1]
 @llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i64, i64)* @umoddi3 to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
 
diff --git a/test/CodeGen/X86/2008-12-22-dagcombine-5.ll b/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
index 24be521842f0b..75773e0959c27 100644
--- a/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
+++ b/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 2
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.5"
 ; -(-a) - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2008-12-23-crazy-address.ll b/test/CodeGen/X86/2008-12-23-crazy-address.ll
index e53a91ec3a119..2edcaea80ce70 100644
--- a/test/CodeGen/X86/2008-12-23-crazy-address.ll
+++ b/test/CodeGen/X86/2008-12-23-crazy-address.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | grep {lea.*X.*esp} | count 2
+; RUN: llc < %s -march=x86 -relocation-model=static | grep {lea.*X.*esp} | count 2
 
 @X = external global [0 x i32]
 
diff --git a/test/CodeGen/X86/2008-12-23-dagcombine-6.ll b/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
index 13cb9db8eeb8c..bae928336baaa 100644
--- a/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
+++ b/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "(%esp)" | count 4
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 4
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.5"
 ; a - a should be found and removed, leaving refs to only L and P
diff --git a/test/CodeGen/X86/2009-01-12-CoalescerBug.ll b/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
index 7c800d4e287c3..27a7113ffd56c 100644
--- a/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu | grep movq | count 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | grep movq | count 2
 ; PR3311
 
 	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
index ecf71f64cf999..9c71469b5b202 100644
--- a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
+++ b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2  -disable-mmx -enable-legalize-types-checking
+; RUN: llc < %s -march=x86 -mattr=+sse2  -disable-mmx -enable-legalize-types-checking
 
 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
 
diff --git a/test/CodeGen/X86/2009-01-16-SchedulerBug.ll b/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
index ff20dc1e30049..99bef6ce3fc9d 100644
--- a/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
+++ b/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
 ; rdar://6501631
 
 	%CF = type { %Register }
diff --git a/test/CodeGen/X86/2009-01-16-UIntToFP.ll b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
index 340608af35a8b..2eab5f1773ac4 100644
--- a/test/CodeGen/X86/2009-01-16-UIntToFP.ll
+++ b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 
diff --git a/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll b/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
index 8857df38926d7..f895336491e22 100644
--- a/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
+++ b/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; rdar://6505632
 ; reduced from 483.xalancbmk
 
diff --git a/test/CodeGen/X86/2009-01-25-NoSSE.ll b/test/CodeGen/X86/2009-01-25-NoSSE.ll
index b12e4137dbd5b..0583ef1909197 100644
--- a/test/CodeGen/X86/2009-01-25-NoSSE.ll
+++ b/test/CodeGen/X86/2009-01-25-NoSSE.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=-sse,-sse2 | not grep xmm
+; RUN: llc < %s -march=x86-64 -mattr=-sse,-sse2 | not grep xmm
 ; PR3402
 target datalayout =
 "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-01-26-WrongCheck.ll b/test/CodeGen/X86/2009-01-26-WrongCheck.ll
index db9dbb67def4a..117ff47657f4c 100644
--- a/test/CodeGen/X86/2009-01-26-WrongCheck.ll
+++ b/test/CodeGen/X86/2009-01-26-WrongCheck.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -enable-legalize-types-checking
+; RUN: llc < %s -march=x86 -enable-legalize-types-checking
 ; PR3393
 
 define void @foo(i32 inreg %x) {
diff --git a/test/CodeGen/X86/2009-01-27-NullStrings.ll b/test/CodeGen/X86/2009-01-27-NullStrings.ll
index b0c27d8903e75..8684f4a19ca42 100644
--- a/test/CodeGen/X86/2009-01-27-NullStrings.ll
+++ b/test/CodeGen/X86/2009-01-27-NullStrings.ll
@@ -1,38 +1,7 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep {\\.cstring} | count 1
-	%struct.A = type {  }
-	%struct.NSString = type opaque
-	%struct.__builtin_CFString = type { i32*, i32, i8*, i32 }
-	%struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* }
-	%struct._objc_symtab = type { i32, %struct.objc_selector**, i16, i16 }
-	%struct.objc_object = type opaque
-	%struct.objc_selector = type opaque
-@"\01L_unnamed_cfstring_0" = internal constant %struct.__builtin_CFString { i32* getelementptr ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr ([1 x i8]* @"\01LC", i32 0, i32 0), i32 0 }, section "__DATA, __cfstring"		; <%struct.__builtin_CFString*> [#uses=1]
-@__CFConstantStringClassReference = external global [0 x i32]		; <[0 x i32]*> [#uses=1]
-@"\01LC" = internal constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
-@"\01L_OBJC_SELECTOR_REFERENCES_0" = internal global %struct.objc_selector* bitcast ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_0" to %struct.objc_selector*), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4		; <%struct.objc_selector**> [#uses=2]
-@"\01L_OBJC_SYMBOLS" = internal global %struct._objc_symtab zeroinitializer, section "__OBJC,__symbols,regular,no_dead_strip", align 4		; <%struct._objc_symtab*> [#uses=2]
-@"\01L_OBJC_METH_VAR_NAME_0" = internal global [6 x i8] c"bork:\00", section "__TEXT,__cstring,cstring_literals", align 1		; <[6 x i8]*> [#uses=2]
-@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] zeroinitializer, section "__OBJC, __image_info,regular"		; <[2 x i32]*> [#uses=1]
-@"\01L_OBJC_CLASS_NAME_0" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals", align 1		; <[1 x i8]*> [#uses=1]
-@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), %struct._objc_symtab* @"\01L_OBJC_SYMBOLS" }, section "__OBJC,__module_info,regular,no_dead_strip", align 4		; <%struct._objc_module*> [#uses=1]
-@llvm.used = appending global [6 x i8*] [ i8* bitcast (%struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0" to i8*), i8* bitcast (%struct._objc_symtab* @"\01L_OBJC_SYMBOLS" to i8*), i8* getelementptr ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_0", i32 0, i32 0), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*) ], section "llvm.metadata"		; <[6 x i8*]*> [#uses=0]
+; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s
+; CHECK: .section __TEXT,__cstring,cstring_literals
 
-define void @func(%struct.A* %a) nounwind {
-entry:
-	%a_addr = alloca %struct.A*		; <%struct.A**> [#uses=2]
-	%a.0 = alloca %struct.objc_object*		; <%struct.objc_object**> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store %struct.A* %a, %struct.A** %a_addr
-	%0 = load %struct.A** %a_addr, align 4		; <%struct.A*> [#uses=1]
-	%1 = bitcast %struct.A* %0 to %struct.objc_object*		; <%struct.objc_object*> [#uses=1]
-	store %struct.objc_object* %1, %struct.objc_object** %a.0, align 4
-	%2 = load %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0", align 4		; <%struct.objc_selector*> [#uses=1]
-	%3 = load %struct.objc_object** %a.0, align 4		; <%struct.objc_object*> [#uses=1]
-	call void bitcast (%struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)* @objc_msgSend to void (%struct.objc_object*, %struct.objc_selector*, %struct.NSString*)*)(%struct.objc_object* %3, %struct.objc_selector* %2, %struct.NSString* bitcast (%struct.__builtin_CFString* @"\01L_unnamed_cfstring_0" to %struct.NSString*)) nounwind
-	br label %return
+@x = internal constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
 
-return:		; preds = %entry
-	ret void
-}
+@y = global [1 x i8]* @x
 
-declare %struct.objc_object* @objc_msgSend(%struct.objc_object*, %struct.objc_selector*, ...)
diff --git a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
index b7f37c9d3102f..ce3ea828ec0c5 100644
--- a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9.6 -regalloc=local -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-apple-darwin9.6 -regalloc=local -disable-fp-elim
 ; rdar://6538384
 
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/X86/2009-01-31-BigShift.ll b/test/CodeGen/X86/2009-01-31-BigShift.ll
index 360b4f0e46bfe..4eb0ec1485b75 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | not grep and
 ; PR3401
 
 define void @x(i288 %i) nounwind {
diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll
index 2b5b189578300..9d240844afba1 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift2.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {mov.*56}
+; RUN: llc < %s -march=x86 | grep {mov.*56}
 ; PR3449
 
 define void @test(<8 x double>* %P, i64* %Q) nounwind {
diff --git a/test/CodeGen/X86/2009-01-31-BigShift3.ll b/test/CodeGen/X86/2009-01-31-BigShift3.ll
index c92c86a092a10..1b531e370437b 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift3.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR3450
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-02-01-LargeMask.ll b/test/CodeGen/X86/2009-02-01-LargeMask.ll
index f2a964f208ce1..c4042e6c9c682 100644
--- a/test/CodeGen/X86/2009-02-01-LargeMask.ll
+++ b/test/CodeGen/X86/2009-02-01-LargeMask.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR3453
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll b/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
index 5f97ee7a70cd1..e75af13a600b8 100644
--- a/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
+++ b/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR3411
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll b/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
index 1f29bdbe37eb7..6ba046a80c228 100644
--- a/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
+++ b/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep { - 92}
+; RUN: llc < %s | grep { - 92}
 ; PR3481
 ; The offset should print as -92, not +17179869092
 
diff --git a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
index 39cad73d4c093..0ffa8fdc30dd7 100644
--- a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movss  | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss  | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4
 
 define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind {
 newFuncRoot:
diff --git a/test/CodeGen/X86/2009-02-07-CoalescerBug.ll b/test/CodeGen/X86/2009-02-07-CoalescerBug.ll
index 784c97a226194..2d0bbe607279f 100644
--- a/test/CodeGen/X86/2009-02-07-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-02-07-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic -stats |& grep {Number of valno def marked dead} | grep 1
+; RUN: llc < %s -march=x86 -relocation-model=pic -stats |& grep {Number of valno def marked dead} | grep 1
 ; rdar://6566708
 
 target triple = "i386-apple-darwin9.6"
diff --git a/test/CodeGen/X86/2009-02-08-CoalescerBug.ll b/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
index cd30c1e7e40e0..908cc08991d89 100644
--- a/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR3486
 
 define i32 @foo(i8 signext %p_26) nounwind {
diff --git a/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll b/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
index 7b73a86a72ec6..1284b0d1b7b20 100644
--- a/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
+++ b/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR3537
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
index b0c4449610ac5..72c7ee93a9d25 100644
--- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
+++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s
+; RUN: llc < %s -march=x86-64
 ; PR3538
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9"
diff --git a/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
index ddd15f7c81e8b..2e148ad6b18cc 100644
--- a/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
+++ b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {\$-81920} | count 3
-; RUN: llvm-as < %s | llc -march=x86 | grep {\$4294885376} | count 1
+; RUN: llc < %s -march=x86 | grep {\$-81920} | count 3
+; RUN: llc < %s -march=x86 | grep {\$4294885376} | count 1
 
 ; ModuleID = 'shant.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-02-12-SpillerBug.ll b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
index 1d10319e86d94..4f8a5e7b3e30f 100644
--- a/test/CodeGen/X86/2009-02-12-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-apple-darwin8
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8
 ; PR3561
 
 define hidden void @__mulxc3({ x86_fp80, x86_fp80 }* noalias nocapture sret %agg.result, x86_fp80 %a, x86_fp80 %b, x86_fp80 %c, x86_fp80 %d) nounwind {
diff --git a/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll b/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
index 54fcd430e98c0..58a7f9fb75936 100644
--- a/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
+++ b/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split
 
 define i32 @main() nounwind {
 bb4.i.thread:
diff --git a/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll b/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
index a6bb7b8615fd1..b3dd13c50f927 100644
--- a/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
+++ b/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep weak | count 3
+; RUN: llc < %s | grep weak | count 3
 ; PR3629
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
index 3dbfa80e00e62..7ea699833ba86 100644
--- a/test/CodeGen/X86/2009-02-25-CommuteBug.ll
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& not grep commuted
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep commuted
 ; rdar://6608609
 
 define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone {
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 8bf6c23d59db3..cb1b1efae3e2a 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse3 -stats |& not grep {machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3 -stats |& not grep {machine-licm}
 ; rdar://6627786
 
 target triple = "x86_64-apple-darwin10.0"
diff --git a/test/CodeGen/X86/2009-03-03-BTHang.ll b/test/CodeGen/X86/2009-03-03-BTHang.ll
index 0f338d8eadffd..bb95925774357 100644
--- a/test/CodeGen/X86/2009-03-03-BTHang.ll
+++ b/test/CodeGen/X86/2009-03-03-BTHang.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; rdar://6642541
 
  	%struct.HandleBlock = type { [30 x i32], [990 x i8*], %struct.HandleBlockTrailer }
diff --git a/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
index 6f16ced1c674d..9deecebe94535 100644
--- a/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
+++ b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR3686
 ; rdar://6661799
 
diff --git a/test/CodeGen/X86/2009-03-05-burr-list-crash.ll b/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
index ccedaae9322db..411a0c92830af 100644
--- a/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
+++ b/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2009-03-07-FPConstSelect.ll b/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
index 28302c0f7b0be..39caddcf9342f 100644
--- a/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
+++ b/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep xmm
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm
 ; This should do a single load into the fp stack for the return, not diddle with xmm registers.
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-03-09-APIntCrash.ll b/test/CodeGen/X86/2009-03-09-APIntCrash.ll
index d7b5269eaeb90..896c9686cc4e3 100644
--- a/test/CodeGen/X86/2009-03-09-APIntCrash.ll
+++ b/test/CodeGen/X86/2009-03-09-APIntCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 ; PR3763
 	%struct.__block_descriptor = type { i64, i64 }
 
diff --git a/test/CodeGen/X86/2009-03-09-SpillerBug.ll b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
index 2ccd7714233e9..4224210e58f01 100644
--- a/test/CodeGen/X86/2009-03-09-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu
 ; PR3706
 
 define void @__mulxc3(x86_fp80 %b) nounwind {
diff --git a/test/CodeGen/X86/2009-03-10-CoalescerBug.ll b/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
index 3d979e9d73975..90dff8878a784 100644
--- a/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
 ; rdar://r6661945
 
 	%struct.WINDOW = type { i16, i16, i16, i16, i16, i16, i16, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.ldat*, i16, i16, i32, i32, %struct.WINDOW*, %struct.pdat, i16, %struct.cchar_t }
diff --git a/test/CodeGen/X86/2009-03-11-CoalescerBug.ll b/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
index 1f5631764b57b..d5ba93e10495f 100644
--- a/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 -stats |& grep regcoalescing | grep commuting
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -stats |& grep regcoalescing | grep commuting
 
 @lookupTable5B = external global [64 x i32], align 32		; <[64 x i32]*> [#uses=1]
 @lookupTable3B = external global [16 x i32], align 32		; <[16 x i32]*> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
index ec060e4ef4a58..3564f01a7c433 100644
--- a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
+++ b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space}
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space}
 ; rdar://6668548
 
 declare double @llvm.sqrt.f64(double) nounwind readonly
diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
index b01556de4828f..878fa51d5dc31 100644
--- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
+++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep -A 2 {call.*f} | grep movl
+; RUN: llc < %s -march=x86 | grep -A 2 {call.*f} | grep movl
 ; Check the register copy comes after the call to f and before the call to g
 ; PR3784
 
diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
index 091aab41d291e..adbd241cd98fa 100644
--- a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
+++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -asm-verbose | grep -A 1 lpad | grep Llabel
+; RUN: llc < %s -march=x86 -asm-verbose | grep -A 1 lpad | grep Llabel
 ; Check that register copies in the landing pad come after the EH_LABEL
 
 declare i32 @f()
diff --git a/test/CodeGen/X86/2009-03-16-SpillerBug.ll b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
index 09782a26fec98..80e7639e7c295 100644
--- a/test/CodeGen/X86/2009-03-16-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -stats |& grep virtregrewriter | not grep {stores unfolded}
+; RUN: llc < %s -mtriple=i386-apple-darwin -stats |& grep virtregrewriter | not grep {stores unfolded}
 ; rdar://6682365
 
 ; Do not clobber a register if another spill slot is available in it and it's marked "do not clobber".
diff --git a/test/CodeGen/X86/2009-03-23-LinearScanBug.ll b/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
index b5298aee30654..06dfdc0c767f4 100644
--- a/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
+++ b/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -O0
+; RUN: llc < %s -mtriple=i386-apple-darwin -O0
 
 define fastcc void @optimize_bit_field() nounwind {
 bb4:
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index b30d41eb05ba2..b5873bae5f05f 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t
 ; RUN: not grep spill %t
 ; RUN: not grep {%rsp} %t
 ; RUN: not grep {%rbp} %t
diff --git a/test/CodeGen/X86/2009-03-23-i80-fp80.ll b/test/CodeGen/X86/2009-03-23-i80-fp80.ll
index 0619e12039681..e542325b63697 100644
--- a/test/CodeGen/X86/2009-03-23-i80-fp80.ll
+++ b/test/CodeGen/X86/2009-03-23-i80-fp80.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep 302245289961712575840256
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep K40018000000000000000
+; RUN: opt < %s -instcombine -S | grep 302245289961712575840256
+; RUN: opt < %s -instcombine -S | grep K40018000000000000000
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
 
diff --git a/test/CodeGen/X86/2009-03-25-TestBug.ll b/test/CodeGen/X86/2009-03-25-TestBug.ll
index 2c330db713e80..f40fddc5a36d0 100644
--- a/test/CodeGen/X86/2009-03-25-TestBug.ll
+++ b/test/CodeGen/X86/2009-03-25-TestBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o %t -f
+; RUN: llc < %s -march=x86 -o %t
 ; RUN: not grep and %t
 ; RUN: not grep shr %t
 ; rdar://6661955
diff --git a/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll b/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
index 0e31942e468df..f4864793ba2f3 100644
--- a/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
+++ b/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -no-implicit-float
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
-define double @t(double %x) nounwind ssp {
+define double @t(double %x) nounwind ssp noimplicitfloat {
 entry:
 	br i1 false, label %return, label %bb3
 
diff --git a/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll b/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
index 1d4d2b67783c2..97bbd93f83f1f 100644
--- a/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
+++ b/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s 
 ; rdar://6774324
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin10.0"
diff --git a/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
index bf1c8df377db7..27f11cf6bc6ed 100644
--- a/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
+++ b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel
+; RUN: llc < %s -fast-isel
 ; radr://6772169
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin10"
diff --git a/test/CodeGen/X86/2009-04-12-picrel.ll b/test/CodeGen/X86/2009-04-12-picrel.ll
index 73062ab6263e9..f1942801c7af2 100644
--- a/test/CodeGen/X86/2009-04-12-picrel.ll
+++ b/test/CodeGen/X86/2009-04-12-picrel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small > %t
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small > %t
 ; RUN: grep leaq %t | count 1
 
 @dst = external global [131072 x i32]
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
index d6f4b9444b596..ff8cf0ac229ec 100644
--- a/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin
+; RUN: llc < %s -mtriple=i386-apple-darwin
 ; rdar://6781755
 ; PR3934
 
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
index 7f94c6ca947e6..4362ba4375411 100644
--- a/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; rdar://6781755
 ; PR3934
 
diff --git a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
index 0d66f6984fe22..bfa3eaa565dfa 100644
--- a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
+++ b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -O0 -regalloc=local | not grep sil
+; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=local | not grep sil
 ; rdar://6787136
 
 	%struct.X = type { i8, [32 x i8] }
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
index 3e60f6bbac8e0..f46eed4769f7e 100644
--- a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded}
 ; XFAIL: *
 ; 69408 removed the opportunity for this optimization to work
 
diff --git a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
index 985eb2147247f..4d25b0f983192 100644
--- a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
+++ b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of registers downgraded}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 84
 ; rdar://6802189
 
 ; Test if linearscan is unfavoring registers for allocation to allow more reuse
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index 750dba7721425..c6e6e50641c5c 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -1,7 +1,13 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -mattr=-sse41,-sse3,+sse2 | \
-; RUN:   %prcontext {14} 2 | grep {(%ebp)} | count 1
+; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic \
+; RUN:     -disable-fp-elim -mattr=-sse41,-sse3,+sse2 < %s | \
+; RUN:   FileCheck %s
 ; rdar://6808032
 
+; CHECK: pextrw $14
+; CHECK-NEXT: movzbl
+; CHECK-NEXT: (%ebp)
+; CHECK-NEXT: pinsrw
+
 define void @update(i8** %args_list) nounwind {
 entry:
 	%cmp.i = icmp eq i32 0, 0		; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index 2835c2decfcac..c1ec45fc007e5 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu -regalloc=local -relocation-model=pic > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -regalloc=local -relocation-model=pic > %t
 ; RUN: grep {leal.*TLSGD.*___tls_get_addr} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=local -relocation-model=pic > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=local -relocation-model=pic > %t2
 ; RUN: grep {leaq.*TLSGD.*__tls_get_addr} %t2
 ; PR4004
 
diff --git a/test/CodeGen/X86/2009-04-25-CoalescerBug.ll b/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
index 981d3277d3430..94d3eb21cecca 100644
--- a/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 2
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
 ; rdar://6806252
 
 define i64 @test(i32* %tmp13) nounwind {
diff --git a/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll b/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
index b804a5b40a85f..7981a52e740a0 100644
--- a/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
+++ b/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
 ; PR4034
 
 	%struct.BiContextType = type { i16, i8 }
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
index 1b757b16d73e2..d77e528fa7c11 100644
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
 ; PR4056
 
 define void @int163(i32 %p_4, i32 %p_5) nounwind {
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
index 70cb4ff3c825a..f02565403e879 100644
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9
+; RUN: llc < %s -mtriple=i386-apple-darwin9
 ; PR4051
 
 define void @int163(i32 %p_4, i32 %p_5) nounwind {
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
index 0fb000c3a0730..0a2fcdbf6c082 100644
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-unknown-linux-gnu | grep cmpxchgl | not grep eax
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | grep cmpxchgl | not grep eax
 ; PR4076
 
 	type { i8, i8, i8 }		; type %0
diff --git a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
index fc31c0b416d07..a2fd2e4c51c95 100644
--- a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
+++ b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {movl.*%ebx, 8(%esi)}
+; RUN: llc < %s | grep {movl.*%ebx, 8(%esi)}
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.0"
 
diff --git a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
index 767eb3118d96e..6843723052c13 100644
--- a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
+++ b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10
+; RUN: llc < %s -mtriple=i386-apple-darwin10
 ; rdar://6837009
 
 	type { %struct.pf_state*, %struct.pf_state*, %struct.pf_state*, i32 }		; type %0
diff --git a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
index c02c045ba5de4..d1f9cf83307cd 100644
--- a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
+++ b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 -disable-fp-elim -relocation-model=pic
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-fp-elim -relocation-model=pic
 ; PR4099
 
 	type { [62 x %struct.Bitvec*] }		; type %0
diff --git a/test/CodeGen/X86/2009-04-scale.ll b/test/CodeGen/X86/2009-04-scale.ll
index 0766dc79e0203..e4c756cfdd441 100644
--- a/test/CodeGen/X86/2009-04-scale.ll
+++ b/test/CodeGen/X86/2009-04-scale.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-unknown-linux-gnu
+; RUN: llc < %s -march=x86 -mtriple=i386-unknown-linux-gnu
 ; PR3995
 
         %struct.vtable = type { i32 (...)** }
diff --git a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
index 284c6e250d793..738b5fbb7048a 100644
--- a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
+++ b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static > %t
+; RUN: llc < %s -relocation-model=static > %t
 ; RUN: grep "1: ._pv_cpu_ops+8" %t
 ; RUN: grep "2: ._G" %t
 ; PR4152
diff --git a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
index 817872598eaf0..a5e28c0748678 100644
--- a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
+++ b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR4188
 ; ModuleID = '<stdin>'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
index 42bf9e991e6e9..6e062fb25089e 100644
--- a/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
+++ b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 ; PR3886
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
diff --git a/test/CodeGen/X86/2009-05-23-available_externally.ll b/test/CodeGen/X86/2009-05-23-available_externally.ll
index f4881bab45cf1..94773d91ea17d 100644
--- a/test/CodeGen/X86/2009-05-23-available_externally.ll
+++ b/test/CodeGen/X86/2009-05-23-available_externally.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep atoi | grep PLT
+; RUN: llc < %s -relocation-model=pic | grep atoi | grep PLT
 ; PR4253
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
index 6f2bef4fca10e..8a0b244a23fae 100644
--- a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
+++ b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep -E {sar|shl|mov|or} | count 4
+; RUN: llc < %s | grep -E {sar|shl|mov|or} | count 4
 ; Check that the shr(shl X, 56), 48) is not mistakenly turned into
 ; a shr (X, -8) that gets subsequently "optimized away" as undef
 ; PR4254
diff --git a/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
index 7bdfcb31035c8..2fd42f40d8915 100644
--- a/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
+++ b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 
 	%struct.tempsym_t = type { i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32 }
 
diff --git a/test/CodeGen/X86/2009-05-30-ISelBug.ll b/test/CodeGen/X86/2009-05-30-ISelBug.ll
index 373f91f06f611..af552d4ce20d4 100644
--- a/test/CodeGen/X86/2009-05-30-ISelBug.ll
+++ b/test/CodeGen/X86/2009-05-30-ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep {movzbl	%\[abcd\]h,}
+; RUN: llc < %s -march=x86-64 | not grep {movzbl	%\[abcd\]h,}
 
 define void @BZ2_bzDecompress_bb5_2E_outer_bb35_2E_i_bb54_2E_i(i32*, i32 %c_nblock_used.2.i, i32 %.reload51, i32* %.out, i32* %.out1, i32* %.out2, i32* %.out3) nounwind {
 newFuncRoot:
diff --git a/test/CodeGen/X86/2009-06-02-RewriterBug.ll b/test/CodeGen/X86/2009-06-02-RewriterBug.ll
index ea33b16f823fe..779f9857de7f7 100644
--- a/test/CodeGen/X86/2009-06-02-RewriterBug.ll
+++ b/test/CodeGen/X86/2009-06-02-RewriterBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-undermydesk-freebsd8.0 -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=x86_64-undermydesk-freebsd8.0 -relocation-model=pic -disable-fp-elim
 ; PR4225
 
 define void @sha256_block1(i32* nocapture %arr, i8* nocapture %in, i64 %num) nounwind {
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
index c628b8affdd91..e6f3008c24761 100644
--- a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep "subq.*\\\$8, \\\%rsp"
+; RUN: llc < %s | grep "subq.*\\\$40, \\\%rsp"
 target triple = "x86_64-mingw64"
 
 define x86_fp80 @a(i64 %x) nounwind readnone {
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 33d797297be82..cb64bf22c9819 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -o %t1 -f
-; RUN: grep "subq.*\\\$40, \\\%rsp" %t1
-; RUN: grep "movaps	\\\%xmm8, \\\(\\\%rsp\\\)" %t1
-; RUN: grep "movaps	\\\%xmm7, 16\\\(\\\%rsp\\\)" %t1
+; RUN: llc < %s -o %t1
+; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
+; RUN: grep "movaps	\\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
+; RUN: grep "movaps	\\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
 target triple = "x86_64-mingw64"
 
 define i32 @a() nounwind {
diff --git a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
index fa90fa9426d6f..9415732de0257 100644
--- a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
+++ b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 	type { %struct.GAP }		; type %0
 	type { i16, i8, i8 }		; type %1
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
index 94df530ec0e6b..336f17e2a3253 100644
--- a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | not grep movl
+; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movl
 
 define <8 x i8> @a(i8 zeroext %x) nounwind {
   %r = insertelement <8 x i8> undef, i8 %x, i32 0
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index 220423aa986a7..5c514805e485c 100644
--- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2 > %t1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
 ; RUN: grep movzwl %t1 | count 2
 ; RUN: grep movzbl %t1 | count 2
 ; RUN: grep movd %t1 | count 4
diff --git a/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
index 2e3f195ff9478..8bb3dc63a3b9a 100644
--- a/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
+++ b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/2009-06-05-sitofpCrash.ll b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
index 589a8800ede7c..e361804d61ba7 100644
--- a/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
+++ b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse
+; RUN: llc < %s -march=x86 -mattr=+sse
 ; PR2598
 
 define <2 x float> @a(<2 x i32> %i) nounwind {
diff --git a/test/CodeGen/X86/2009-06-06-ConcatVectors.ll b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
index a46fd1a2e76f0..92419fcb8b81d 100644
--- a/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
+++ b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 define <2 x i64> @_mm_movpi64_pi64(<1 x i64> %a, <1 x i64> %b) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
index c3687a533e053..07ef53e09d8ea 100644
--- a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
+++ b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep movl | count 2
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep movl | count 2
 
 define i64 @a(i32 %a, i32 %b) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
index 001b7fc5a4af6..673e936e21788 100644
--- a/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
+++ b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm
+; RUN: llc < %s -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt
+; RUN: llc < %s -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm
 
 ; Check that x86-64 tail calls support x86_fp80 and v2f32 types. (Tail call
 ; calling convention out of sync with standard c calling convention on x86_64)
diff --git a/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll b/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
index 095e6a1036218..feb578098caee 100644
--- a/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
+++ b/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | not grep TAILCALL 
+; RUN: llc < %s -march=x86 -tailcallopt | not grep TAILCALL 
 
 ; Bug 4396. This tail call can NOT be optimized.
 
diff --git a/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
index d6ff5b6803e33..228cd48119e3d 100644
--- a/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
+++ b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2
 ; PR2484
 
 define <4 x float> @f4523(<4 x float> %a,<4 x float> %b) nounwind {
diff --git a/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll b/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll
new file mode 100644
index 0000000000000..fcc71aef23aed
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -march=x86 -mtriple=x86_64-unknown-freebsd7.2
+; PR4478
+
+	%struct.sockaddr = type <{ i8, i8, [14 x i8] }>
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+	br label %while.cond
+
+while.cond:		; preds = %sw.bb6, %entry
+	switch i32 undef, label %sw.default [
+		i32 -1, label %while.end
+		i32 119, label %sw.bb6
+	]
+
+sw.bb6:		; preds = %while.cond
+	br i1 undef, label %if.then, label %while.cond
+
+if.then:		; preds = %sw.bb6
+	ret i32 1
+
+sw.default:		; preds = %while.cond
+	ret i32 1
+
+while.end:		; preds = %while.cond
+	br i1 undef, label %if.then15, label %if.end16
+
+if.then15:		; preds = %while.end
+	ret i32 1
+
+if.end16:		; preds = %while.end
+	br i1 undef, label %lor.lhs.false, label %if.then21
+
+lor.lhs.false:		; preds = %if.end16
+	br i1 undef, label %if.end22, label %if.then21
+
+if.then21:		; preds = %lor.lhs.false, %if.end16
+	ret i32 1
+
+if.end22:		; preds = %lor.lhs.false
+	br i1 undef, label %lor.lhs.false27, label %if.then51
+
+lor.lhs.false27:		; preds = %if.end22
+	br i1 undef, label %lor.lhs.false39, label %if.then51
+
+lor.lhs.false39:		; preds = %lor.lhs.false27
+	br i1 undef, label %if.end52, label %if.then51
+
+if.then51:		; preds = %lor.lhs.false39, %lor.lhs.false27, %if.end22
+	ret i32 1
+
+if.end52:		; preds = %lor.lhs.false39
+	br i1 undef, label %if.then57, label %if.end58
+
+if.then57:		; preds = %if.end52
+	ret i32 1
+
+if.end58:		; preds = %if.end52
+	br i1 undef, label %if.then64, label %if.end65
+
+if.then64:		; preds = %if.end58
+	ret i32 1
+
+if.end65:		; preds = %if.end58
+	br i1 undef, label %if.then71, label %if.end72
+
+if.then71:		; preds = %if.end65
+	ret i32 1
+
+if.end72:		; preds = %if.end65
+	br i1 undef, label %if.then83, label %if.end84
+
+if.then83:		; preds = %if.end72
+	ret i32 1
+
+if.end84:		; preds = %if.end72
+	br i1 undef, label %if.then101, label %if.end102
+
+if.then101:		; preds = %if.end84
+	ret i32 1
+
+if.end102:		; preds = %if.end84
+	br i1 undef, label %if.then113, label %if.end114
+
+if.then113:		; preds = %if.end102
+	ret i32 1
+
+if.end114:		; preds = %if.end102
+	br i1 undef, label %if.then209, label %if.end210
+
+if.then209:		; preds = %if.end114
+	ret i32 1
+
+if.end210:		; preds = %if.end114
+	br i1 undef, label %if.then219, label %if.end220
+
+if.then219:		; preds = %if.end210
+	ret i32 1
+
+if.end220:		; preds = %if.end210
+	br i1 undef, label %if.end243, label %lor.lhs.false230
+
+lor.lhs.false230:		; preds = %if.end220
+	unreachable
+
+if.end243:		; preds = %if.end220
+	br i1 undef, label %if.then249, label %if.end250
+
+if.then249:		; preds = %if.end243
+	ret i32 1
+
+if.end250:		; preds = %if.end243
+	br i1 undef, label %if.end261, label %if.then260
+
+if.then260:		; preds = %if.end250
+	ret i32 1
+
+if.end261:		; preds = %if.end250
+	br i1 undef, label %if.then270, label %if.end271
+
+if.then270:		; preds = %if.end261
+	ret i32 1
+
+if.end271:		; preds = %if.end261
+	%call.i = call i32 @arc4random() nounwind		; <i32> [#uses=1]
+	%rem.i = urem i32 %call.i, 16383		; <i32> [#uses=1]
+	%rem1.i = trunc i32 %rem.i to i16		; <i16> [#uses=1]
+	%conv2.i = or i16 %rem1.i, -16384		; <i16> [#uses=1]
+	%0 = call i16 asm "xchgb ${0:h}, ${0:b}", "=Q,0,~{dirflag},~{fpsr},~{flags}"(i16 %conv2.i) nounwind		; <i16> [#uses=1]
+	store i16 %0, i16* undef
+	%call281 = call i32 @bind(i32 undef, %struct.sockaddr* undef, i32 16) nounwind		; <i32> [#uses=0]
+	unreachable
+}
+
+declare i32 @bind(i32, %struct.sockaddr*, i32)
+
+declare i32 @arc4random()
diff --git a/test/CodeGen/X86/2009-07-07-SplitICmp.ll b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
new file mode 100644
index 0000000000000..eb9378b9527b6
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -disable-mmx
+
+define void @test2(<2 x i32> %A, <2 x i32> %B, <2 x i32>* %C) nounwind {
+       %D = icmp sgt <2 x i32> %A, %B
+       %E = zext <2 x i1> %D to <2 x i32>
+       store <2 x i32> %E, <2 x i32>* %C
+       ret void
+}
diff --git a/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll b/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll
new file mode 100644
index 0000000000000..0fdfdcb8a30af
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86
+; PR3037
+
+define void @entry(<4 x i8>* %dest) {
+	%1 = xor <4 x i1> zeroinitializer, < i1 true, i1 true, i1 true, i1 true >
+	%2 = extractelement <4 x i1> %1, i32 3
+	%3 = zext i1 %2 to i8
+	%4 = insertelement <4 x i8> zeroinitializer, i8 %3, i32 3
+	store <4 x i8> %4, <4 x i8>* %dest, align 1
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-07-15-CoalescerBug.ll b/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
new file mode 100644
index 0000000000000..eabaf775edefc
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
@@ -0,0 +1,958 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+	%struct.ANY = type { i8* }
+	%struct.AV = type { %struct.XPVAV*, i32, i32 }
+	%struct.CLONE_PARAMS = type { %struct.AV*, i64, %struct.PerlInterpreter* }
+	%struct.CV = type { %struct.XPVCV*, i32, i32 }
+	%struct.DIR = type { i32, i64, i64, i8*, i32, i64, i64, i32, %struct.__darwin_pthread_mutex_t, %struct._telldir* }
+	%struct.GP = type { %struct.SV*, i32, %struct.io*, %struct.CV*, %struct.AV*, %struct.HV*, %struct.GV*, %struct.CV*, i32, i32, i32, i8* }
+	%struct.GV = type { %struct.XPVGV*, i32, i32 }
+	%struct.HE = type { %struct.HE*, %struct.HEK*, %struct.SV* }
+	%struct.HEK = type { i32, i32, [1 x i8] }
+	%struct.HV = type { %struct.XPVHV*, i32, i32 }
+	%struct.MAGIC = type { %struct.MAGIC*, %struct.MGVTBL*, i16, i8, i8, %struct.SV*, i8*, i32 }
+	%struct.MGVTBL = type { i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*, %struct.SV*, i8*, i32)*, i32 (%struct.MAGIC*, %struct.CLONE_PARAMS*)* }
+	%struct.OP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i64, i16, i16, i8, i8 }
+	%struct.PMOP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i64, i16, i16, i8, i8, %struct.OP*, %struct.OP*, %struct.OP*, %struct.OP*, %struct.PMOP*, %struct.REGEXP*, i32, i32, i8, %struct.HV* }
+	%struct.PerlIO_funcs = type { i64, i8*, i64, i32, i64 (%struct.PerlIOl**, i8*, %struct.SV*, %struct.PerlIO_funcs*)*, i64 (%struct.PerlIOl**)*, %struct.PerlIOl** (%struct.PerlIO_funcs*, %struct.PerlIO_list_t*, i64, i8*, i32, i32, i32, %struct.PerlIOl**, i32, %struct.SV**)*, i64 (%struct.PerlIOl**)*, %struct.SV* (%struct.PerlIOl**, %struct.CLONE_PARAMS*, i32)*, i64 (%struct.PerlIOl**)*, %struct.PerlIOl** (%struct.PerlIOl**, %struct.PerlIOl**, %struct.CLONE_PARAMS*, i32)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i64, i32)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, void (%struct.PerlIOl**)*, void (%struct.PerlIOl**)*, i8* (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i8* (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, void (%struct.PerlIOl**, i8*, i64)* }
+	%struct.PerlIO_list_t = type { i64, i64, i64, %struct.PerlIO_pair_t* }
+	%struct.PerlIO_pair_t = type { %struct.PerlIO_funcs*, %struct.SV* }
+	%struct.PerlIOl = type { %struct.PerlIOl*, %struct.PerlIO_funcs*, i32 }
+	%struct.PerlInterpreter = type { i8 }
+	%struct.REGEXP = type { i32*, i32*, %struct.regnode*, %struct.reg_substr_data*, i8*, %struct.reg_data*, i8*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, [1 x %struct.regnode] }
+	%struct.SV = type { i8*, i32, i32 }
+	%struct.XPVAV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.SV**, %struct.SV*, i8 }
+	%struct.XPVCV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.HV*, %struct.OP*, %struct.OP*, void (%struct.CV*)*, %struct.ANY, %struct.GV*, i8*, i64, %struct.AV*, %struct.CV*, i16, i32 }
+	%struct.XPVGV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.GP*, i8*, i64, %struct.HV*, i8 }
+	%struct.XPVHV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, i32, %struct.HE*, %struct.PMOP*, i8* }
+	%struct.XPVIO = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.PerlIOl**, %struct.PerlIOl**, %struct.anon, i64, i64, i64, i64, i8*, %struct.GV*, i8*, %struct.GV*, i8*, %struct.GV*, i16, i8, i8 }
+	%struct.__darwin_pthread_mutex_t = type { i64, [56 x i8] }
+	%struct._telldir = type opaque
+	%struct.anon = type { %struct.DIR* }
+	%struct.io = type { %struct.XPVIO*, i32, i32 }
+	%struct.reg_data = type { i32, i8*, [1 x i8*] }
+	%struct.reg_substr_data = type { [3 x %struct.reg_substr_datum] }
+	%struct.reg_substr_datum = type { i32, i32, %struct.SV*, %struct.SV* }
+	%struct.regnode = type { i8, i8, i16 }
+
+define i32 @Perl_yylex() nounwind ssp {
+entry:
+	br i1 undef, label %bb21, label %bb
+
+bb:		; preds = %entry
+	unreachable
+
+bb21:		; preds = %entry
+	switch i32 undef, label %bb103 [
+		i32 1, label %bb101
+		i32 4, label %bb75
+		i32 6, label %bb68
+		i32 7, label %bb67
+		i32 8, label %bb25
+	]
+
+bb25:		; preds = %bb21
+	ret i32 41
+
+bb67:		; preds = %bb21
+	ret i32 40
+
+bb68:		; preds = %bb21
+	br i1 undef, label %bb69, label %bb70
+
+bb69:		; preds = %bb68
+	ret i32 undef
+
+bb70:		; preds = %bb68
+	unreachable
+
+bb75:		; preds = %bb21
+	unreachable
+
+bb101:		; preds = %bb21
+	unreachable
+
+bb103:		; preds = %bb21
+	switch i32 undef, label %bb104 [
+		i32 0, label %bb126
+		i32 4, label %fake_eof
+		i32 26, label %fake_eof
+		i32 34, label %bb1423
+		i32 36, label %bb1050
+		i32 37, label %bb534
+		i32 39, label %bb1412
+		i32 41, label %bb643
+		i32 44, label %bb544
+		i32 48, label %bb1406
+		i32 49, label %bb1406
+		i32 50, label %bb1406
+		i32 51, label %bb1406
+		i32 52, label %bb1406
+		i32 53, label %bb1406
+		i32 54, label %bb1406
+		i32 55, label %bb1406
+		i32 56, label %bb1406
+		i32 57, label %bb1406
+		i32 59, label %bb639
+		i32 65, label %keylookup
+		i32 66, label %keylookup
+		i32 67, label %keylookup
+		i32 68, label %keylookup
+		i32 69, label %keylookup
+		i32 70, label %keylookup
+		i32 71, label %keylookup
+		i32 72, label %keylookup
+		i32 73, label %keylookup
+		i32 74, label %keylookup
+		i32 75, label %keylookup
+		i32 76, label %keylookup
+		i32 77, label %keylookup
+		i32 78, label %keylookup
+		i32 79, label %keylookup
+		i32 80, label %keylookup
+		i32 81, label %keylookup
+		i32 82, label %keylookup
+		i32 83, label %keylookup
+		i32 84, label %keylookup
+		i32 85, label %keylookup
+		i32 86, label %keylookup
+		i32 87, label %keylookup
+		i32 88, label %keylookup
+		i32 89, label %keylookup
+		i32 90, label %keylookup
+		i32 92, label %bb1455
+		i32 95, label %keylookup
+		i32 96, label %bb1447
+		i32 97, label %keylookup
+		i32 98, label %keylookup
+		i32 99, label %keylookup
+		i32 100, label %keylookup
+		i32 101, label %keylookup
+		i32 102, label %keylookup
+		i32 103, label %keylookup
+		i32 104, label %keylookup
+		i32 105, label %keylookup
+		i32 106, label %keylookup
+		i32 107, label %keylookup
+		i32 108, label %keylookup
+		i32 109, label %keylookup
+		i32 110, label %keylookup
+		i32 111, label %keylookup
+		i32 112, label %keylookup
+		i32 113, label %keylookup
+		i32 114, label %keylookup
+		i32 115, label %keylookup
+		i32 116, label %keylookup
+		i32 117, label %keylookup
+		i32 118, label %keylookup
+		i32 119, label %keylookup
+		i32 120, label %keylookup
+		i32 121, label %keylookup
+		i32 122, label %keylookup
+		i32 126, label %bb544
+	]
+
+bb104:		; preds = %bb103
+	unreachable
+
+bb126:		; preds = %bb103
+	ret i32 0
+
+fake_eof:		; preds = %bb1841, %bb103, %bb103
+	unreachable
+
+bb534:		; preds = %bb103
+	unreachable
+
+bb544:		; preds = %bb103, %bb103
+	ret i32 undef
+
+bb639:		; preds = %bb103
+	unreachable
+
+bb643:		; preds = %bb103
+	unreachable
+
+bb1050:		; preds = %bb103
+	unreachable
+
+bb1406:		; preds = %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103
+	unreachable
+
+bb1412:		; preds = %bb103
+	unreachable
+
+bb1423:		; preds = %bb103
+	unreachable
+
+bb1447:		; preds = %bb103
+	unreachable
+
+bb1455:		; preds = %bb103
+	unreachable
+
+keylookup:		; preds = %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103
+	br i1 undef, label %bb1498, label %bb1496
+
+bb1496:		; preds = %keylookup
+	br i1 undef, label %bb1498, label %bb1510.preheader
+
+bb1498:		; preds = %bb1496, %keylookup
+	unreachable
+
+bb1510.preheader:		; preds = %bb1496
+	br i1 undef, label %bb1511, label %bb1518
+
+bb1511:		; preds = %bb1510.preheader
+	br label %bb1518
+
+bb1518:		; preds = %bb1511, %bb1510.preheader
+	switch i32 undef, label %bb741.i4285 [
+		i32 95, label %bb744.i4287
+		i32 115, label %bb852.i4394
+	]
+
+bb741.i4285:		; preds = %bb1518
+	br label %Perl_keyword.exit4735
+
+bb744.i4287:		; preds = %bb1518
+	br label %Perl_keyword.exit4735
+
+bb852.i4394:		; preds = %bb1518
+	br i1 undef, label %bb861.i4404, label %bb856.i4399
+
+bb856.i4399:		; preds = %bb852.i4394
+	br label %Perl_keyword.exit4735
+
+bb861.i4404:		; preds = %bb852.i4394
+	br label %Perl_keyword.exit4735
+
+Perl_keyword.exit4735:		; preds = %bb861.i4404, %bb856.i4399, %bb744.i4287, %bb741.i4285
+	br i1 undef, label %bb1544, label %reserved_word
+
+bb1544:		; preds = %Perl_keyword.exit4735
+	br i1 undef, label %bb1565, label %bb1545
+
+bb1545:		; preds = %bb1544
+	br i1 undef, label %bb1563, label %bb1558
+
+bb1558:		; preds = %bb1545
+	%0 = load %struct.SV** undef		; <%struct.SV*> [#uses=1]
+	%1 = bitcast %struct.SV* %0 to %struct.GV*		; <%struct.GV*> [#uses=5]
+	br i1 undef, label %bb1563, label %bb1559
+
+bb1559:		; preds = %bb1558
+	br i1 undef, label %bb1560, label %bb1563
+
+bb1560:		; preds = %bb1559
+	br i1 undef, label %bb1563, label %bb1561
+
+bb1561:		; preds = %bb1560
+	br i1 undef, label %bb1562, label %bb1563
+
+bb1562:		; preds = %bb1561
+	br label %bb1563
+
+bb1563:		; preds = %bb1562, %bb1561, %bb1560, %bb1559, %bb1558, %bb1545
+	%gv19.3 = phi %struct.GV* [ %1, %bb1562 ], [ undef, %bb1545 ], [ %1, %bb1558 ], [ %1, %bb1559 ], [ %1, %bb1560 ], [ %1, %bb1561 ]		; <%struct.GV*> [#uses=0]
+	br i1 undef, label %bb1565, label %reserved_word
+
+bb1565:		; preds = %bb1563, %bb1544
+	br i1 undef, label %bb1573, label %bb1580
+
+bb1573:		; preds = %bb1565
+	br label %bb1580
+
+bb1580:		; preds = %bb1573, %bb1565
+	br i1 undef, label %bb1595, label %reserved_word
+
+bb1595:		; preds = %bb1580
+	br i1 undef, label %reserved_word, label %bb1597
+
+bb1597:		; preds = %bb1595
+	br i1 undef, label %reserved_word, label %bb1602
+
+bb1602:		; preds = %bb1597
+	br label %reserved_word
+
+reserved_word:		; preds = %bb1602, %bb1597, %bb1595, %bb1580, %bb1563, %Perl_keyword.exit4735
+	switch i32 undef, label %bb2012 [
+		i32 1, label %bb1819
+		i32 2, label %bb1830
+		i32 4, label %bb1841
+		i32 5, label %bb1841
+		i32 8, label %bb1880
+		i32 14, label %bb1894
+		i32 16, label %bb1895
+		i32 17, label %bb1896
+		i32 18, label %bb1897
+		i32 19, label %bb1898
+		i32 20, label %bb1899
+		i32 22, label %bb1906
+		i32 23, label %bb1928
+		i32 24, label %bb2555
+		i32 26, label %bb1929
+		i32 31, label %bb1921
+		i32 32, label %bb1930
+		i32 33, label %bb1905
+		i32 34, label %bb1936
+		i32 35, label %bb1927
+		i32 37, label %bb1962
+		i32 40, label %bb1951
+		i32 41, label %bb1946
+		i32 42, label %bb1968
+		i32 44, label %bb1969
+		i32 45, label %bb1970
+		i32 46, label %bb2011
+		i32 47, label %bb2006
+		i32 48, label %bb2007
+		i32 49, label %bb2009
+		i32 50, label %bb2010
+		i32 51, label %bb2008
+		i32 53, label %bb1971
+		i32 54, label %bb1982
+		i32 55, label %bb2005
+		i32 59, label %bb2081
+		i32 61, label %bb2087
+		i32 64, label %bb2080
+		i32 65, label %really_sub
+		i32 66, label %bb2079
+		i32 67, label %bb2089
+		i32 69, label %bb2155
+		i32 72, label %bb2137
+		i32 74, label %bb2138
+		i32 75, label %bb2166
+		i32 76, label %bb2144
+		i32 78, label %bb2145
+		i32 81, label %bb2102
+		i32 82, label %bb2108
+		i32 84, label %bb2114
+		i32 85, label %bb2115
+		i32 86, label %bb2116
+		i32 89, label %bb2146
+		i32 90, label %bb2147
+		i32 91, label %bb2148
+		i32 93, label %bb2154
+		i32 94, label %bb2167
+		i32 96, label %bb2091
+		i32 97, label %bb2090
+		i32 98, label %bb2088
+		i32 100, label %bb2173
+		i32 101, label %bb2174
+		i32 102, label %bb2175
+		i32 103, label %bb2180
+		i32 104, label %bb2181
+		i32 106, label %bb2187
+		i32 107, label %bb2188
+		i32 110, label %bb2206
+		i32 112, label %bb2217
+		i32 113, label %bb2218
+		i32 114, label %bb2199
+		i32 119, label %bb2205
+		i32 120, label %bb2229
+		i32 121, label %bb2233
+		i32 122, label %bb2234
+		i32 123, label %bb2235
+		i32 124, label %bb2236
+		i32 125, label %bb2237
+		i32 126, label %bb2238
+		i32 127, label %bb2239
+		i32 128, label %bb2268
+		i32 129, label %bb2267
+		i32 133, label %bb2276
+		i32 134, label %bb2348
+		i32 135, label %bb2337
+		i32 137, label %bb2239
+		i32 138, label %bb2367
+		i32 139, label %bb2368
+		i32 140, label %bb2369
+		i32 141, label %bb2357
+		i32 143, label %bb2349
+		i32 144, label %bb2350
+		i32 146, label %bb2356
+		i32 147, label %bb2370
+		i32 148, label %bb2445
+		i32 149, label %bb2453
+		i32 151, label %bb2381
+		i32 152, label %bb2457
+		i32 154, label %bb2516
+		i32 156, label %bb2522
+		i32 158, label %bb2527
+		i32 159, label %bb2537
+		i32 160, label %bb2503
+		i32 162, label %bb2504
+		i32 163, label %bb2464
+		i32 165, label %bb2463
+		i32 166, label %bb2538
+		i32 168, label %bb2515
+		i32 170, label %bb2549
+		i32 172, label %bb2566
+		i32 173, label %bb2595
+		i32 174, label %bb2565
+		i32 175, label %bb2567
+		i32 176, label %bb2568
+		i32 177, label %bb2569
+		i32 178, label %bb2570
+		i32 179, label %bb2594
+		i32 182, label %bb2571
+		i32 183, label %bb2572
+		i32 185, label %bb2593
+		i32 186, label %bb2583
+		i32 187, label %bb2596
+		i32 189, label %bb2602
+		i32 190, label %bb2603
+		i32 191, label %bb2604
+		i32 192, label %bb2605
+		i32 193, label %bb2606
+		i32 196, label %bb2617
+		i32 197, label %bb2618
+		i32 198, label %bb2619
+		i32 199, label %bb2627
+		i32 200, label %bb2625
+		i32 201, label %bb2626
+		i32 206, label %really_sub
+		i32 207, label %bb2648
+		i32 208, label %bb2738
+		i32 209, label %bb2739
+		i32 210, label %bb2740
+		i32 211, label %bb2742
+		i32 212, label %bb2741
+		i32 213, label %bb2737
+		i32 214, label %bb2743
+		i32 217, label %bb2758
+		i32 219, label %bb2764
+		i32 220, label %bb2765
+		i32 221, label %bb2744
+		i32 222, label %bb2766
+		i32 226, label %bb2785
+		i32 227, label %bb2783
+		i32 228, label %bb2784
+		i32 229, label %bb2790
+		i32 230, label %bb2797
+		i32 232, label %bb2782
+		i32 234, label %bb2791
+		i32 236, label %bb2815
+		i32 237, label %bb2818
+		i32 238, label %bb2819
+		i32 239, label %bb2820
+		i32 240, label %bb2817
+		i32 241, label %bb2816
+		i32 242, label %bb2821
+		i32 243, label %bb2826
+		i32 244, label %bb2829
+		i32 245, label %bb2830
+	]
+
+bb1819:		; preds = %reserved_word
+	unreachable
+
+bb1830:		; preds = %reserved_word
+	unreachable
+
+bb1841:		; preds = %reserved_word, %reserved_word
+	br i1 undef, label %fake_eof, label %bb1842
+
+bb1842:		; preds = %bb1841
+	unreachable
+
+bb1880:		; preds = %reserved_word
+	unreachable
+
+bb1894:		; preds = %reserved_word
+	ret i32 undef
+
+bb1895:		; preds = %reserved_word
+	ret i32 301
+
+bb1896:		; preds = %reserved_word
+	ret i32 undef
+
+bb1897:		; preds = %reserved_word
+	ret i32 undef
+
+bb1898:		; preds = %reserved_word
+	ret i32 undef
+
+bb1899:		; preds = %reserved_word
+	ret i32 undef
+
+bb1905:		; preds = %reserved_word
+	ret i32 278
+
+bb1906:		; preds = %reserved_word
+	unreachable
+
+bb1921:		; preds = %reserved_word
+	ret i32 288
+
+bb1927:		; preds = %reserved_word
+	ret i32 undef
+
+bb1928:		; preds = %reserved_word
+	ret i32 undef
+
+bb1929:		; preds = %reserved_word
+	ret i32 undef
+
+bb1930:		; preds = %reserved_word
+	ret i32 undef
+
+bb1936:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb1937
+
+bb1937:		; preds = %bb1936
+	ret i32 undef
+
+bb1946:		; preds = %reserved_word
+	unreachable
+
+bb1951:		; preds = %reserved_word
+	ret i32 undef
+
+bb1962:		; preds = %reserved_word
+	ret i32 undef
+
+bb1968:		; preds = %reserved_word
+	ret i32 280
+
+bb1969:		; preds = %reserved_word
+	ret i32 276
+
+bb1970:		; preds = %reserved_word
+	ret i32 277
+
+bb1971:		; preds = %reserved_word
+	ret i32 288
+
+bb1982:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb1986
+
+bb1986:		; preds = %bb1982
+	ret i32 undef
+
+bb2005:		; preds = %reserved_word
+	ret i32 undef
+
+bb2006:		; preds = %reserved_word
+	ret i32 282
+
+bb2007:		; preds = %reserved_word
+	ret i32 282
+
+bb2008:		; preds = %reserved_word
+	ret i32 282
+
+bb2009:		; preds = %reserved_word
+	ret i32 282
+
+bb2010:		; preds = %reserved_word
+	ret i32 282
+
+bb2011:		; preds = %reserved_word
+	ret i32 282
+
+bb2012:		; preds = %reserved_word
+	unreachable
+
+bb2079:		; preds = %reserved_word
+	ret i32 undef
+
+bb2080:		; preds = %reserved_word
+	ret i32 282
+
+bb2081:		; preds = %reserved_word
+	ret i32 undef
+
+bb2087:		; preds = %reserved_word
+	ret i32 undef
+
+bb2088:		; preds = %reserved_word
+	ret i32 287
+
+bb2089:		; preds = %reserved_word
+	ret i32 287
+
+bb2090:		; preds = %reserved_word
+	ret i32 undef
+
+bb2091:		; preds = %reserved_word
+	ret i32 280
+
+bb2102:		; preds = %reserved_word
+	ret i32 282
+
+bb2108:		; preds = %reserved_word
+	ret i32 undef
+
+bb2114:		; preds = %reserved_word
+	ret i32 undef
+
+bb2115:		; preds = %reserved_word
+	ret i32 282
+
+bb2116:		; preds = %reserved_word
+	ret i32 282
+
+bb2137:		; preds = %reserved_word
+	ret i32 undef
+
+bb2138:		; preds = %reserved_word
+	ret i32 282
+
+bb2144:		; preds = %reserved_word
+	ret i32 undef
+
+bb2145:		; preds = %reserved_word
+	ret i32 282
+
+bb2146:		; preds = %reserved_word
+	ret i32 undef
+
+bb2147:		; preds = %reserved_word
+	ret i32 undef
+
+bb2148:		; preds = %reserved_word
+	ret i32 282
+
+bb2154:		; preds = %reserved_word
+	ret i32 undef
+
+bb2155:		; preds = %reserved_word
+	ret i32 282
+
+bb2166:		; preds = %reserved_word
+	ret i32 282
+
+bb2167:		; preds = %reserved_word
+	ret i32 undef
+
+bb2173:		; preds = %reserved_word
+	ret i32 274
+
+bb2174:		; preds = %reserved_word
+	ret i32 undef
+
+bb2175:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb2176
+
+bb2176:		; preds = %bb2175
+	ret i32 undef
+
+bb2180:		; preds = %reserved_word
+	ret i32 undef
+
+bb2181:		; preds = %reserved_word
+	ret i32 undef
+
+bb2187:		; preds = %reserved_word
+	ret i32 undef
+
+bb2188:		; preds = %reserved_word
+	ret i32 280
+
+bb2199:		; preds = %reserved_word
+	ret i32 295
+
+bb2205:		; preds = %reserved_word
+	ret i32 287
+
+bb2206:		; preds = %reserved_word
+	ret i32 287
+
+bb2217:		; preds = %reserved_word
+	ret i32 undef
+
+bb2218:		; preds = %reserved_word
+	ret i32 undef
+
+bb2229:		; preds = %reserved_word
+	unreachable
+
+bb2233:		; preds = %reserved_word
+	ret i32 undef
+
+bb2234:		; preds = %reserved_word
+	ret i32 undef
+
+bb2235:		; preds = %reserved_word
+	ret i32 undef
+
+bb2236:		; preds = %reserved_word
+	ret i32 undef
+
+bb2237:		; preds = %reserved_word
+	ret i32 undef
+
+bb2238:		; preds = %reserved_word
+	ret i32 undef
+
+bb2239:		; preds = %reserved_word, %reserved_word
+	unreachable
+
+bb2267:		; preds = %reserved_word
+	ret i32 280
+
+bb2268:		; preds = %reserved_word
+	ret i32 288
+
+bb2276:		; preds = %reserved_word
+	unreachable
+
+bb2337:		; preds = %reserved_word
+	ret i32 300
+
+bb2348:		; preds = %reserved_word
+	ret i32 undef
+
+bb2349:		; preds = %reserved_word
+	ret i32 undef
+
+bb2350:		; preds = %reserved_word
+	ret i32 undef
+
+bb2356:		; preds = %reserved_word
+	ret i32 undef
+
+bb2357:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb2358
+
+bb2358:		; preds = %bb2357
+	ret i32 undef
+
+bb2367:		; preds = %reserved_word
+	ret i32 undef
+
+bb2368:		; preds = %reserved_word
+	ret i32 270
+
+bb2369:		; preds = %reserved_word
+	ret i32 undef
+
+bb2370:		; preds = %reserved_word
+	unreachable
+
+bb2381:		; preds = %reserved_word
+	unreachable
+
+bb2445:		; preds = %reserved_word
+	unreachable
+
+bb2453:		; preds = %reserved_word
+	unreachable
+
+bb2457:		; preds = %reserved_word
+	unreachable
+
+bb2463:		; preds = %reserved_word
+	ret i32 286
+
+bb2464:		; preds = %reserved_word
+	unreachable
+
+bb2503:		; preds = %reserved_word
+	ret i32 280
+
+bb2504:		; preds = %reserved_word
+	ret i32 undef
+
+bb2515:		; preds = %reserved_word
+	ret i32 undef
+
+bb2516:		; preds = %reserved_word
+	ret i32 undef
+
+bb2522:		; preds = %reserved_word
+	unreachable
+
+bb2527:		; preds = %reserved_word
+	unreachable
+
+bb2537:		; preds = %reserved_word
+	ret i32 undef
+
+bb2538:		; preds = %reserved_word
+	ret i32 undef
+
+bb2549:		; preds = %reserved_word
+	unreachable
+
+bb2555:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb2556
+
+bb2556:		; preds = %bb2555
+	ret i32 undef
+
+bb2565:		; preds = %reserved_word
+	ret i32 undef
+
+bb2566:		; preds = %reserved_word
+	ret i32 undef
+
+bb2567:		; preds = %reserved_word
+	ret i32 undef
+
+bb2568:		; preds = %reserved_word
+	ret i32 undef
+
+bb2569:		; preds = %reserved_word
+	ret i32 undef
+
+bb2570:		; preds = %reserved_word
+	ret i32 undef
+
+bb2571:		; preds = %reserved_word
+	ret i32 undef
+
+bb2572:		; preds = %reserved_word
+	ret i32 undef
+
+bb2583:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb2584
+
+bb2584:		; preds = %bb2583
+	ret i32 undef
+
+bb2593:		; preds = %reserved_word
+	ret i32 282
+
+bb2594:		; preds = %reserved_word
+	ret i32 282
+
+bb2595:		; preds = %reserved_word
+	ret i32 undef
+
+bb2596:		; preds = %reserved_word
+	ret i32 undef
+
+bb2602:		; preds = %reserved_word
+	ret i32 undef
+
+bb2603:		; preds = %reserved_word
+	ret i32 undef
+
+bb2604:		; preds = %reserved_word
+	ret i32 undef
+
+bb2605:		; preds = %reserved_word
+	ret i32 undef
+
+bb2606:		; preds = %reserved_word
+	ret i32 undef
+
+bb2617:		; preds = %reserved_word
+	ret i32 undef
+
+bb2618:		; preds = %reserved_word
+	ret i32 undef
+
+bb2619:		; preds = %reserved_word
+	unreachable
+
+bb2625:		; preds = %reserved_word
+	ret i32 undef
+
+bb2626:		; preds = %reserved_word
+	ret i32 undef
+
+bb2627:		; preds = %reserved_word
+	ret i32 undef
+
+bb2648:		; preds = %reserved_word
+	ret i32 undef
+
+really_sub:		; preds = %reserved_word, %reserved_word
+	unreachable
+
+bb2737:		; preds = %reserved_word
+	ret i32 undef
+
+bb2738:		; preds = %reserved_word
+	ret i32 undef
+
+bb2739:		; preds = %reserved_word
+	ret i32 undef
+
+bb2740:		; preds = %reserved_word
+	ret i32 undef
+
+bb2741:		; preds = %reserved_word
+	ret i32 undef
+
+bb2742:		; preds = %reserved_word
+	ret i32 undef
+
+bb2743:		; preds = %reserved_word
+	ret i32 undef
+
+bb2744:		; preds = %reserved_word
+	unreachable
+
+bb2758:		; preds = %reserved_word
+	ret i32 undef
+
+bb2764:		; preds = %reserved_word
+	ret i32 282
+
+bb2765:		; preds = %reserved_word
+	ret i32 282
+
+bb2766:		; preds = %reserved_word
+	ret i32 undef
+
+bb2782:		; preds = %reserved_word
+	ret i32 273
+
+bb2783:		; preds = %reserved_word
+	ret i32 275
+
+bb2784:		; preds = %reserved_word
+	ret i32 undef
+
+bb2785:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb2786
+
+bb2786:		; preds = %bb2785
+	ret i32 undef
+
+bb2790:		; preds = %reserved_word
+	ret i32 undef
+
+bb2791:		; preds = %reserved_word
+	ret i32 undef
+
+bb2797:		; preds = %reserved_word
+	ret i32 undef
+
+bb2815:		; preds = %reserved_word
+	ret i32 undef
+
+bb2816:		; preds = %reserved_word
+	ret i32 272
+
+bb2817:		; preds = %reserved_word
+	ret i32 undef
+
+bb2818:		; preds = %reserved_word
+	ret i32 282
+
+bb2819:		; preds = %reserved_word
+	ret i32 undef
+
+bb2820:		; preds = %reserved_word
+	ret i32 282
+
+bb2821:		; preds = %reserved_word
+	unreachable
+
+bb2826:		; preds = %reserved_word
+	unreachable
+
+bb2829:		; preds = %reserved_word
+	ret i32 300
+
+bb2830:		; preds = %reserved_word
+	unreachable
+
+bb2834:		; preds = %bb2785, %bb2583, %bb2555, %bb2357, %bb2175, %bb1982, %bb1936
+	ret i32 283
+}
diff --git a/test/CodeGen/X86/2009-07-16-CoalescerBug.ll b/test/CodeGen/X86/2009-07-16-CoalescerBug.ll
new file mode 100644
index 0000000000000..48af440df2d6d
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-16-CoalescerBug.ll
@@ -0,0 +1,210 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; rdar://7059496
+
+	%struct.brinfo = type <{ %struct.brinfo*, %struct.brinfo*, i8*, i32, i32, i32, i8, i8, i8, i8 }>
+	%struct.cadata = type <{ i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i32, %struct.cmatcher*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i8, i8, i8, i8 }>
+	%struct.cline = type <{ %struct.cline*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8, i8*, i32, i32, %struct.cline*, %struct.cline*, i32, i32 }>
+	%struct.cmatch = type <{ i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i8, i8, i8, i8, i32*, i32*, i8*, i8*, i32, i32, i32, i32, i16, i8, i8, i16, i8, i8 }>
+	%struct.cmatcher = type <{ i32, i8, i8, i8, i8, %struct.cmatcher*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8 }>
+	%struct.cpattern = type <{ %struct.cpattern*, i32, i8, i8, i8, i8, %union.anon }>
+	%struct.patprog = type <{ i64, i64, i64, i64, i32, i32, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8 }>
+	%union.anon = type <{ [8 x i8] }>
+
+define i32 @addmatches(%struct.cadata* %dat, i8** nocapture %argv) nounwind ssp {
+entry:
+	br i1 undef, label %if.else, label %if.then91
+
+if.then91:		; preds = %entry
+	br label %if.end96
+
+if.else:		; preds = %entry
+	br label %if.end96
+
+if.end96:		; preds = %if.else, %if.then91
+	br i1 undef, label %lor.lhs.false, label %if.then105
+
+lor.lhs.false:		; preds = %if.end96
+	br i1 undef, label %if.else139, label %if.then105
+
+if.then105:		; preds = %lor.lhs.false, %if.end96
+	unreachable
+
+if.else139:		; preds = %lor.lhs.false
+	br i1 undef, label %land.end, label %land.rhs
+
+land.rhs:		; preds = %if.else139
+	unreachable
+
+land.end:		; preds = %if.else139
+	br i1 undef, label %land.lhs.true285, label %if.then315
+
+land.lhs.true285:		; preds = %land.end
+	br i1 undef, label %if.end324, label %if.then322
+
+if.then315:		; preds = %land.end
+	unreachable
+
+if.then322:		; preds = %land.lhs.true285
+	unreachable
+
+if.end324:		; preds = %land.lhs.true285
+	br i1 undef, label %if.end384, label %if.then358
+
+if.then358:		; preds = %if.end324
+	unreachable
+
+if.end384:		; preds = %if.end324
+	br i1 undef, label %if.end394, label %land.lhs.true387
+
+land.lhs.true387:		; preds = %if.end384
+	unreachable
+
+if.end394:		; preds = %if.end384
+	br i1 undef, label %if.end498, label %land.lhs.true399
+
+land.lhs.true399:		; preds = %if.end394
+	br i1 undef, label %if.end498, label %if.then406
+
+if.then406:		; preds = %land.lhs.true399
+	unreachable
+
+if.end498:		; preds = %land.lhs.true399, %if.end394
+	br i1 undef, label %if.end514, label %if.then503
+
+if.then503:		; preds = %if.end498
+	unreachable
+
+if.end514:		; preds = %if.end498
+	br i1 undef, label %if.end585, label %if.then520
+
+if.then520:		; preds = %if.end514
+	br i1 undef, label %lor.lhs.false547, label %if.then560
+
+lor.lhs.false547:		; preds = %if.then520
+	unreachable
+
+if.then560:		; preds = %if.then520
+	br i1 undef, label %if.end585, label %land.lhs.true566
+
+land.lhs.true566:		; preds = %if.then560
+	br i1 undef, label %if.end585, label %if.then573
+
+if.then573:		; preds = %land.lhs.true566
+	unreachable
+
+if.end585:		; preds = %land.lhs.true566, %if.then560, %if.end514
+	br i1 undef, label %cond.true593, label %cond.false599
+
+cond.true593:		; preds = %if.end585
+	unreachable
+
+cond.false599:		; preds = %if.end585
+	br i1 undef, label %if.end647, label %if.then621
+
+if.then621:		; preds = %cond.false599
+	br i1 undef, label %cond.true624, label %cond.false630
+
+cond.true624:		; preds = %if.then621
+	br label %if.end647
+
+cond.false630:		; preds = %if.then621
+	unreachable
+
+if.end647:		; preds = %cond.true624, %cond.false599
+	br i1 undef, label %if.end723, label %if.then701
+
+if.then701:		; preds = %if.end647
+	br label %if.end723
+
+if.end723:		; preds = %if.then701, %if.end647
+	br i1 undef, label %if.else1090, label %if.then729
+
+if.then729:		; preds = %if.end723
+	br i1 undef, label %if.end887, label %if.then812
+
+if.then812:		; preds = %if.then729
+	unreachable
+
+if.end887:		; preds = %if.then729
+	br i1 undef, label %if.end972, label %if.then893
+
+if.then893:		; preds = %if.end887
+	br i1 undef, label %if.end919, label %if.then903
+
+if.then903:		; preds = %if.then893
+	unreachable
+
+if.end919:		; preds = %if.then893
+	br label %if.end972
+
+if.end972:		; preds = %if.end919, %if.end887
+	%sline.0 = phi %struct.cline* [ undef, %if.end919 ], [ null, %if.end887 ]		; <%struct.cline*> [#uses=5]
+	%bcs.0 = phi i32 [ undef, %if.end919 ], [ 0, %if.end887 ]		; <i32> [#uses=5]
+	br i1 undef, label %if.end1146, label %land.lhs.true975
+
+land.lhs.true975:		; preds = %if.end972
+	br i1 undef, label %if.end1146, label %if.then980
+
+if.then980:		; preds = %land.lhs.true975
+	br i1 undef, label %cond.false1025, label %cond.false1004
+
+cond.false1004:		; preds = %if.then980
+	unreachable
+
+cond.false1025:		; preds = %if.then980
+	br i1 undef, label %if.end1146, label %if.then1071
+
+if.then1071:		; preds = %cond.false1025
+	br i1 undef, label %if.then1074, label %if.end1081
+
+if.then1074:		; preds = %if.then1071
+	br label %if.end1081
+
+if.end1081:		; preds = %if.then1074, %if.then1071
+	%call1083 = call %struct.patprog* @patcompile(i8* undef, i32 0, i8** null) nounwind ssp		; <%struct.patprog*> [#uses=2]
+	br i1 undef, label %if.end1146, label %if.then1086
+
+if.then1086:		; preds = %if.end1081
+	br label %if.end1146
+
+if.else1090:		; preds = %if.end723
+	br i1 undef, label %if.end1146, label %land.lhs.true1093
+
+land.lhs.true1093:		; preds = %if.else1090
+	br i1 undef, label %if.end1146, label %if.then1098
+
+if.then1098:		; preds = %land.lhs.true1093
+	unreachable
+
+if.end1146:		; preds = %land.lhs.true1093, %if.else1090, %if.then1086, %if.end1081, %cond.false1025, %land.lhs.true975, %if.end972
+	%cp.0 = phi %struct.patprog* [ %call1083, %if.then1086 ], [ null, %if.end972 ], [ null, %land.lhs.true975 ], [ null, %cond.false1025 ], [ %call1083, %if.end1081 ], [ null, %if.else1090 ], [ null, %land.lhs.true1093 ]		; <%struct.patprog*> [#uses=1]
+	%sline.1 = phi %struct.cline* [ %sline.0, %if.then1086 ], [ %sline.0, %if.end972 ], [ %sline.0, %land.lhs.true975 ], [ %sline.0, %cond.false1025 ], [ %sline.0, %if.end1081 ], [ null, %if.else1090 ], [ null, %land.lhs.true1093 ]		; <%struct.cline*> [#uses=1]
+	%bcs.1 = phi i32 [ %bcs.0, %if.then1086 ], [ %bcs.0, %if.end972 ], [ %bcs.0, %land.lhs.true975 ], [ %bcs.0, %cond.false1025 ], [ %bcs.0, %if.end1081 ], [ 0, %if.else1090 ], [ 0, %land.lhs.true1093 ]		; <i32> [#uses=1]
+	br i1 undef, label %if.end1307, label %do.body1270
+
+do.body1270:		; preds = %if.end1146
+	unreachable
+
+if.end1307:		; preds = %if.end1146
+	br i1 undef, label %if.end1318, label %if.then1312
+
+if.then1312:		; preds = %if.end1307
+	unreachable
+
+if.end1318:		; preds = %if.end1307
+	br i1 undef, label %for.cond1330.preheader, label %if.then1323
+
+if.then1323:		; preds = %if.end1318
+	unreachable
+
+for.cond1330.preheader:		; preds = %if.end1318
+	%call1587 = call i8* @comp_match(i8* undef, i8* undef, i8* undef, %struct.patprog* %cp.0, %struct.cline** undef, i32 0, %struct.brinfo** undef, i32 0, %struct.brinfo** undef, i32 %bcs.1, i32* undef) nounwind ssp		; <i8*> [#uses=0]
+	%call1667 = call %struct.cmatch* @add_match_data(i32 0, i8* undef, i8* undef, %struct.cline* undef, i8* undef, i8* null, i8* undef, i8* undef, i8* undef, i8* undef, %struct.cline* null, i8* undef, %struct.cline* %sline.1, i8* undef, i32 undef, i32 undef) ssp		; <%struct.cmatch*> [#uses=0]
+	unreachable
+}
+
+declare %struct.patprog* @patcompile(i8*, i32, i8**) ssp
+
+declare i8* @comp_match(i8*, i8*, i8*, %struct.patprog*, %struct.cline**, i32, %struct.brinfo**, i32, %struct.brinfo**, i32, i32*) ssp
+
+declare %struct.cmatch* @add_match_data(i32, i8*, i8*, %struct.cline*, i8*, i8*, i8*, i8*, i8*, i8*, %struct.cline*, i8*, %struct.cline*, i8*, i32, i32) nounwind ssp
diff --git a/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll b/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll
new file mode 100644
index 0000000000000..e21c8923df4a0
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll
@@ -0,0 +1,102 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+
+; CHECK: _foo:
+; CHECK: pavgw LCPI1_4(%rip)
+
+; rdar://7057804
+
+define void @foo(i16* %out8x8, i16* %in8x8, i32 %lastrow) optsize ssp {
+entry:
+	%0 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%1 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %0, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%2 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%3 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %2, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i10 = add <8 x i16> %0, %3		; <<8 x i16>> [#uses=1]
+	%4 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> zeroinitializer, <8 x i16> %1) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%5 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i10, <8 x i16> %4) nounwind readnone		; <<8 x i16>> [#uses=3]
+	%6 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%7 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%8 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %7, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%9 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%10 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %9, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i8 = add <8 x i16> %7, %10		; <<8 x i16>> [#uses=1]
+	%11 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %8) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%12 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i8, <8 x i16> %11) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%13 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%14 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%15 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %6, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%17 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %12, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%18 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %13, <8 x i16> %15) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%19 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %14) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%20 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=4]
+	%21 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %17) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%22 = bitcast <8 x i16> %21 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%23 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%24 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %23, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%25 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%26 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %25, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i6 = add <8 x i16> %23, %26		; <<8 x i16>> [#uses=1]
+	%27 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %24) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%28 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i6, <8 x i16> %27) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%29 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%30 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %29, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%31 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%32 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %31, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i4 = add <8 x i16> %29, %32		; <<8 x i16>> [#uses=1]
+	%33 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %30) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%34 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i4, <8 x i16> %33) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%35 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> %20) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%tmp.i2.i1 = mul <8 x i16> %20, <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>		; <<8 x i16>> [#uses=1]
+	%36 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %35, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%37 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i1, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%38 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %37, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i2 = add <8 x i16> %35, %38		; <<8 x i16>> [#uses=1]
+	%39 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %36) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%40 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i2, <8 x i16> %39) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%41 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> %20) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%tmp.i2.i = mul <8 x i16> %20, <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>		; <<8 x i16>> [#uses=1]
+	%42 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %41, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%43 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%44 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %43, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i = add <8 x i16> %41, %44		; <<8 x i16>> [#uses=1]
+	%45 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %42) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%46 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i, <8 x i16> %45) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%47 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %18, <8 x i16> %16) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%48 = bitcast <8 x i16> %47 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%49 = bitcast <8 x i16> %28 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%50 = getelementptr i16* %out8x8, i64 8		; <i16*> [#uses=1]
+	%51 = bitcast i16* %50 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %49, <2 x i64>* %51, align 16
+	%52 = bitcast <8 x i16> %40 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%53 = getelementptr i16* %out8x8, i64 16		; <i16*> [#uses=1]
+	%54 = bitcast i16* %53 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %52, <2 x i64>* %54, align 16
+	%55 = getelementptr i16* %out8x8, i64 24		; <i16*> [#uses=1]
+	%56 = bitcast i16* %55 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %48, <2 x i64>* %56, align 16
+	%57 = bitcast <8 x i16> %46 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%58 = getelementptr i16* %out8x8, i64 40		; <i16*> [#uses=1]
+	%59 = bitcast i16* %58 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %57, <2 x i64>* %59, align 16
+	%60 = bitcast <8 x i16> %34 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%61 = getelementptr i16* %out8x8, i64 48		; <i16*> [#uses=1]
+	%62 = bitcast i16* %61 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %60, <2 x i64>* %62, align 16
+	%63 = getelementptr i16* %out8x8, i64 56		; <i16*> [#uses=1]
+	%64 = bitcast i16* %63 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %22, <2 x i64>* %64, align 16
+	ret void
+}
+
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
diff --git a/test/CodeGen/X86/2009-07-17-StackColoringBug.ll b/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
new file mode 100644
index 0000000000000..3e5bd348ecd97
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -disable-fp-elim -color-ss-with-regs | not grep dil
+; PR4552
+
+target triple = "i386-pc-linux-gnu"
+@g_8 = internal global i32 0		; <i32*> [#uses=1]
+@g_72 = internal global i32 0		; <i32*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8, i8)* @uint84 to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define i32 @uint84(i32 %p_15, i8 signext %p_17, i8 signext %p_19) nounwind {
+entry:
+	%g_72.promoted = load i32* @g_72		; <i32> [#uses=1]
+	%g_8.promoted = load i32* @g_8		; <i32> [#uses=1]
+	br label %bb
+
+bb:		; preds = %func_40.exit, %entry
+	%g_8.tmp.1 = phi i32 [ %g_8.promoted, %entry ], [ %g_8.tmp.0, %func_40.exit ]		; <i32> [#uses=3]
+	%g_72.tmp.1 = phi i32 [ %g_72.promoted, %entry ], [ %g_72.tmp.0, %func_40.exit ]		; <i32> [#uses=3]
+	%retval12.i4.i.i = trunc i32 %g_8.tmp.1 to i8		; <i8> [#uses=2]
+	%0 = trunc i32 %g_72.tmp.1 to i8		; <i8> [#uses=2]
+	%1 = mul i8 %retval12.i4.i.i, %0		; <i8> [#uses=1]
+	%2 = icmp eq i8 %1, 0		; <i1> [#uses=1]
+	br i1 %2, label %bb2.i.i, label %bb.i.i
+
+bb.i.i:		; preds = %bb
+	%3 = sext i8 %0 to i32		; <i32> [#uses=1]
+	%4 = and i32 %3, 50295		; <i32> [#uses=1]
+	%5 = icmp eq i32 %4, 0		; <i1> [#uses=1]
+	br i1 %5, label %bb2.i.i, label %func_55.exit.i
+
+bb2.i.i:		; preds = %bb.i.i, %bb
+	br label %func_55.exit.i
+
+func_55.exit.i:		; preds = %bb2.i.i, %bb.i.i
+	%g_72.tmp.2 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ]		; <i32> [#uses=1]
+	%6 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ]		; <i32> [#uses=1]
+	%7 = trunc i32 %6 to i8		; <i8> [#uses=2]
+	%8 = mul i8 %7, %retval12.i4.i.i		; <i8> [#uses=1]
+	%9 = icmp eq i8 %8, 0		; <i1> [#uses=1]
+	br i1 %9, label %bb2.i4.i, label %bb.i3.i
+
+bb.i3.i:		; preds = %func_55.exit.i
+	%10 = sext i8 %7 to i32		; <i32> [#uses=1]
+	%11 = and i32 %10, 50295		; <i32> [#uses=1]
+	%12 = icmp eq i32 %11, 0		; <i1> [#uses=1]
+	br i1 %12, label %bb2.i4.i, label %func_40.exit
+
+bb2.i4.i:		; preds = %bb.i3.i, %func_55.exit.i
+	br label %func_40.exit
+
+func_40.exit:		; preds = %bb2.i4.i, %bb.i3.i
+	%g_72.tmp.0 = phi i32 [ 1, %bb2.i4.i ], [ %g_72.tmp.2, %bb.i3.i ]		; <i32> [#uses=1]
+	%phitmp = icmp sgt i32 %g_8.tmp.1, 0		; <i1> [#uses=1]
+	%g_8.tmp.0 = select i1 %phitmp, i32 %g_8.tmp.1, i32 1		; <i32> [#uses=1]
+	br label %bb
+}
diff --git a/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll b/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll
new file mode 100644
index 0000000000000..a0095ab2064c4
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64
+; PR4583
+
+define i32 @atomic_cmpset_long(i64* %dst, i64 %exp, i64 %src) nounwind ssp noredzone noimplicitfloat {
+entry:
+	%0 = call i8 asm sideeffect "\09lock ; \09\09\09cmpxchgq $2,$1 ;\09       sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_long", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* undef, i64 undef, i64 undef, i64* undef) nounwind		; <i8> [#uses=0]
+	br label %1
+
+; <label>:1		; preds = %entry
+	ret i32 undef
+}
diff --git a/test/CodeGen/X86/2009-07-20-CoalescerBug.ll b/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
new file mode 100644
index 0000000000000..e99edd60bd5e3
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
@@ -0,0 +1,165 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; PR4587
+; rdar://7072590
+
+	%struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8 }>
+
+define fastcc i32 @regex_compile(i8* %pattern, i64 %size, i64 %syntax, %struct.re_pattern_buffer* nocapture %bufp) nounwind ssp {
+entry:
+	br i1 undef, label %return, label %if.end
+
+if.end:		; preds = %entry
+	%tmp35 = getelementptr %struct.re_pattern_buffer* %bufp, i64 0, i32 3		; <i64*> [#uses=1]
+	store i64 %syntax, i64* %tmp35
+	store i32 undef, i32* undef
+	br i1 undef, label %if.then66, label %if.end102
+
+if.then66:		; preds = %if.end
+	br i1 false, label %if.else, label %if.then70
+
+if.then70:		; preds = %if.then66
+	%call74 = call i8* @xrealloc(i8* undef, i64 32) nounwind ssp		; <i8*> [#uses=0]
+	unreachable
+
+if.else:		; preds = %if.then66
+	br i1 false, label %do.body86, label %if.end99
+
+do.body86:		; preds = %if.else
+	br i1 false, label %do.end, label %if.then90
+
+if.then90:		; preds = %do.body86
+	unreachable
+
+do.end:		; preds = %do.body86
+	ret i32 12
+
+if.end99:		; preds = %if.else
+	br label %if.end102
+
+if.end102:		; preds = %if.end99, %if.end
+	br label %while.body
+
+while.body:		; preds = %if.end1126, %sw.bb532, %while.body, %if.end102
+	%laststart.2 = phi i8* [ null, %if.end102 ], [ %laststart.7.ph, %if.end1126 ], [ %laststart.2, %sw.bb532 ], [ %laststart.2, %while.body ]		; <i8*> [#uses=6]
+	%b.1 = phi i8* [ undef, %if.end102 ], [ %ctg29688, %if.end1126 ], [ %b.1, %sw.bb532 ], [ %b.1, %while.body ]		; <i8*> [#uses=5]
+	br i1 undef, label %while.body, label %if.end127
+
+if.end127:		; preds = %while.body
+	switch i32 undef, label %sw.bb532 [
+		i32 123, label %handle_interval
+		i32 92, label %do.body3527
+	]
+
+sw.bb532:		; preds = %if.end127
+	br i1 undef, label %while.body, label %if.end808
+
+if.end808:		; preds = %sw.bb532
+	br i1 undef, label %while.cond1267.preheader, label %if.then811
+
+while.cond1267.preheader:		; preds = %if.end808
+	br i1 false, label %return, label %if.end1294
+
+if.then811:		; preds = %if.end808
+	%call817 = call fastcc i8* @skip_one_char(i8* %laststart.2) ssp		; <i8*> [#uses=0]
+	br i1 undef, label %cond.end834, label %lor.lhs.false827
+
+lor.lhs.false827:		; preds = %if.then811
+	br label %cond.end834
+
+cond.end834:		; preds = %lor.lhs.false827, %if.then811
+	br i1 undef, label %land.lhs.true838, label %while.cond979.preheader
+
+land.lhs.true838:		; preds = %cond.end834
+	br i1 undef, label %if.then842, label %while.cond979.preheader
+
+if.then842:		; preds = %land.lhs.true838
+	%conv851 = trunc i64 undef to i32		; <i32> [#uses=1]
+	br label %while.cond979.preheader
+
+while.cond979.preheader:		; preds = %if.then842, %land.lhs.true838, %cond.end834
+	%startoffset.0.ph = phi i32 [ 0, %cond.end834 ], [ 0, %land.lhs.true838 ], [ %conv851, %if.then842 ]		; <i32> [#uses=2]
+	%laststart.7.ph = phi i8* [ %laststart.2, %cond.end834 ], [ %laststart.2, %land.lhs.true838 ], [ %laststart.2, %if.then842 ]		; <i8*> [#uses=3]
+	%b.4.ph = phi i8* [ %b.1, %cond.end834 ], [ %b.1, %land.lhs.true838 ], [ %b.1, %if.then842 ]		; <i8*> [#uses=3]
+	%ctg29688 = getelementptr i8* %b.4.ph, i64 6		; <i8*> [#uses=1]
+	br label %while.cond979
+
+while.cond979:		; preds = %if.end1006, %while.cond979.preheader
+	%cmp991 = icmp ugt i64 undef, 0		; <i1> [#uses=1]
+	br i1 %cmp991, label %do.body994, label %while.end1088
+
+do.body994:		; preds = %while.cond979
+	br i1 undef, label %return, label %if.end1006
+
+if.end1006:		; preds = %do.body994
+	%cmp1014 = icmp ugt i64 undef, 32768		; <i1> [#uses=1]
+	%storemerge10953 = select i1 %cmp1014, i64 32768, i64 undef		; <i64> [#uses=1]
+	store i64 %storemerge10953, i64* undef
+	br i1 false, label %return, label %while.cond979
+
+while.end1088:		; preds = %while.cond979
+	br i1 undef, label %if.then1091, label %if.else1101
+
+if.then1091:		; preds = %while.end1088
+	store i8 undef, i8* undef
+	%idx.ext1132.pre = zext i32 %startoffset.0.ph to i64		; <i64> [#uses=1]
+	%add.ptr1133.pre = getelementptr i8* %laststart.7.ph, i64 %idx.ext1132.pre		; <i8*> [#uses=1]
+	%sub.ptr.lhs.cast1135.pre = ptrtoint i8* %add.ptr1133.pre to i64		; <i64> [#uses=1]
+	br label %if.end1126
+
+if.else1101:		; preds = %while.end1088
+	%cond1109 = select i1 undef, i32 18, i32 14		; <i32> [#uses=1]
+	%idx.ext1112 = zext i32 %startoffset.0.ph to i64		; <i64> [#uses=1]
+	%add.ptr1113 = getelementptr i8* %laststart.7.ph, i64 %idx.ext1112		; <i8*> [#uses=2]
+	%sub.ptr.rhs.cast1121 = ptrtoint i8* %add.ptr1113 to i64		; <i64> [#uses=1]
+	call fastcc void @insert_op1(i32 %cond1109, i8* %add.ptr1113, i32 undef, i8* %b.4.ph) ssp
+	br label %if.end1126
+
+if.end1126:		; preds = %if.else1101, %if.then1091
+	%sub.ptr.lhs.cast1135.pre-phi = phi i64 [ %sub.ptr.rhs.cast1121, %if.else1101 ], [ %sub.ptr.lhs.cast1135.pre, %if.then1091 ]		; <i64> [#uses=1]
+	%add.ptr1128 = getelementptr i8* %b.4.ph, i64 3		; <i8*> [#uses=1]
+	%sub.ptr.rhs.cast1136 = ptrtoint i8* %add.ptr1128 to i64		; <i64> [#uses=1]
+	%sub.ptr.sub1137 = sub i64 %sub.ptr.lhs.cast1135.pre-phi, %sub.ptr.rhs.cast1136		; <i64> [#uses=1]
+	%sub.ptr.sub11378527 = trunc i64 %sub.ptr.sub1137 to i32		; <i32> [#uses=1]
+	%conv1139 = add i32 %sub.ptr.sub11378527, -3		; <i32> [#uses=1]
+	store i8 undef, i8* undef
+	%shr10.i8599 = lshr i32 %conv1139, 8		; <i32> [#uses=1]
+	%conv6.i8600 = trunc i32 %shr10.i8599 to i8		; <i8> [#uses=1]
+	store i8 %conv6.i8600, i8* undef
+	br label %while.body
+
+if.end1294:		; preds = %while.cond1267.preheader
+	ret i32 12
+
+do.body3527:		; preds = %if.end127
+	br i1 undef, label %do.end3536, label %if.then3531
+
+if.then3531:		; preds = %do.body3527
+	unreachable
+
+do.end3536:		; preds = %do.body3527
+	ret i32 5
+
+handle_interval:		; preds = %if.end127
+	br i1 undef, label %do.body4547, label %cond.false4583
+
+do.body4547:		; preds = %handle_interval
+	br i1 undef, label %do.end4556, label %if.then4551
+
+if.then4551:		; preds = %do.body4547
+	unreachable
+
+do.end4556:		; preds = %do.body4547
+	ret i32 9
+
+cond.false4583:		; preds = %handle_interval
+	unreachable
+
+return:		; preds = %if.end1006, %do.body994, %while.cond1267.preheader, %entry
+	ret i32 undef
+}
+
+declare i8* @xrealloc(i8*, i64) ssp
+
+declare fastcc i8* @skip_one_char(i8*) nounwind readonly ssp
+
+declare fastcc void @insert_op1(i32, i8*, i32, i8*) nounwind ssp
diff --git a/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll b/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
new file mode 100644
index 0000000000000..e83b3a7db5921
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86
+
+@bsBuff = internal global i32 0		; <i32*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @bsGetUInt32 to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define fastcc i32 @bsGetUInt32() nounwind ssp {
+entry:
+	%bsBuff.promoted44 = load i32* @bsBuff		; <i32> [#uses=1]
+	%0 = add i32 0, -8		; <i32> [#uses=1]
+	%1 = lshr i32 %bsBuff.promoted44, %0		; <i32> [#uses=1]
+	%2 = shl i32 %1, 8		; <i32> [#uses=1]
+	br label %bb3.i17
+
+bb3.i9:		; preds = %bb3.i17
+	br i1 false, label %bb2.i16, label %bb1.i15
+
+bb1.i15:		; preds = %bb3.i9
+	unreachable
+
+bb2.i16:		; preds = %bb3.i9
+	br label %bb3.i17
+
+bb3.i17:		; preds = %bb2.i16, %entry
+	br i1 false, label %bb3.i9, label %bsR.exit18
+
+bsR.exit18:		; preds = %bb3.i17
+	%3 = or i32 0, %2		; <i32> [#uses=0]
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
new file mode 100644
index 0000000000000..b9b09a3f00042
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64
+; PR4669
+declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32)
+
+define <1 x i64> @test(i64 %t) {
+entry:
+	%t1 = insertelement <1 x i64> undef, i64 %t, i32 0
+	%t2 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %t1, i32 48)
+	ret <1 x i64> %t2
+}
diff --git a/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
new file mode 100644
index 0000000000000..b329c9163c9f5
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -O3
+; PR4626
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@g_3 = common global i8 0, align 1		; <i8*> [#uses=2]
+
+define signext i8 @safe_mul_func_int16_t_s_s(i32 %_si1, i8 signext %_si2) nounwind readnone {
+entry:
+	%tobool = icmp eq i32 %_si1, 0		; <i1> [#uses=1]
+	%cmp = icmp sgt i8 %_si2, 0		; <i1> [#uses=2]
+	%or.cond = or i1 %cmp, %tobool		; <i1> [#uses=1]
+	br i1 %or.cond, label %lor.rhs, label %land.lhs.true3
+
+land.lhs.true3:		; preds = %entry
+	%conv5 = sext i8 %_si2 to i32		; <i32> [#uses=1]
+	%cmp7 = icmp slt i32 %conv5, %_si1		; <i1> [#uses=1]
+	br i1 %cmp7, label %cond.end, label %lor.rhs
+
+lor.rhs:		; preds = %land.lhs.true3, %entry
+	%cmp10.not = icmp slt i32 %_si1, 1		; <i1> [#uses=1]
+	%or.cond23 = and i1 %cmp, %cmp10.not		; <i1> [#uses=1]
+	br i1 %or.cond23, label %lor.end, label %cond.false
+
+lor.end:		; preds = %lor.rhs
+	%tobool19 = icmp ne i8 %_si2, 0		; <i1> [#uses=2]
+	%lor.ext = zext i1 %tobool19 to i32		; <i32> [#uses=1]
+	br i1 %tobool19, label %cond.end, label %cond.false
+
+cond.false:		; preds = %lor.end, %lor.rhs
+	%conv21 = sext i8 %_si2 to i32		; <i32> [#uses=1]
+	br label %cond.end
+
+cond.end:		; preds = %cond.false, %lor.end, %land.lhs.true3
+	%cond = phi i32 [ %conv21, %cond.false ], [ 1, %land.lhs.true3 ], [ %lor.ext, %lor.end ]		; <i32> [#uses=1]
+	%conv22 = trunc i32 %cond to i8		; <i8> [#uses=1]
+	ret i8 %conv22
+}
+
+define i32 @func_34(i8 signext %p_35) nounwind readonly {
+entry:
+	%tobool = icmp eq i8 %p_35, 0		; <i1> [#uses=1]
+	br i1 %tobool, label %lor.lhs.false, label %if.then
+
+lor.lhs.false:		; preds = %entry
+	%tmp1 = load i8* @g_3		; <i8> [#uses=1]
+	%tobool3 = icmp eq i8 %tmp1, 0		; <i1> [#uses=1]
+	br i1 %tobool3, label %return, label %if.then
+
+if.then:		; preds = %lor.lhs.false, %entry
+	%tmp4 = load i8* @g_3		; <i8> [#uses=1]
+	%conv5 = sext i8 %tmp4 to i32		; <i32> [#uses=1]
+	ret i32 %conv5
+
+return:		; preds = %lor.lhs.false
+	ret i32 0
+}
+
+define void @foo(i32 %p_5) noreturn nounwind {
+entry:
+	%cmp = icmp sgt i32 %p_5, 0		; <i1> [#uses=2]
+	%call = tail call i32 @safe() nounwind		; <i32> [#uses=1]
+	%conv1 = trunc i32 %call to i8		; <i8> [#uses=3]
+	%tobool.i = xor i1 %cmp, true		; <i1> [#uses=3]
+	%cmp.i = icmp sgt i8 %conv1, 0		; <i1> [#uses=3]
+	%or.cond.i = or i1 %cmp.i, %tobool.i		; <i1> [#uses=1]
+	br i1 %or.cond.i, label %lor.rhs.i, label %land.lhs.true3.i
+
+land.lhs.true3.i:		; preds = %entry
+	%xor = zext i1 %cmp to i32		; <i32> [#uses=1]
+	%conv5.i = sext i8 %conv1 to i32		; <i32> [#uses=1]
+	%cmp7.i = icmp slt i32 %conv5.i, %xor		; <i1> [#uses=1]
+	%cmp7.i.not = xor i1 %cmp7.i, true		; <i1> [#uses=1]
+	%or.cond23.i = and i1 %cmp.i, %tobool.i		; <i1> [#uses=1]
+	%or.cond = and i1 %cmp7.i.not, %or.cond23.i		; <i1> [#uses=1]
+	br i1 %or.cond, label %lor.end.i, label %for.inc
+
+lor.rhs.i:		; preds = %entry
+	%or.cond23.i.old = and i1 %cmp.i, %tobool.i		; <i1> [#uses=1]
+	br i1 %or.cond23.i.old, label %lor.end.i, label %for.inc
+
+lor.end.i:		; preds = %lor.rhs.i, %land.lhs.true3.i
+	%tobool19.i = icmp eq i8 %conv1, 0		; <i1> [#uses=0]
+	br label %for.inc
+
+for.inc:		; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i
+	br label %for.inc
+}
+
+declare i32 @safe()
+
+define i32 @func_35(i8 signext %p_35) nounwind readonly {
+entry:
+  %tobool = icmp eq i8 %p_35, 0                   ; <i1> [#uses=1]
+  br i1 %tobool, label %lor.lhs.false, label %if.then
+
+lor.lhs.false:                                    ; preds = %entry
+  %tmp1 = load i8* @g_3                           ; <i8> [#uses=1]
+  %tobool3 = icmp eq i8 %tmp1, 0                  ; <i1> [#uses=1]
+  br i1 %tobool3, label %return, label %if.then
+
+if.then:                                          ; preds = %lor.lhs.false, %entry
+  %tmp4 = load i8* @g_3                           ; <i8> [#uses=1]
+  %conv5 = sext i8 %tmp4 to i32                   ; <i32> [#uses=1]
+  ret i32 %conv5
+
+return:                                           ; preds = %lor.lhs.false
+  ret i32 0
+}
+
+define void @bar(i32 %p_5) noreturn nounwind {
+entry:
+  %cmp = icmp sgt i32 %p_5, 0                     ; <i1> [#uses=2]
+  %call = tail call i32 @safe() nounwind          ; <i32> [#uses=1]
+  %conv1 = trunc i32 %call to i8                  ; <i8> [#uses=3]
+  %tobool.i = xor i1 %cmp, true                   ; <i1> [#uses=3]
+  %cmp.i = icmp sgt i8 %conv1, 0                  ; <i1> [#uses=3]
+  %or.cond.i = or i1 %cmp.i, %tobool.i            ; <i1> [#uses=1]
+  br i1 %or.cond.i, label %lor.rhs.i, label %land.lhs.true3.i
+
+land.lhs.true3.i:                                 ; preds = %entry
+  %xor = zext i1 %cmp to i32                      ; <i32> [#uses=1]
+  %conv5.i = sext i8 %conv1 to i32                ; <i32> [#uses=1]
+  %cmp7.i = icmp slt i32 %conv5.i, %xor           ; <i1> [#uses=1]
+  %cmp7.i.not = xor i1 %cmp7.i, true              ; <i1> [#uses=1]
+  %or.cond23.i = and i1 %cmp.i, %tobool.i         ; <i1> [#uses=1]
+  %or.cond = and i1 %cmp7.i.not, %or.cond23.i     ; <i1> [#uses=1]
+  br i1 %or.cond, label %lor.end.i, label %for.inc
+
+lor.rhs.i:                                        ; preds = %entry
+  %or.cond23.i.old = and i1 %cmp.i, %tobool.i     ; <i1> [#uses=1]
+  br i1 %or.cond23.i.old, label %lor.end.i, label %for.inc
+
+lor.end.i:                                        ; preds = %lor.rhs.i, %land.lhs.true3.i
+  %tobool19.i = icmp eq i8 %conv1, 0              ; <i1> [#uses=0]
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i
+  br label %for.inc
+}
+
+declare i32 @safe()
diff --git a/test/CodeGen/X86/2009-08-06-inlineasm.ll b/test/CodeGen/X86/2009-08-06-inlineasm.ll
new file mode 100644
index 0000000000000..cc2f3d824bbe7
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-06-inlineasm.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s
+; PR4668
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define i32 @x(i32 %qscale) nounwind {
+entry:
+	%temp_block = alloca [64 x i16], align 16		; <[64 x i16]*> [#uses=0]
+	%tmp = call i32 asm sideeffect "xor %edx, %edx", "={dx},~{dirflag},~{fpsr},~{flags}"() nounwind		; <i32> [#uses=1]
+	br i1 undef, label %if.end78, label %if.then28
+
+if.then28:		; preds = %entry
+	br label %if.end78
+
+if.end78:		; preds = %if.then28, %entry
+	%level.1 = phi i32 [ %tmp, %if.then28 ], [ 0, %entry ]		; <i32> [#uses=1]
+	%add.ptr1 = getelementptr [64 x i16]* null, i32 0, i32 %qscale		; <i16*> [#uses=1]
+	%add.ptr2 = getelementptr [64 x i16]* null, i32 1, i32 %qscale		; <i16*> [#uses=1]
+	%add.ptr3 = getelementptr [64 x i16]* null, i32 2, i32 %qscale		; <i16*> [#uses=1]
+	%add.ptr4 = getelementptr [64 x i16]* null, i32 3, i32 %qscale		; <i16*> [#uses=1]
+	%add.ptr5 = getelementptr [64 x i16]* null, i32 4, i32 %qscale		; <i16*> [#uses=1]
+	%add.ptr6 = getelementptr [64 x i16]* null, i32 5, i32 %qscale		; <i16*> [#uses=1]
+	%tmp1 = call i32 asm sideeffect "nop", "={ax},r,r,r,r,r,0,~{dirflag},~{fpsr},~{flags}"(i16* %add.ptr6, i16* %add.ptr5, i16* %add.ptr4, i16* %add.ptr3, i16* %add.ptr2, i16* %add.ptr1) nounwind		; <i32> [#uses=0]
+	ret i32 %level.1
+}
diff --git a/test/CodeGen/X86/2009-08-08-CastError.ll b/test/CodeGen/X86/2009-08-08-CastError.ll
new file mode 100644
index 0000000000000..9456d91efaaba
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-08-CastError.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-mingw64 | grep movabsq
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define <4 x float> @RecursiveTestFunc1(i8*) {
+EntryBlock:
+	%1 = call <4 x float> inttoptr (i64 5367207198 to <4 x float> (i8*, float, float, float, float)*)(i8* %0, float 8.000000e+00, float 5.000000e+00, float 3.000000e+00, float 4.000000e+00)		; <<4 x float>> [#uses=1]
+	ret <4 x float> %1
+}
diff --git a/test/CodeGen/X86/2009-08-12-badswitch.ll b/test/CodeGen/X86/2009-08-12-badswitch.ll
new file mode 100644
index 0000000000000..a94fce04ee019
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-12-badswitch.ll
@@ -0,0 +1,176 @@
+; RUN: llc < %s | grep LJT
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10"
+
+declare void @f1() nounwind readnone
+declare void @f2() nounwind readnone
+declare void @f3() nounwind readnone
+declare void @f4() nounwind readnone
+declare void @f5() nounwind readnone
+declare void @f6() nounwind readnone
+declare void @f7() nounwind readnone
+declare void @f8() nounwind readnone
+declare void @f9() nounwind readnone
+declare void @f10() nounwind readnone
+declare void @f11() nounwind readnone
+declare void @f12() nounwind readnone
+declare void @f13() nounwind readnone
+declare void @f14() nounwind readnone
+declare void @f15() nounwind readnone
+declare void @f16() nounwind readnone
+declare void @f17() nounwind readnone
+declare void @f18() nounwind readnone
+declare void @f19() nounwind readnone
+declare void @f20() nounwind readnone
+declare void @f21() nounwind readnone
+declare void @f22() nounwind readnone
+declare void @f23() nounwind readnone
+declare void @f24() nounwind readnone
+declare void @f25() nounwind readnone
+declare void @f26() nounwind readnone
+
+define internal fastcc i32 @foo(i64 %bar) nounwind ssp {
+entry:
+        br label %bb49
+
+bb49:
+	switch i64 %bar, label %RETURN [
+		i64 2, label %RRETURN_2
+		i64 3, label %RRETURN_6
+		i64 4, label %RRETURN_7
+		i64 5, label %RRETURN_14
+		i64 6, label %RRETURN_15
+		i64 7, label %RRETURN_16
+		i64 8, label %RRETURN_17
+		i64 9, label %RRETURN_18
+		i64 10, label %RRETURN_19
+		i64 11, label %RRETURN_20
+		i64 12, label %RRETURN_21
+		i64 13, label %RRETURN_22
+		i64 14, label %RRETURN_24
+		i64 15, label %RRETURN_26
+		i64 16, label %RRETURN_27
+		i64 17, label %RRETURN_28
+		i64 18, label %RRETURN_29
+		i64 19, label %RRETURN_30
+		i64 20, label %RRETURN_31
+		i64 21, label %RRETURN_38
+		i64 22, label %RRETURN_40
+		i64 23, label %RRETURN_42
+		i64 24, label %RRETURN_44
+		i64 25, label %RRETURN_48
+		i64 26, label %RRETURN_52
+		i64 27, label %RRETURN_1
+	]
+
+RETURN:
+        call void @f1()
+        br label %EXIT
+
+RRETURN_2:		; preds = %bb49
+        call void @f2()
+        br label %EXIT
+
+RRETURN_6:		; preds = %bb49
+        call void @f2()
+        br label %EXIT
+
+RRETURN_7:		; preds = %bb49
+        call void @f3()
+        br label %EXIT
+
+RRETURN_14:		; preds = %bb49
+        call void @f4()
+        br label %EXIT
+
+RRETURN_15:		; preds = %bb49
+        call void @f5()
+        br label %EXIT
+
+RRETURN_16:		; preds = %bb49
+        call void @f6()
+        br label %EXIT
+
+RRETURN_17:		; preds = %bb49
+        call void @f7()
+        br label %EXIT
+
+RRETURN_18:		; preds = %bb49
+        call void @f8()
+        br label %EXIT
+
+RRETURN_19:		; preds = %bb49
+        call void @f9()
+        br label %EXIT
+
+RRETURN_20:		; preds = %bb49
+        call void @f10()
+        br label %EXIT
+
+RRETURN_21:		; preds = %bb49
+        call void @f11()
+        br label %EXIT
+
+RRETURN_22:		; preds = %bb49
+        call void @f12()
+        br label %EXIT
+
+RRETURN_24:		; preds = %bb49
+        call void @f13()
+        br label %EXIT
+
+RRETURN_26:		; preds = %bb49
+        call void @f14()
+        br label %EXIT
+
+RRETURN_27:		; preds = %bb49
+        call void @f15()
+        br label %EXIT
+
+RRETURN_28:		; preds = %bb49
+        call void @f16()
+        br label %EXIT
+
+RRETURN_29:		; preds = %bb49
+        call void @f17()
+        br label %EXIT
+
+RRETURN_30:		; preds = %bb49
+        call void @f18()
+        br label %EXIT
+
+RRETURN_31:		; preds = %bb49
+        call void @f19()
+        br label %EXIT
+
+RRETURN_38:		; preds = %bb49
+        call void @f20()
+        br label %EXIT
+
+RRETURN_40:		; preds = %bb49
+        call void @f21()
+        br label %EXIT
+
+RRETURN_42:		; preds = %bb49
+        call void @f22()
+        br label %EXIT
+
+RRETURN_44:		; preds = %bb49
+        call void @f23()
+        br label %EXIT
+
+RRETURN_48:		; preds = %bb49
+        call void @f24()
+        br label %EXIT
+
+RRETURN_52:		; preds = %bb49
+        call void @f25()
+        br label %EXIT
+
+RRETURN_1:		; preds = %bb49
+        call void @f26()
+        br label %EXIT
+
+EXIT:
+        ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
new file mode 100644
index 0000000000000..6b0d6d9790de4
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s
+target triple = "x86_64-mingw"
+
+; ModuleID = 'mm.bc'
+	type opaque		; type %0
+	type opaque		; type %1
+
+define internal fastcc float @computeMipmappingRho(%0* %shaderExecutionStatePtr, i32 %index, <4 x float> %texCoord, <4 x float> %texCoordDX, <4 x float> %texCoordDY) readonly {
+indexCheckBlock:
+	%indexCmp = icmp ugt i32 %index, 16		; <i1> [#uses=1]
+	br i1 %indexCmp, label %zeroReturnBlock, label %primitiveTextureFetchBlock
+
+primitiveTextureFetchBlock:		; preds = %indexCheckBlock
+	%pointerArithmeticTmp = bitcast %0* %shaderExecutionStatePtr to i8*		; <i8*> [#uses=1]
+	%pointerArithmeticTmp1 = getelementptr i8* %pointerArithmeticTmp, i64 1808		; <i8*> [#uses=1]
+	%pointerArithmeticTmp2 = bitcast i8* %pointerArithmeticTmp1 to %1**		; <%1**> [#uses=1]
+	%primitivePtr = load %1** %pointerArithmeticTmp2		; <%1*> [#uses=1]
+	%pointerArithmeticTmp3 = bitcast %1* %primitivePtr to i8*		; <i8*> [#uses=1]
+	%pointerArithmeticTmp4 = getelementptr i8* %pointerArithmeticTmp3, i64 19408		; <i8*> [#uses=1]
+	%pointerArithmeticTmp5 = bitcast i8* %pointerArithmeticTmp4 to %1**		; <%1**> [#uses=1]
+	%primitiveTexturePtr = getelementptr %1** %pointerArithmeticTmp5, i32 %index		; <%1**> [#uses=1]
+	%primitiveTexturePtr6 = load %1** %primitiveTexturePtr		; <%1*> [#uses=2]
+	br label %textureCheckBlock
+
+textureCheckBlock:		; preds = %primitiveTextureFetchBlock
+	%texturePtrInt = ptrtoint %1* %primitiveTexturePtr6 to i64		; <i64> [#uses=1]
+	%testTextureNULL = icmp eq i64 %texturePtrInt, 0		; <i1> [#uses=1]
+	br i1 %testTextureNULL, label %zeroReturnBlock, label %rhoCalculateBlock
+
+rhoCalculateBlock:		; preds = %textureCheckBlock
+	%pointerArithmeticTmp7 = bitcast %1* %primitiveTexturePtr6 to i8*		; <i8*> [#uses=1]
+	%pointerArithmeticTmp8 = getelementptr i8* %pointerArithmeticTmp7, i64 640		; <i8*> [#uses=1]
+	%pointerArithmeticTmp9 = bitcast i8* %pointerArithmeticTmp8 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%dimensionsPtr = load <4 x float>* %pointerArithmeticTmp9, align 1		; <<4 x float>> [#uses=2]
+	%texDiffDX = fsub <4 x float> %texCoordDX, %texCoord		; <<4 x float>> [#uses=1]
+	%texDiffDY = fsub <4 x float> %texCoordDY, %texCoord		; <<4 x float>> [#uses=1]
+	%ddx = fmul <4 x float> %texDiffDX, %dimensionsPtr		; <<4 x float>> [#uses=2]
+	%ddx10 = fmul <4 x float> %texDiffDY, %dimensionsPtr		; <<4 x float>> [#uses=2]
+	%ddxSquared = fmul <4 x float> %ddx, %ddx		; <<4 x float>> [#uses=3]
+	%0 = shufflevector <4 x float> %ddxSquared, <4 x float> %ddxSquared, <4 x i32> <i32 1, i32 0, i32 0, i32 0>		; <<4 x float>> [#uses=1]
+	%dxSquared = fadd <4 x float> %ddxSquared, %0		; <<4 x float>> [#uses=1]
+	%1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %dxSquared)		; <<4 x float>> [#uses=1]
+	%ddySquared = fmul <4 x float> %ddx10, %ddx10		; <<4 x float>> [#uses=3]
+	%2 = shufflevector <4 x float> %ddySquared, <4 x float> %ddySquared, <4 x i32> <i32 1, i32 0, i32 0, i32 0>		; <<4 x float>> [#uses=1]
+	%dySquared = fadd <4 x float> %ddySquared, %2		; <<4 x float>> [#uses=1]
+	%3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %dySquared)		; <<4 x float>> [#uses=1]
+	%4 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %3)		; <<4 x float>> [#uses=1]
+	%rho = extractelement <4 x float> %4, i32 0		; <float> [#uses=1]
+	ret float %rho
+
+zeroReturnBlock:		; preds = %textureCheckBlock, %indexCheckBlock
+	ret float 0.000000e+00
+}
+
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll b/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
new file mode 100644
index 0000000000000..5f6cf3b9e0bbc
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-pc-linux | FileCheck %s
+
+@a = external global i96, align 4
+@b = external global i64, align 8
+
+define void @c() nounwind {
+; CHECK: movl a+8, %eax
+  %srcval1 = load i96* @a, align 4
+  %sroa.store.elt2 = lshr i96 %srcval1, 64
+  %tmp = trunc i96 %sroa.store.elt2 to i64
+; CHECK: movl %eax, b
+; CHECK: movl $0, b+4
+  store i64 %tmp, i64* @b, align 8
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
new file mode 100644
index 0000000000000..790fd88c46dd1
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=x86
+; PR4753
+
+; This function has a sub-register reuse undone.
+
+@uint8 = external global i32                      ; <i32*> [#uses=3]
+
+declare signext i8 @foo(i32, i8 signext) nounwind readnone
+
+declare signext i8 @bar(i32, i8 signext) nounwind readnone
+
+define i32 @uint80(i8 signext %p_52) nounwind {
+entry:
+  %0 = sext i8 %p_52 to i16                       ; <i16> [#uses=1]
+  %1 = tail call i32 @func_24(i16 zeroext %0, i8 signext ptrtoint (i8 (i32, i8)* @foo to i8)) nounwind; <i32> [#uses=1]
+  %2 = trunc i32 %1 to i8                         ; <i8> [#uses=1]
+  %3 = or i8 %2, 1                                ; <i8> [#uses=1]
+  %4 = tail call i32 @safe(i32 1) nounwind        ; <i32> [#uses=0]
+  %5 = tail call i32 @func_24(i16 zeroext 0, i8 signext undef) nounwind; <i32> [#uses=1]
+  %6 = trunc i32 %5 to i8                         ; <i8> [#uses=1]
+  %7 = xor i8 %3, %p_52                           ; <i8> [#uses=1]
+  %8 = xor i8 %7, %6                              ; <i8> [#uses=1]
+  %9 = icmp ne i8 %p_52, 0                        ; <i1> [#uses=1]
+  %10 = zext i1 %9 to i8                          ; <i8> [#uses=1]
+  %11 = tail call i32 @func_24(i16 zeroext ptrtoint (i8 (i32, i8)* @bar to i16), i8 signext %10) nounwind; <i32> [#uses=1]
+  %12 = tail call i32 @func_24(i16 zeroext 0, i8 signext 1) nounwind; <i32> [#uses=0]
+  br i1 undef, label %bb2, label %bb
+
+bb:                                               ; preds = %entry
+  br i1 undef, label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb, %entry
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %iftmp.2.0 = phi i32 [ 0, %bb2 ], [ 1, %bb ]    ; <i32> [#uses=1]
+  %13 = icmp ne i32 %11, %iftmp.2.0               ; <i1> [#uses=1]
+  %14 = tail call i32 @safe(i32 -2) nounwind      ; <i32> [#uses=0]
+  %15 = zext i1 %13 to i8                         ; <i8> [#uses=1]
+  %16 = tail call signext i8 @func_53(i8 signext undef, i8 signext 1, i8 signext %15, i8 signext %8) nounwind; <i8> [#uses=0]
+  br i1 undef, label %bb5, label %bb4
+
+bb4:                                              ; preds = %bb3
+  %17 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb3
+  %18 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  %19 = sext i8 undef to i16                      ; <i16> [#uses=1]
+  %20 = tail call i32 @func_24(i16 zeroext %19, i8 signext 1) nounwind; <i32> [#uses=0]
+  br i1 undef, label %return, label %bb6.preheader
+
+bb6.preheader:                                    ; preds = %bb5
+  %21 = sext i8 %p_52 to i32                      ; <i32> [#uses=1]
+  %22 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  %23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
+  unreachable
+
+return:                                           ; preds = %bb5
+  ret i32 undef
+}
+
+declare i32 @func_24(i16 zeroext, i8 signext)
+
+declare i32 @safe(i32)
+
+declare signext i8 @func_53(i8 signext, i8 signext, i8 signext, i8 signext)
+
+declare i32 @safefuncts(...)
diff --git a/test/CodeGen/X86/2009-08-23-linkerprivate.ll b/test/CodeGen/X86/2009-08-23-linkerprivate.ll
new file mode 100644
index 0000000000000..3da8f00a60437
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | FileCheck %s
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16		; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/X86/2009-09-07-CoalescerBug.ll b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll
new file mode 100644
index 0000000000000..55432be1c2c98
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-freebsd7.2 -code-model=kernel | FileCheck %s
+; PR4689
+
+%struct.__s = type { [8 x i8] }
+%struct.pcb = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i16, i8* }
+%struct.pcpu = type { i32*, i32*, i32*, i32*, %struct.pcb*, i64, i32, i32, i32, i32 }
+
+define i64 @hammer_time(i64 %modulep, i64 %physfree) nounwind ssp noredzone noimplicitfloat {
+; CHECK: hammer_time:
+; CHECK: movq $Xrsvd, %rax
+; CHECK: movq $Xrsvd, %rdi
+; CHECK: movq $Xrsvd, %r8
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %if.end
+  switch i32 undef, label %if.then76 [
+    i32 9, label %for.inc
+    i32 10, label %for.inc
+    i32 11, label %for.inc
+    i32 12, label %for.inc
+  ]
+
+if.then76:                                        ; preds = %for.body
+  unreachable
+
+for.inc:                                          ; preds = %for.body, %for.body, %for.body, %for.body
+  br i1 undef, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc
+  call void asm sideeffect "mov $1,%gs:$0", "=*m,r,~{dirflag},~{fpsr},~{flags}"(%struct.__s* bitcast (%struct.pcb** getelementptr (%struct.pcpu* null, i32 0, i32 4) to %struct.__s*), i64 undef) nounwind
+  br label %for.body170
+
+for.body170:                                      ; preds = %for.body170, %for.end
+  store i64 or (i64 and (i64 or (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 2097152), i64 2162687), i64 or (i64 or (i64 and (i64 shl (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 32), i64 -281474976710656), i64 140737488355328), i64 15393162788864)), i64* undef
+  br i1 undef, label %for.end175, label %for.body170
+
+for.end175:                                       ; preds = %for.body170
+  unreachable
+}
+
+declare void @Xrsvd(i32, i32, i32, i32) ssp noredzone noimplicitfloat
diff --git a/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
new file mode 100644
index 0000000000000..9e58872b73c85
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim | FileCheck %s
+
+; It's not legal to fold a load from 32-bit stack slot into a 64-bit
+; instruction. If done, the instruction does a 64-bit load and that's not
+; safe. This can happen we a subreg_to_reg 0 has been coalesced. One
+; exception is when the instruction that folds the load is a move, then we
+; can simply turn it into a 32-bit load from the stack slot.
+; rdar://7170444
+
+%struct.ComplexType = type { i32 }
+
+define i32 @t(i32 %clientPort, i32 %pluginID, i32 %requestID, i32 %objectID, i64 %serverIdentifier, i64 %argumentsData, i32 %argumentsLength) ssp {
+entry:
+; CHECK: _t:
+; CHECK: movl 16(%rbp),
+; CHECK: movl 16(%rbp), %edx
+  %0 = zext i32 %argumentsLength to i64           ; <i64> [#uses=1]
+  %1 = zext i32 %clientPort to i64                ; <i64> [#uses=1]
+  %2 = inttoptr i64 %1 to %struct.ComplexType*    ; <%struct.ComplexType*> [#uses=1]
+  %3 = invoke i8* @pluginInstance(i8* undef, i32 %pluginID)
+          to label %invcont unwind label %lpad    ; <i8*> [#uses=1]
+
+invcont:                                          ; preds = %entry
+  %4 = add i32 %requestID, %pluginID              ; <i32> [#uses=0]
+  %5 = invoke zeroext i8 @invoke(i8* %3, i32 %objectID, i8* undef, i64 %argumentsData, i32 %argumentsLength, i64* undef, i32* undef)
+          to label %invcont1 unwind label %lpad   ; <i8> [#uses=0]
+
+invcont1:                                         ; preds = %invcont
+  %6 = getelementptr inbounds %struct.ComplexType* %2, i64 0, i32 0 ; <i32*> [#uses=1]
+  %7 = load i32* %6, align 4                      ; <i32> [#uses=1]
+  invoke void @booleanAndDataReply(i32 %7, i32 undef, i32 %requestID, i32 undef, i64 undef, i32 undef)
+          to label %invcont2 unwind label %lpad
+
+invcont2:                                         ; preds = %invcont1
+  ret i32 0
+
+lpad:                                             ; preds = %invcont1, %invcont, %entry
+  %8 = call i32 @vm_deallocate(i32 undef, i64 0, i64 %0) ; <i32> [#uses=0]
+  unreachable
+}
+
+declare i32 @vm_deallocate(i32, i64, i64)
+
+declare i8* @pluginInstance(i8*, i32)
+
+declare zeroext i8 @invoke(i8*, i32, i8*, i64, i32, i64*, i32*)
+
+declare void @booleanAndDataReply(i32, i32, i32, i32, i64, i32)
diff --git a/test/CodeGen/X86/2009-09-16-CoalescerBug.ll b/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
new file mode 100644
index 0000000000000..18b5a179c9ef3
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10
+; PR4910
+
+%0 = type { i32, i32, i32, i32 }
+
+@boot_cpu_id = external global i32                ; <i32*> [#uses=1]
+@cpu_logical = common global i32 0, align 4       ; <i32*> [#uses=1]
+
+define void @topo_probe_0xb() nounwind ssp {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc38, %entry
+  %0 = phi i32 [ 0, %entry ], [ %inc40, %for.inc38 ] ; <i32> [#uses=3]
+  %cmp = icmp slt i32 %0, 3                       ; <i1> [#uses=1]
+  br i1 %cmp, label %for.body, label %for.end41
+
+for.body:                                         ; preds = %for.cond
+  %1 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,{cx},~{dirflag},~{fpsr},~{flags}"(i32 11, i32 %0) nounwind ; <%0> [#uses=3]
+  %asmresult.i = extractvalue %0 %1, 0            ; <i32> [#uses=1]
+  %asmresult10.i = extractvalue %0 %1, 2          ; <i32> [#uses=1]
+  %and = and i32 %asmresult.i, 31                 ; <i32> [#uses=2]
+  %shr42 = lshr i32 %asmresult10.i, 8             ; <i32> [#uses=1]
+  %and12 = and i32 %shr42, 255                    ; <i32> [#uses=2]
+  %cmp14 = icmp eq i32 %and12, 0                  ; <i1> [#uses=1]
+  br i1 %cmp14, label %for.end41, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %for.body
+  %asmresult9.i = extractvalue %0 %1, 1           ; <i32> [#uses=1]
+  %and7 = and i32 %asmresult9.i, 65535            ; <i32> [#uses=1]
+  %cmp16 = icmp eq i32 %and7, 0                   ; <i1> [#uses=1]
+  br i1 %cmp16, label %for.end41, label %for.cond17.preheader
+
+for.cond17.preheader:                             ; preds = %lor.lhs.false
+  %tmp24 = load i32* @boot_cpu_id                 ; <i32> [#uses=1]
+  %shr26 = ashr i32 %tmp24, %and                  ; <i32> [#uses=1]
+  br label %for.body20
+
+for.body20:                                       ; preds = %for.body20, %for.cond17.preheader
+  %2 = phi i32 [ 0, %for.cond17.preheader ], [ %inc32, %for.body20 ] ; <i32> [#uses=2]
+  %cnt.143 = phi i32 [ 0, %for.cond17.preheader ], [ %inc.cnt.1, %for.body20 ] ; <i32> [#uses=1]
+  %shr23 = ashr i32 %2, %and                      ; <i32> [#uses=1]
+  %cmp27 = icmp eq i32 %shr23, %shr26             ; <i1> [#uses=1]
+  %inc = zext i1 %cmp27 to i32                    ; <i32> [#uses=1]
+  %inc.cnt.1 = add i32 %inc, %cnt.143             ; <i32> [#uses=2]
+  %inc32 = add nsw i32 %2, 1                      ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %inc32, 255             ; <i1> [#uses=1]
+  br i1 %exitcond, label %for.end, label %for.body20
+
+for.end:                                          ; preds = %for.body20
+  %cmp34 = icmp eq i32 %and12, 1                  ; <i1> [#uses=1]
+  br i1 %cmp34, label %if.then35, label %for.inc38
+
+if.then35:                                        ; preds = %for.end
+  store i32 %inc.cnt.1, i32* @cpu_logical
+  br label %for.inc38
+
+for.inc38:                                        ; preds = %for.end, %if.then35
+  %inc40 = add nsw i32 %0, 1                      ; <i32> [#uses=1]
+  br label %for.cond
+
+for.end41:                                        ; preds = %lor.lhs.false, %for.body, %for.cond
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
new file mode 100644
index 0000000000000..646806e5dbb28
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+
+; PR4958
+
+define i32 @main() nounwind ssp {
+entry:
+; CHECK: main:
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  br label %bb
+
+bb:                                               ; preds = %bb1, %entry
+; CHECK:      movl %e
+; CHECK-NEXT: addl $1
+; CHECK-NEXT: movl %e
+; CHECK-NEXT: adcl $0
+  %i.0 = phi i64 [ 0, %entry ], [ %0, %bb1 ]      ; <i64> [#uses=1]
+  %0 = add nsw i64 %i.0, 1                        ; <i64> [#uses=2]
+  %1 = icmp sgt i32 0, 0                          ; <i1> [#uses=1]
+  br i1 %1, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  %2 = icmp sle i64 %0, 1                         ; <i1> [#uses=1]
+  br i1 %2, label %bb, label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  br label %return
+
+return:                                           ; preds = %bb2
+  ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-09-19-earlyclobber.ll b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
new file mode 100644
index 0000000000000..4f44caea74c94
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = '4964.c'
+; PR 4964
+; Registers other than RAX, RCX are OK, but they must be different.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+	type { i64, i64 }		; type %0
+
+define i64 @flsst(i64 %find) nounwind ssp {
+entry:
+; CHECK: FOO %rax %rcx
+	%asmtmp = tail call %0 asm sideeffect "FOO $0 $1 $2", "=r,=&r,rm,~{dirflag},~{fpsr},~{flags},~{cc}"(i64 %find) nounwind		; <%0> [#uses=1]
+	%asmresult = extractvalue %0 %asmtmp, 0		; <i64> [#uses=1]
+	ret i64 %asmresult
+}
diff --git a/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
new file mode 100644
index 0000000000000..80b883582ce53
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -relocation-model=pic | FileCheck %s
+
+define void @dot(i16* nocapture %A, i32 %As, i16* nocapture %B, i32 %Bs, i16* nocapture %C, i32 %N) nounwind ssp {
+; CHECK: dot:
+; CHECK: decl %
+; CHECK-NEXT: jne
+entry:
+	%0 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
+	br i1 %0, label %bb, label %bb2
+
+bb:		; preds = %bb, %entry
+	%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=3]
+	%sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ]		; <i32> [#uses=1]
+	%1 = mul i32 %i.03, %As		; <i32> [#uses=1]
+	%2 = getelementptr i16* %A, i32 %1		; <i16*> [#uses=1]
+	%3 = load i16* %2, align 2		; <i16> [#uses=1]
+	%4 = sext i16 %3 to i32		; <i32> [#uses=1]
+	%5 = mul i32 %i.03, %Bs		; <i32> [#uses=1]
+	%6 = getelementptr i16* %B, i32 %5		; <i16*> [#uses=1]
+	%7 = load i16* %6, align 2		; <i16> [#uses=1]
+	%8 = sext i16 %7 to i32		; <i32> [#uses=1]
+	%9 = mul i32 %8, %4		; <i32> [#uses=1]
+	%10 = add i32 %9, %sum.04		; <i32> [#uses=2]
+	%indvar.next = add i32 %i.03, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb
+
+bb1.bb2_crit_edge:		; preds = %bb
+	%phitmp = trunc i32 %10 to i16		; <i16> [#uses=1]
+	br label %bb2
+
+bb2:		; preds = %entry, %bb1.bb2_crit_edge
+	%sum.0.lcssa = phi i16 [ %phitmp, %bb1.bb2_crit_edge ], [ 0, %entry ]		; <i16> [#uses=1]
+	store i16 %sum.0.lcssa, i16* %C, align 2
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-09-22-CoalescerBug.ll b/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
new file mode 100644
index 0000000000000..33f35f881e853
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+  br i1 undef, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  ret i32 3
+
+bb1:                                              ; preds = %entry
+  br i1 undef, label %bb3, label %bb2
+
+bb2:                                              ; preds = %bb1
+  ret i32 3
+
+bb3:                                              ; preds = %bb1
+  br i1 undef, label %bb.i18, label %quantum_getwidth.exit
+
+bb.i18:                                           ; preds = %bb.i18, %bb3
+  br i1 undef, label %bb.i18, label %quantum_getwidth.exit
+
+quantum_getwidth.exit:                            ; preds = %bb.i18, %bb3
+  br i1 undef, label %bb4, label %bb6.preheader
+
+bb4:                                              ; preds = %quantum_getwidth.exit
+  unreachable
+
+bb6.preheader:                                    ; preds = %quantum_getwidth.exit
+  br i1 undef, label %bb.i1, label %bb1.i2
+
+bb.i1:                                            ; preds = %bb6.preheader
+  unreachable
+
+bb1.i2:                                           ; preds = %bb6.preheader
+  br i1 undef, label %bb2.i, label %bb3.i4
+
+bb2.i:                                            ; preds = %bb1.i2
+  unreachable
+
+bb3.i4:                                           ; preds = %bb1.i2
+  br i1 undef, label %quantum_new_qureg.exit, label %bb4.i
+
+bb4.i:                                            ; preds = %bb3.i4
+  unreachable
+
+quantum_new_qureg.exit:                           ; preds = %bb3.i4
+  br i1 undef, label %bb9, label %bb11.thread
+
+bb11.thread:                                      ; preds = %quantum_new_qureg.exit
+  %.cast.i = zext i32 undef to i64                ; <i64> [#uses=1]
+  br label %bb.i37
+
+bb9:                                              ; preds = %quantum_new_qureg.exit
+  unreachable
+
+bb.i37:                                           ; preds = %bb.i37, %bb11.thread
+  %0 = load i64* undef, align 8                   ; <i64> [#uses=1]
+  %1 = shl i64 %0, %.cast.i                       ; <i64> [#uses=1]
+  store i64 %1, i64* undef, align 8
+  br i1 undef, label %bb.i37, label %quantum_addscratch.exit
+
+quantum_addscratch.exit:                          ; preds = %bb.i37
+  br i1 undef, label %bb12.preheader, label %bb14
+
+bb12.preheader:                                   ; preds = %quantum_addscratch.exit
+  unreachable
+
+bb14:                                             ; preds = %quantum_addscratch.exit
+  br i1 undef, label %bb17, label %bb.nph
+
+bb.nph:                                           ; preds = %bb14
+  unreachable
+
+bb17:                                             ; preds = %bb14
+  br i1 undef, label %bb1.i7, label %quantum_measure.exit
+
+bb1.i7:                                           ; preds = %bb17
+  br label %quantum_measure.exit
+
+quantum_measure.exit:                             ; preds = %bb1.i7, %bb17
+  switch i32 undef, label %bb21 [
+    i32 -1, label %bb18
+    i32 0, label %bb20
+  ]
+
+bb18:                                             ; preds = %quantum_measure.exit
+  unreachable
+
+bb20:                                             ; preds = %quantum_measure.exit
+  unreachable
+
+bb21:                                             ; preds = %quantum_measure.exit
+  br i1 undef, label %quantum_frac_approx.exit, label %bb1.i
+
+bb1.i:                                            ; preds = %bb21
+  unreachable
+
+quantum_frac_approx.exit:                         ; preds = %bb21
+  br i1 undef, label %bb25, label %bb26
+
+bb25:                                             ; preds = %quantum_frac_approx.exit
+  unreachable
+
+bb26:                                             ; preds = %quantum_frac_approx.exit
+  br i1 undef, label %quantum_gcd.exit, label %bb.i
+
+bb.i:                                             ; preds = %bb.i, %bb26
+  br i1 undef, label %quantum_gcd.exit, label %bb.i
+
+quantum_gcd.exit:                                 ; preds = %bb.i, %bb26
+  br i1 undef, label %bb32, label %bb33
+
+bb32:                                             ; preds = %quantum_gcd.exit
+  br i1 undef, label %bb.i.i, label %quantum_delete_qureg.exit
+
+bb.i.i:                                           ; preds = %bb32
+  ret i32 0
+
+quantum_delete_qureg.exit:                        ; preds = %bb32
+  ret i32 0
+
+bb33:                                             ; preds = %quantum_gcd.exit
+  unreachable
+}
diff --git a/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll b/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll
new file mode 100644
index 0000000000000..d37d4b8bd427c
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+; rdar://7247745
+
+%struct._lck_mtx_ = type { %union.anon }
+%struct._lck_rw_t_internal_ = type <{ i16, i8, i8, i32, i32, i32 }>
+%struct.anon = type { i64, i64, [2 x i8], i8, i8, i32 }
+%struct.memory_object = type { i32, i32, %struct.memory_object_pager_ops* }
+%struct.memory_object_control = type { i32, i32, %struct.vm_object* }
+%struct.memory_object_pager_ops = type { void (%struct.memory_object*)*, void (%struct.memory_object*)*, i32 (%struct.memory_object*, %struct.memory_object_control*, i32)*, i32 (%struct.memory_object*)*, i32 (%struct.memory_object*, i64, i32, i32, i32*)*, i32 (%struct.memory_object*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%struct.memory_object*, i64, i32)*, i32 (%struct.memory_object*, i64, i64, i32)*, i32 (%struct.memory_object*, i64, i64, i32)*, i32 (%struct.memory_object*, i32)*, i32 (%struct.memory_object*)*, i8* }
+%struct.queue_entry = type { %struct.queue_entry*, %struct.queue_entry* }
+%struct.upl = type { %struct._lck_mtx_, i32, i32, %struct.vm_object*, i64, i32, i64, %struct.vm_object*, i32, i8* }
+%struct.upl_page_info = type <{ i32, i8, [3 x i8] }>
+%struct.vm_object = type { %struct.queue_entry, %struct._lck_rw_t_internal_, i64, %struct.vm_page*, i32, i32, i32, i32, %struct.vm_object*, %struct.vm_object*, i64, %struct.memory_object*, i64, %struct.memory_object_control*, i32, i16, i16, [2 x i8], i8, i8, %struct.queue_entry, %struct.queue_entry, i64, i32, i32, i32, i8*, i64, i8, i8, [2 x i8], %struct.queue_entry }
+%struct.vm_page = type { %struct.queue_entry, %struct.queue_entry, %struct.vm_page*, %struct.vm_object*, i64, [2 x i8], i8, i8, i32, i8, i8, i8, i8, i32 }
+%union.anon = type { %struct.anon }
+
+declare i64 @OSAddAtomic64(i64, i64*) noredzone noimplicitfloat
+
+define i32 @upl_commit_range(%struct.upl* %upl, i32 %offset, i32 %size, i32 %flags, %struct.upl_page_info* %page_list, i32 %count, i32* nocapture %empty) nounwind noredzone noimplicitfloat {
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %if.end143, label %if.then136
+
+if.then136:                                       ; preds = %if.end
+  unreachable
+
+if.end143:                                        ; preds = %if.end
+  br i1 undef, label %if.else155, label %if.then153
+
+if.then153:                                       ; preds = %if.end143
+  br label %while.cond
+
+if.else155:                                       ; preds = %if.end143
+  unreachable
+
+while.cond:                                       ; preds = %if.end1039, %if.then153
+  br i1 undef, label %if.then1138, label %while.body
+
+while.body:                                       ; preds = %while.cond
+  br i1 undef, label %if.end260, label %if.then217
+
+if.then217:                                       ; preds = %while.body
+  br i1 undef, label %if.end260, label %if.then230
+
+if.then230:                                       ; preds = %if.then217
+  br i1 undef, label %if.then246, label %if.end260
+
+if.then246:                                       ; preds = %if.then230
+  br label %if.end260
+
+if.end260:                                        ; preds = %if.then246, %if.then230, %if.then217, %while.body
+  br i1 undef, label %if.end296, label %if.then266
+
+if.then266:                                       ; preds = %if.end260
+  unreachable
+
+if.end296:                                        ; preds = %if.end260
+  br i1 undef, label %if.end1039, label %if.end306
+
+if.end306:                                        ; preds = %if.end296
+  br i1 undef, label %if.end796, label %if.then616
+
+if.then616:                                       ; preds = %if.end306
+  br i1 undef, label %commit_next_page, label %do.body716
+
+do.body716:                                       ; preds = %if.then616
+  %call721 = call i64 @OSAddAtomic64(i64 1, i64* undef) nounwind noredzone noimplicitfloat ; <i64> [#uses=0]
+  call void asm sideeffect "movq\090x0($0),%rdi\0A\09movq\090x8($0),%rsi\0A\09.section __DATA, __data\0A\09.globl __dtrace_probeDOLLAR${:uid}4794___vminfo____pgrec\0A\09__dtrace_probeDOLLAR${:uid}4794___vminfo____pgrec:.quad 1f\0A\09.text\0A\091:nop\0A\09nop\0A\09nop\0A\09", "r,~{memory},~{di},~{si},~{dirflag},~{fpsr},~{flags}"(i64* undef) nounwind
+  br label %commit_next_page
+
+if.end796:                                        ; preds = %if.end306
+  unreachable
+
+commit_next_page:                                 ; preds = %do.body716, %if.then616
+  br i1 undef, label %if.end1039, label %if.then1034
+
+if.then1034:                                      ; preds = %commit_next_page
+  br label %if.end1039
+
+if.end1039:                                       ; preds = %if.then1034, %commit_next_page, %if.end296
+  br label %while.cond
+
+if.then1138:                                      ; preds = %while.cond
+  unreachable
+
+if.then:                                          ; preds = %entry
+  ret i32 4
+}
diff --git a/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll b/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll
new file mode 100644
index 0000000000000..ef10ae59ab6b2
--- /dev/null
+++ b/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll
@@ -0,0 +1,264 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -stats |& grep {machine-licm} | grep 1
+; rdar://7274692
+
+%0 = type { [125 x i32] }
+%1 = type { i32 }
+%struct..5sPragmaType = type { i8*, i32 }
+%struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
+%struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
+%struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 }
+%struct.AuxData = type { i8*, void (i8*)* }
+%struct.Bitvec = type { i32, i32, i32, %0 }
+%struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* }
+%struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* }
+%struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* }
+%struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* }
+%struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] }
+%struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 }
+%struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 }
+%struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* }
+%struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 }
+%struct.Context = type { i64, i32, %struct.Fifo }
+%struct.CountCtx = type { i64 }
+%struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* }
+%struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* }
+%struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* }
+%struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 }
+%struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* }
+%struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 }
+%struct.FILE = type { i8*, i32, i32, i16, i16, %struct..5sPragmaType, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct..5sPragmaType, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct..5sPragmaType, i32, i64 }
+%struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 }
+%struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* }
+%struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] }
+%struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] }
+%struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* }
+%struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 }
+%struct.IdList = type { %struct..5sPragmaType*, i32, i32 }
+%struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** }
+%struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] }
+%struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* }
+%struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* }
+%struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* }
+%struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 }
+%struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] }
+%struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* }
+%struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* }
+%struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* }
+%struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] }
+%struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
+%struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 }
+%struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* }
+%struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* }
+%struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* }
+%struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 }
+%struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] }
+%struct._OvflCell = type { i8*, i16 }
+%struct._RuneCharClass = type { [14 x i8], i32 }
+%struct._RuneEntry = type { i32, i32, i32, i32* }
+%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* }
+%struct._RuneRange = type { i32, %struct._RuneEntry* }
+%struct.__sFILEX = type opaque
+%struct._ht = type { i32, %struct.HashElem* }
+%struct.callback_data = type { %struct.sqlite3*, i32, i32, %struct.FILE*, i32, i32, i32, i8*, [20 x i8], [100 x i32], [100 x i32], [20 x i8], %struct.previous_mode_data, [1024 x i8], i8* }
+%struct.previous_mode_data = type { i32, i32, i32, [100 x i32] }
+%struct.sColMap = type { i32, i8* }
+%struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %union.anon, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
+%struct.sqlite3InitInfo = type { i32, i32, i8 }
+%struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* }
+%struct.sqlite3_file = type { %struct.sqlite3_io_methods* }
+%struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
+%struct.sqlite3_index_constraint_usage = type { i32, i8 }
+%struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double }
+%struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* }
+%struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* }
+%struct.sqlite3_mutex = type opaque
+%struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* }
+%struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* }
+%struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* }
+%union.anon = type { double }
+
+@_DefaultRuneLocale = external global %struct._RuneLocale ; <%struct._RuneLocale*> [#uses=2]
+@__stderrp = external global %struct.FILE*        ; <%struct.FILE**> [#uses=1]
+@.str10 = internal constant [16 x i8] c"Out of memory!\0A\00", align 1 ; <[16 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.callback_data*, i8*)* @set_table_name to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define fastcc void @set_table_name(%struct.callback_data* nocapture %p, i8* %zName) nounwind ssp {
+entry:
+  %0 = getelementptr inbounds %struct.callback_data* %p, i32 0, i32 7 ; <i8**> [#uses=3]
+  %1 = load i8** %0, align 4                      ; <i8*> [#uses=2]
+  %2 = icmp eq i8* %1, null                       ; <i1> [#uses=1]
+  br i1 %2, label %bb1, label %bb
+
+bb:                                               ; preds = %entry
+  free i8* %1
+  store i8* null, i8** %0, align 4
+  br label %bb1
+
+bb1:                                              ; preds = %bb, %entry
+  %3 = icmp eq i8* %zName, null                   ; <i1> [#uses=1]
+  br i1 %3, label %return, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %4 = load i8* %zName, align 1                   ; <i8> [#uses=2]
+  %5 = zext i8 %4 to i32                          ; <i32> [#uses=2]
+  %6 = icmp sgt i8 %4, -1                         ; <i1> [#uses=1]
+  br i1 %6, label %bb.i.i, label %bb1.i.i
+
+bb.i.i:                                           ; preds = %bb2
+  %7 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i32 0, i32 5, i32 %5 ; <i32*> [#uses=1]
+  %8 = load i32* %7, align 4                      ; <i32> [#uses=1]
+  %9 = and i32 %8, 256                            ; <i32> [#uses=1]
+  br label %isalpha.exit
+
+bb1.i.i:                                          ; preds = %bb2
+  %10 = tail call i32 @__maskrune(i32 %5, i32 256) nounwind ; <i32> [#uses=1]
+  br label %isalpha.exit
+
+isalpha.exit:                                     ; preds = %bb1.i.i, %bb.i.i
+  %storemerge.in.in.i.i = phi i32 [ %9, %bb.i.i ], [ %10, %bb1.i.i ] ; <i32> [#uses=1]
+  %storemerge.in.i.i = icmp eq i32 %storemerge.in.in.i.i, 0 ; <i1> [#uses=1]
+  br i1 %storemerge.in.i.i, label %bb3, label %bb5
+
+bb3:                                              ; preds = %isalpha.exit
+  %11 = load i8* %zName, align 1                  ; <i8> [#uses=2]
+  %12 = icmp eq i8 %11, 95                        ; <i1> [#uses=1]
+  br i1 %12, label %bb5, label %bb12.preheader
+
+bb5:                                              ; preds = %bb3, %isalpha.exit
+  %.pre = load i8* %zName, align 1                ; <i8> [#uses=1]
+  br label %bb12.preheader
+
+bb12.preheader:                                   ; preds = %bb5, %bb3
+  %13 = phi i8 [ %.pre, %bb5 ], [ %11, %bb3 ]     ; <i8> [#uses=1]
+  %needQuote.1.ph = phi i32 [ 0, %bb5 ], [ 1, %bb3 ] ; <i32> [#uses=2]
+  %14 = icmp eq i8 %13, 0                         ; <i1> [#uses=1]
+  br i1 %14, label %bb13, label %bb7
+
+bb7:                                              ; preds = %bb11, %bb12.preheader
+  %i.011 = phi i32 [ %tmp17, %bb11 ], [ 0, %bb12.preheader ] ; <i32> [#uses=2]
+  %n.110 = phi i32 [ %26, %bb11 ], [ 0, %bb12.preheader ] ; <i32> [#uses=3]
+  %needQuote.19 = phi i32 [ %needQuote.0, %bb11 ], [ %needQuote.1.ph, %bb12.preheader ] ; <i32> [#uses=2]
+  %scevgep16 = getelementptr i8* %zName, i32 %i.011 ; <i8*> [#uses=2]
+  %tmp17 = add i32 %i.011, 1                      ; <i32> [#uses=2]
+  %scevgep18 = getelementptr i8* %zName, i32 %tmp17 ; <i8*> [#uses=1]
+  %15 = load i8* %scevgep16, align 1              ; <i8> [#uses=2]
+  %16 = zext i8 %15 to i32                        ; <i32> [#uses=2]
+  %17 = icmp sgt i8 %15, -1                       ; <i1> [#uses=1]
+  br i1 %17, label %bb.i.i2, label %bb1.i.i3
+
+bb.i.i2:                                          ; preds = %bb7
+  %18 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i32 0, i32 5, i32 %16 ; <i32*> [#uses=1]
+  %19 = load i32* %18, align 4                    ; <i32> [#uses=1]
+  %20 = and i32 %19, 1280                         ; <i32> [#uses=1]
+  br label %isalnum.exit
+
+bb1.i.i3:                                         ; preds = %bb7
+  %21 = tail call i32 @__maskrune(i32 %16, i32 1280) nounwind ; <i32> [#uses=1]
+  br label %isalnum.exit
+
+isalnum.exit:                                     ; preds = %bb1.i.i3, %bb.i.i2
+  %storemerge.in.in.i.i4 = phi i32 [ %20, %bb.i.i2 ], [ %21, %bb1.i.i3 ] ; <i32> [#uses=1]
+  %storemerge.in.i.i5 = icmp eq i32 %storemerge.in.in.i.i4, 0 ; <i1> [#uses=1]
+  br i1 %storemerge.in.i.i5, label %bb8, label %bb11
+
+bb8:                                              ; preds = %isalnum.exit
+  %22 = load i8* %scevgep16, align 1              ; <i8> [#uses=2]
+  %23 = icmp eq i8 %22, 95                        ; <i1> [#uses=1]
+  br i1 %23, label %bb11, label %bb9
+
+bb9:                                              ; preds = %bb8
+  %24 = icmp eq i8 %22, 39                        ; <i1> [#uses=1]
+  %25 = zext i1 %24 to i32                        ; <i32> [#uses=1]
+  %.n.1 = add i32 %n.110, %25                     ; <i32> [#uses=1]
+  br label %bb11
+
+bb11:                                             ; preds = %bb9, %bb8, %isalnum.exit
+  %needQuote.0 = phi i32 [ 1, %bb9 ], [ %needQuote.19, %isalnum.exit ], [ %needQuote.19, %bb8 ] ; <i32> [#uses=2]
+  %n.0 = phi i32 [ %.n.1, %bb9 ], [ %n.110, %isalnum.exit ], [ %n.110, %bb8 ] ; <i32> [#uses=1]
+  %26 = add nsw i32 %n.0, 1                       ; <i32> [#uses=2]
+  %27 = load i8* %scevgep18, align 1              ; <i8> [#uses=1]
+  %28 = icmp eq i8 %27, 0                         ; <i1> [#uses=1]
+  br i1 %28, label %bb13, label %bb7
+
+bb13:                                             ; preds = %bb11, %bb12.preheader
+  %n.1.lcssa = phi i32 [ 0, %bb12.preheader ], [ %26, %bb11 ] ; <i32> [#uses=2]
+  %needQuote.1.lcssa = phi i32 [ %needQuote.1.ph, %bb12.preheader ], [ %needQuote.0, %bb11 ] ; <i32> [#uses=1]
+  %29 = add nsw i32 %n.1.lcssa, 2                 ; <i32> [#uses=1]
+  %30 = icmp eq i32 %needQuote.1.lcssa, 0         ; <i1> [#uses=3]
+  %n.1. = select i1 %30, i32 %n.1.lcssa, i32 %29  ; <i32> [#uses=1]
+  %31 = add nsw i32 %n.1., 1                      ; <i32> [#uses=1]
+  %32 = malloc i8, i32 %31                        ; <i8*> [#uses=7]
+  store i8* %32, i8** %0, align 4
+  %33 = icmp eq i8* %32, null                     ; <i1> [#uses=1]
+  br i1 %33, label %bb16, label %bb17
+
+bb16:                                             ; preds = %bb13
+  %34 = load %struct.FILE** @__stderrp, align 4   ; <%struct.FILE*> [#uses=1]
+  %35 = bitcast %struct.FILE* %34 to i8*          ; <i8*> [#uses=1]
+  %36 = tail call i32 @"\01_fwrite$UNIX2003"(i8* getelementptr inbounds ([16 x i8]* @.str10, i32 0, i32 0), i32 1, i32 15, i8* %35) nounwind ; <i32> [#uses=0]
+  tail call void @exit(i32 1) noreturn nounwind
+  unreachable
+
+bb17:                                             ; preds = %bb13
+  br i1 %30, label %bb23.preheader, label %bb18
+
+bb18:                                             ; preds = %bb17
+  store i8 39, i8* %32, align 4
+  br label %bb23.preheader
+
+bb23.preheader:                                   ; preds = %bb18, %bb17
+  %n.3.ph = phi i32 [ 1, %bb18 ], [ 0, %bb17 ]    ; <i32> [#uses=2]
+  %37 = load i8* %zName, align 1                  ; <i8> [#uses=1]
+  %38 = icmp eq i8 %37, 0                         ; <i1> [#uses=1]
+  br i1 %38, label %bb24, label %bb20
+
+bb20:                                             ; preds = %bb22, %bb23.preheader
+  %storemerge18 = phi i32 [ %tmp, %bb22 ], [ 0, %bb23.preheader ] ; <i32> [#uses=2]
+  %n.37 = phi i32 [ %n.4, %bb22 ], [ %n.3.ph, %bb23.preheader ] ; <i32> [#uses=3]
+  %scevgep = getelementptr i8* %zName, i32 %storemerge18 ; <i8*> [#uses=1]
+  %tmp = add i32 %storemerge18, 1                 ; <i32> [#uses=2]
+  %scevgep15 = getelementptr i8* %zName, i32 %tmp ; <i8*> [#uses=1]
+  %39 = load i8* %scevgep, align 1                ; <i8> [#uses=2]
+  %40 = getelementptr inbounds i8* %32, i32 %n.37 ; <i8*> [#uses=1]
+  store i8 %39, i8* %40, align 1
+  %41 = add nsw i32 %n.37, 1                      ; <i32> [#uses=2]
+  %42 = icmp eq i8 %39, 39                        ; <i1> [#uses=1]
+  br i1 %42, label %bb21, label %bb22
+
+bb21:                                             ; preds = %bb20
+  %43 = getelementptr inbounds i8* %32, i32 %41   ; <i8*> [#uses=1]
+  store i8 39, i8* %43, align 1
+  %44 = add nsw i32 %n.37, 2                      ; <i32> [#uses=1]
+  br label %bb22
+
+bb22:                                             ; preds = %bb21, %bb20
+  %n.4 = phi i32 [ %44, %bb21 ], [ %41, %bb20 ]   ; <i32> [#uses=2]
+  %45 = load i8* %scevgep15, align 1              ; <i8> [#uses=1]
+  %46 = icmp eq i8 %45, 0                         ; <i1> [#uses=1]
+  br i1 %46, label %bb24, label %bb20
+
+bb24:                                             ; preds = %bb22, %bb23.preheader
+  %n.3.lcssa = phi i32 [ %n.3.ph, %bb23.preheader ], [ %n.4, %bb22 ] ; <i32> [#uses=3]
+  br i1 %30, label %bb26, label %bb25
+
+bb25:                                             ; preds = %bb24
+  %47 = getelementptr inbounds i8* %32, i32 %n.3.lcssa ; <i8*> [#uses=1]
+  store i8 39, i8* %47, align 1
+  %48 = add nsw i32 %n.3.lcssa, 1                 ; <i32> [#uses=1]
+  br label %bb26
+
+bb26:                                             ; preds = %bb25, %bb24
+  %n.5 = phi i32 [ %48, %bb25 ], [ %n.3.lcssa, %bb24 ] ; <i32> [#uses=1]
+  %49 = getelementptr inbounds i8* %32, i32 %n.5  ; <i8*> [#uses=1]
+  store i8 0, i8* %49, align 1
+  ret void
+
+return:                                           ; preds = %bb1
+  ret void
+}
+
+declare i32 @"\01_fwrite$UNIX2003"(i8*, i32, i32, i8*)
+
+declare void @exit(i32) noreturn nounwind
+
+declare i32 @__maskrune(i32, i32)
diff --git a/test/CodeGen/X86/20090313-signext.ll b/test/CodeGen/X86/20090313-signext.ll
index 7313670a1c331..de930d5126782 100644
--- a/test/CodeGen/X86/20090313-signext.ll
+++ b/test/CodeGen/X86/20090313-signext.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=pic > %t
+; RUN: llc < %s -march=x86-64 -relocation-model=pic > %t
 ; RUN: grep {movswl	%ax, %edi} %t
 ; RUN: grep {movw	(%rax), %ax} %t
 ; XFAIL: *
diff --git a/test/CodeGen/X86/Atomics-32.ll b/test/CodeGen/X86/Atomics-32.ll
index 2a3e2285800fb..0e9b73ea10903 100644
--- a/test/CodeGen/X86/Atomics-32.ll
+++ b/test/CodeGen/X86/Atomics-32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ;; Note the 64-bit variants are not supported yet (in 32-bit mode).
 ; ModuleID = 'Atomics.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/Atomics-64.ll b/test/CodeGen/X86/Atomics-64.ll
index 37b2e338eff60..ac174b9f9a3fa 100644
--- a/test/CodeGen/X86/Atomics-64.ll
+++ b/test/CodeGen/X86/Atomics-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; ModuleID = 'Atomics.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 513599c58bcd6..a6fd2d8fe1348 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,186 +1,16 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small > %t
-; RUN: grep leal %t | count 33
-; RUN: grep movl %t | count 239
-; RUN: grep addl %t | count 20
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call	\\\*} %t | count 10
-; RUN: not grep %rip %t
-; RUN: llvm-as < %s | llc -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=pic -code-model=small > %t
-; RUN: grep leal %t | count 43
-; RUN: grep movl %t | count 377
-; RUN: grep addl %t | count 179
-; RUN: grep subl %t | count 6
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: grep _GLOBAL_OFFSET_TABLE_ %t | count 148
-; RUN: grep @GOT %t | count 207
-; RUN: grep @GOTOFF %t | count 58
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: grep @PLT %t | count 20
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call	\\\*} %t | count 10
-; RUN: not grep {%rip} %t
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 91
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 70
-; RUN: grep movq %t | count 56
-; RUN: grep addq %t | count 20
-; RUN: grep subq %t | count 14
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call	\\\*} %t | count 10
-; RUN: grep {%rip} %t | count 139
-; RUN: llvm-as < %s | llc -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 98
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 59
-; RUN: grep movq %t | count 195
-; RUN: grep addq %t | count 36
-; RUN: grep subq %t | count 11
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: grep @GOT %t | count 149
-; RUN: not grep @GOTOFF %t
-; RUN: grep @GOTPCREL %t | count 149
-; RUN: not grep @GOTPLT %t
-; RUN: grep @PLT %t | count 20
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call	\\\*} %t | count 10
-; RUN: grep {%rip} %t | count 207
-
-
-
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small > %t
-; RUN: grep leal %t | count 33
-; RUN: grep movl %t | count 239
-; RUN: grep addl %t | count 20
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call	\\\*} %t | count 10
-; RUN: not grep %rip %t
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small > %t
-; RUN: grep leal %t | count 31
-; RUN: grep movl %t | count 312
-; RUN: grep addl %t | count 32
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call	\\\*} %t | count 10
-; RUN: not grep {%rip} %t
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small > %t
-; RUN: grep leal %t | count 57
-; RUN: grep movl %t | count 292
-; RUN: grep addl %t | count 32
-; RUN: grep subl %t | count 14
-; RUN: not grep leaq %t
-; RUN: not grep movq %t
-; RUN: not grep addq %t
-; RUN: not grep subq %t
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: not grep @GOT %t
-; RUN: not grep @GOTOFF %t
-; RUN: not grep @GOTPCREL %t
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call	\\\*} %t | count 10
-; RUN: not grep {%rip} %t
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 95
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 89
-; RUN: grep movq %t | count 142
-; RUN: grep addq %t | count 30
-; RUN: grep subq %t | count 12
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: grep @GOT %t | count 92
-; RUN: not grep @GOTOFF %t
-; RUN: grep @GOTPCREL %t | count 92
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call	\\\*} %t | count 10
-; RUN: grep {%rip} %t | count 208
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small > %t
-; RUN: not grep leal %t
-; RUN: grep movl %t | count 95
-; RUN: not grep addl %t
-; RUN: not grep subl %t
-; RUN: grep leaq %t | count 89
-; RUN: grep movq %t | count 142
-; RUN: grep addq %t | count 30
-; RUN: grep subq %t | count 12
-; RUN: not grep movabs %t
-; RUN: not grep largecomm %t
-; RUN: not grep _GLOBAL_OFFSET_TABLE_ %t
-; RUN: grep @GOT %t | count 92
-; RUN: not grep @GOTOFF %t
-; RUN: grep @GOTPCREL %t | count 92
-; RUN: not grep @GOTPLT %t
-; RUN: not grep @PLT %t
-; RUN: not grep @PLTOFF %t
-; RUN: grep {call	\\\*} %t | count 10
-; RUN: grep {%rip} %t | count 208
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
 
 @src = external global [131072 x i32]
 @dst = external global [131072 x i32]
@@ -206,6 +36,71 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 0), align 4
 	ret void
+
+; LINUX-64-STATIC: foo00:
+; LINUX-64-STATIC: movl	src(%rip), %eax
+; LINUX-64-STATIC: movl	%eax, dst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo00:
+; LINUX-32-STATIC: 	movl	src, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dst
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo00:
+; LINUX-32-PIC: 	movl	src, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dst
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo00:
+; DARWIN-32-STATIC: 	movl	_src, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dst
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo00:
+; DARWIN-32-PIC: 	call	L1$pb
+; DARWIN-32-PIC-NEXT: L1$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L1$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L1$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @fxo00() nounwind {
@@ -213,18 +108,191 @@ entry:
 	%0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
 	store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 0), align 4
 	ret void
+
+; LINUX-64-STATIC: fxo00:
+; LINUX-64-STATIC: movl	xsrc(%rip), %eax
+; LINUX-64-STATIC: movl	%eax, xdst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo00:
+; LINUX-32-STATIC: 	movl	xsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, xdst
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: fxo00:
+; LINUX-32-PIC: 	movl	xsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, xdst
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: fxo00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _fxo00:
+; DARWIN-32-STATIC: 	movl	_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _xdst
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _fxo00:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _fxo00:
+; DARWIN-32-PIC: 	call	L2$pb
+; DARWIN-32-PIC-NEXT: L2$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L2$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L2$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _fxo00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _fxo00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _fxo00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @foo01() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @dst, i32 0, i32 0), i32** @ptr, align 8
 	ret void
+; LINUX-64-STATIC: foo01:
+; LINUX-64-STATIC: movq	$dst, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo01:
+; LINUX-32-STATIC: 	movl	$dst, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo01:
+; LINUX-32-PIC: 	movl	$dst, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo01:
+; DARWIN-32-STATIC: 	movl	$_dst, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo01:
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo01:
+; DARWIN-32-PIC: 	call	L3$pb
+; DARWIN-32-PIC-NEXT: L3$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L3$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L3$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @fxo01() nounwind {
 entry:
 	store i32* getelementptr ([32 x i32]* @xdst, i32 0, i32 0), i32** @ptr, align 8
 	ret void
+; LINUX-64-STATIC: fxo01:
+; LINUX-64-STATIC: movq	$xdst, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo01:
+; LINUX-32-STATIC: 	movl	$xdst, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: fxo01:
+; LINUX-32-PIC: 	movl	$xdst, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: fxo01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _fxo01:
+; DARWIN-32-STATIC: 	movl	$_xdst, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _fxo01:
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _fxo01:
+; DARWIN-32-PIC: 	call	L4$pb
+; DARWIN-32-PIC-NEXT: L4$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L4$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L4$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _fxo01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _fxo01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _fxo01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @foo02() nounwind {
@@ -233,6 +301,80 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
 	store i32 %1, i32* %0, align 4
 	ret void
+; LINUX-64-STATIC: foo02:
+; LINUX-64-STATIC: movl    src(%rip), %
+; LINUX-64-STATIC: movq    ptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo02:
+; LINUX-32-STATIC: 	movl	src, %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo02:
+; LINUX-32-PIC: 	movl	src, %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo02:
+; DARWIN-32-STATIC: 	movl	_src, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo02:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo02:
+; DARWIN-32-PIC: 	call	L5$pb
+; DARWIN-32-PIC-NEXT: L5$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L5$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L5$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @fxo02() nounwind {
@@ -240,7 +382,81 @@ entry:
 	%0 = load i32** @ptr, align 8
 	%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
 	store i32 %1, i32* %0, align 4
+; LINUX-64-STATIC: fxo02:
+; LINUX-64-STATIC: movl    xsrc(%rip), %
+; LINUX-64-STATIC: movq    ptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo02:
+; LINUX-32-STATIC: 	movl	xsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
 	ret void
+
+; LINUX-32-PIC: fxo02:
+; LINUX-32-PIC: 	movl	xsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: fxo02:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _fxo02:
+; DARWIN-32-STATIC: 	movl	_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _fxo02:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _fxo02:
+; DARWIN-32-PIC: 	call	L6$pb
+; DARWIN-32-PIC-NEXT: L6$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L6$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L6$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _fxo02:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _fxo02:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _fxo02:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @foo03() nounwind {
@@ -248,12 +464,114 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 0), align 32
 	ret void
+; LINUX-64-STATIC: foo03:
+; LINUX-64-STATIC: movl    dsrc(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ddst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo03:
+; LINUX-32-STATIC: 	movl	dsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ddst
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo03:
+; LINUX-32-PIC: 	movl	dsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ddst
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo03:
+; DARWIN-32-STATIC: 	movl	_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ddst
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo03:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ddst
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo03:
+; DARWIN-32-PIC: 	call	L7$pb
+; DARWIN-32-PIC-NEXT: L7$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L7$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _ddst-L7$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo03:
+; DARWIN-64-STATIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ddst(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo03:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ddst(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo03:
+; DARWIN-64-PIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ddst(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @foo04() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i32 0), i32** @dptr, align 8
 	ret void
+; LINUX-64-STATIC: foo04:
+; LINUX-64-STATIC: movq    $ddst, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo04:
+; LINUX-32-STATIC: 	movl	$ddst, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo04:
+; LINUX-32-PIC: 	movl	$ddst, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo04:
+; DARWIN-32-STATIC: 	movl	$_ddst, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo04:
+; DARWIN-32-PIC: 	call	L8$pb
+; DARWIN-32-PIC-NEXT: L8$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L8$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L8$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @foo05() nounwind {
@@ -262,6 +580,70 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
 	store i32 %1, i32* %0, align 4
 	ret void
+; LINUX-64-STATIC: foo05:
+; LINUX-64-STATIC: movl    dsrc(%rip), %
+; LINUX-64-STATIC: movq    dptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo05:
+; LINUX-32-STATIC: 	movl	dsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo05:
+; LINUX-32-PIC: 	movl	dsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo05:
+; DARWIN-32-STATIC: 	movl	_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo05:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo05:
+; DARWIN-32-PIC: 	call	L9$pb
+; DARWIN-32-PIC-NEXT: L9$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L9$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L9$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo05:
+; DARWIN-64-STATIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo05:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo05:
+; DARWIN-64-PIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @foo06() nounwind {
@@ -269,12 +651,111 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 0), align 4
 	ret void
+; LINUX-64-STATIC: foo06:
+; LINUX-64-STATIC: movl    lsrc(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ldst(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo06:
+; LINUX-32-STATIC: 	movl	lsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ldst
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo06:
+; LINUX-32-PIC: 	movl	lsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ldst
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo06:
+; LINUX-64-PIC: 	movl	lsrc(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movl	%eax, ldst(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo06:
+; DARWIN-32-STATIC: 	movl	_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ldst
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo06:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ldst
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo06:
+; DARWIN-32-PIC: 	call	L10$pb
+; DARWIN-32-PIC-NEXT: L10$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L10$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _ldst-L10$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo06:
+; DARWIN-64-STATIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ldst(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo06:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ldst(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo06:
+; DARWIN-64-PIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ldst(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @foo07() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i32 0), i32** @lptr, align 8
 	ret void
+; LINUX-64-STATIC: foo07:
+; LINUX-64-STATIC: movq    $ldst, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo07:
+; LINUX-32-STATIC: 	movl	$ldst, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo07:
+; LINUX-32-PIC: 	movl	$ldst, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo07:
+; DARWIN-32-STATIC: 	movl	$_ldst, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo07:
+; DARWIN-32-PIC: 	call	L11$pb
+; DARWIN-32-PIC-NEXT: L11$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L11$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L11$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @foo08() nounwind {
@@ -283,6 +764,68 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
 	store i32 %1, i32* %0, align 4
 	ret void
+; LINUX-64-STATIC: foo08:
+; LINUX-64-STATIC: movl    lsrc(%rip), %
+; LINUX-64-STATIC: movq    lptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo08:
+; LINUX-32-STATIC: 	movl	lsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo08:
+; LINUX-32-PIC: 	movl	lsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo08:
+; LINUX-64-PIC: 	movl	lsrc(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo08:
+; DARWIN-32-STATIC: 	movl	_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo08:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo08:
+; DARWIN-32-PIC: 	call	L12$pb
+; DARWIN-32-PIC-NEXT: L12$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L12$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L12$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo08:
+; DARWIN-64-STATIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo08:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo08:
+; DARWIN-64-PIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qux00() nounwind {
@@ -290,6 +833,70 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), align 4
 	ret void
+; LINUX-64-STATIC: qux00:
+; LINUX-64-STATIC: movl    src+64(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, dst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux00:
+; LINUX-32-STATIC: 	movl	src+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dst+64
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux00:
+; LINUX-32-PIC: 	movl	src+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dst+64
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux00:
+; DARWIN-32-STATIC: 	movl	_src+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dst+64
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux00:
+; DARWIN-32-PIC: 	call	L13$pb
+; DARWIN-32-PIC-NEXT: L13$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L13$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L13$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qxx00() nounwind {
@@ -297,18 +904,202 @@ entry:
 	%0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
 	store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), align 4
 	ret void
+; LINUX-64-STATIC: qxx00:
+; LINUX-64-STATIC: movl    xsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, xdst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx00:
+; LINUX-32-STATIC: 	movl	xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, xdst+64
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qxx00:
+; LINUX-32-PIC: 	movl	xsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, xdst+64
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qxx00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qxx00:
+; DARWIN-32-STATIC: 	movl	_xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _xdst+64
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qxx00:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qxx00:
+; DARWIN-32-PIC: 	call	L14$pb
+; DARWIN-32-PIC-NEXT: L14$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L14$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L14$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qxx00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qxx00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qxx00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qux01() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), i32** @ptr, align 8
 	ret void
+; LINUX-64-STATIC: qux01:
+; LINUX-64-STATIC: movq    $dst+64, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux01:
+; LINUX-32-STATIC: 	movl	$dst+64, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux01:
+; LINUX-32-PIC: 	movl	$dst+64, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux01:
+; DARWIN-32-STATIC: 	movl	$_dst+64, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux01:
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux01:
+; DARWIN-32-PIC: 	call	L15$pb
+; DARWIN-32-PIC-NEXT: L15$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L15$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	addl	$64, %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L15$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qxx01() nounwind {
 entry:
 	store i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), i32** @ptr, align 8
 	ret void
+; LINUX-64-STATIC: qxx01:
+; LINUX-64-STATIC: movq    $xdst+64, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx01:
+; LINUX-32-STATIC: 	movl	$xdst+64, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qxx01:
+; LINUX-32-PIC: 	movl	$xdst+64, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qxx01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qxx01:
+; DARWIN-32-STATIC: 	movl	$_xdst+64, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qxx01:
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qxx01:
+; DARWIN-32-PIC: 	call	L16$pb
+; DARWIN-32-PIC-NEXT: L16$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L16$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	addl	$64, %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L16$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qxx01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qxx01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qxx01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qux02() nounwind {
@@ -317,7 +1108,81 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux02:
+; LINUX-64-STATIC: movl    src+64(%rip), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux02:
+; LINUX-32-STATIC: 	movl	src+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
 	ret void
+
+; LINUX-32-PIC: qux02:
+; LINUX-32-PIC: 	movl	src+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux02:
+; DARWIN-32-STATIC: 	movl	_src+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux02:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux02:
+; DARWIN-32-PIC: 	call	L17$pb
+; DARWIN-32-PIC-NEXT: L17$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L17$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L17$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qxx02() nounwind {
@@ -326,7 +1191,81 @@ entry:
 	%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qxx02:
+; LINUX-64-STATIC: movl    xsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx02:
+; LINUX-32-STATIC: 	movl	xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
 	ret void
+
+; LINUX-32-PIC: qxx02:
+; LINUX-32-PIC: 	movl	xsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qxx02:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qxx02:
+; DARWIN-32-STATIC: 	movl	_xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qxx02:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qxx02:
+; DARWIN-32-PIC: 	call	L18$pb
+; DARWIN-32-PIC-NEXT: L18$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L18$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L18$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qxx02:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qxx02:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qxx02:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qux03() nounwind {
@@ -334,12 +1273,115 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), align 32
 	ret void
+; LINUX-64-STATIC: qux03:
+; LINUX-64-STATIC: movl    dsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ddst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux03:
+; LINUX-32-STATIC: 	movl	dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ddst+64
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux03:
+; LINUX-32-PIC: 	movl	dsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ddst+64
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux03:
+; DARWIN-32-STATIC: 	movl	_dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ddst+64
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux03:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ddst+64
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux03:
+; DARWIN-32-PIC: 	call	L19$pb
+; DARWIN-32-PIC-NEXT: L19$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L19$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ddst-L19$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux03:
+; DARWIN-64-STATIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ddst+64(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux03:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ddst+64(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux03:
+; DARWIN-64-PIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ddst+64(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qux04() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), i32** @dptr, align 8
 	ret void
+; LINUX-64-STATIC: qux04:
+; LINUX-64-STATIC: movq    $ddst+64, dptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux04:
+; LINUX-32-STATIC: 	movl	$ddst+64, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux04:
+; LINUX-32-PIC: 	movl	$ddst+64, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux04:
+; DARWIN-32-STATIC: 	movl	$_ddst+64, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst+64, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux04:
+; DARWIN-32-PIC: 	call	L20$pb
+; DARWIN-32-PIC-NEXT: L20$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L20$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L20$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux04:
+; DARWIN-64-STATIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux04:
+; DARWIN-64-PIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qux05() nounwind {
@@ -348,7 +1390,71 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux05:
+; LINUX-64-STATIC: movl    dsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq    dptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux05:
+; LINUX-32-STATIC: 	movl	dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
 	ret void
+
+; LINUX-32-PIC: qux05:
+; LINUX-32-PIC: 	movl	dsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux05:
+; DARWIN-32-STATIC: 	movl	_dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux05:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux05:
+; DARWIN-32-PIC: 	call	L21$pb
+; DARWIN-32-PIC-NEXT: L21$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L21$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L21$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux05:
+; DARWIN-64-STATIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux05:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux05:
+; DARWIN-64-PIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qux06() nounwind {
@@ -356,12 +1462,111 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), align 4
 	ret void
+; LINUX-64-STATIC: qux06:
+; LINUX-64-STATIC: movl    lsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ldst+64
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux06:
+; LINUX-32-STATIC: 	movl	lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ldst+64
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux06:
+; LINUX-32-PIC: 	movl	lsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ldst+64
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux06:
+; LINUX-64-PIC: 	movl	lsrc+64(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movl	%eax, ldst+64(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux06:
+; DARWIN-32-STATIC: 	movl	_lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ldst+64
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux06:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ldst+64
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux06:
+; DARWIN-32-PIC: 	call	L22$pb
+; DARWIN-32-PIC-NEXT: L22$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L22$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ldst-L22$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux06:
+; DARWIN-64-STATIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ldst+64(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux06:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ldst+64(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux06:
+; DARWIN-64-PIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ldst+64(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qux07() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), i32** @lptr, align 8
 	ret void
+; LINUX-64-STATIC: qux07:
+; LINUX-64-STATIC: movq    $ldst+64, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux07:
+; LINUX-32-STATIC: 	movl	$ldst+64, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux07:
+; LINUX-32-PIC: 	movl	$ldst+64, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux07:
+; LINUX-64-PIC: 	leaq	ldst+64(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux07:
+; DARWIN-32-STATIC: 	movl	$_ldst+64, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst+64, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux07:
+; DARWIN-32-PIC: 	call	L23$pb
+; DARWIN-32-PIC-NEXT: L23$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L23$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L23$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux07:
+; DARWIN-64-STATIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux07:
+; DARWIN-64-PIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @qux08() nounwind {
@@ -370,7 +1575,69 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux08:
+; LINUX-64-STATIC: movl    lsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq    lptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux08:
+; LINUX-32-STATIC: 	movl	lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
 	ret void
+
+; LINUX-32-PIC: qux08:
+; LINUX-32-PIC: 	movl	lsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux08:
+; LINUX-64-PIC: 	movl	lsrc+64(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux08:
+; DARWIN-32-STATIC: 	movl	_lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux08:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux08:
+; DARWIN-32-PIC: 	call	L24$pb
+; DARWIN-32-PIC-NEXT: L24$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L24$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L24$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux08:
+; DARWIN-64-STATIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux08:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux08:
+; DARWIN-64-PIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ind00(i64 %i) nounwind {
@@ -380,6 +1647,75 @@ entry:
 	%2 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
+; LINUX-64-STATIC: ind00:
+; LINUX-64-STATIC: movl    src(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, dst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, dst(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, dst(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _dst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind00:
+; DARWIN-32-PIC: 	call	L25$pb
+; DARWIN-32-PIC-NEXT: L25$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L25$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L25$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ixd00(i64 %i) nounwind {
@@ -389,6 +1725,75 @@ entry:
 	%2 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
+; LINUX-64-STATIC: ixd00:
+; LINUX-64-STATIC: movl    xsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, xdst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, xdst(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ixd00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, xdst(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ixd00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ixd00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _xdst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ixd00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ixd00:
+; DARWIN-32-PIC: 	call	L26$pb
+; DARWIN-32-PIC-NEXT: L26$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L26$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L26$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ixd00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ixd00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ixd00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ind01(i64 %i) nounwind {
@@ -396,6 +1801,75 @@ entry:
 	%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
 	store i32* %0, i32** @ptr, align 8
 	ret void
+; LINUX-64-STATIC: ind01:
+; LINUX-64-STATIC: leaq    dst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind01:
+; LINUX-64-PIC: 	shlq	$2, %rdi
+; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	shll	$2, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind01:
+; DARWIN-32-PIC: 	call	L27$pb
+; DARWIN-32-PIC-NEXT: L27$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	shll	$2, %ecx
+; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L27$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L27$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind01:
+; DARWIN-64-STATIC: 	shlq	$2, %rdi
+; DARWIN-64-STATIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind01:
+; DARWIN-64-DYNAMIC: 	shlq	$2, %rdi
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind01:
+; DARWIN-64-PIC: 	shlq	$2, %rdi
+; DARWIN-64-PIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ixd01(i64 %i) nounwind {
@@ -403,6 +1877,75 @@ entry:
 	%0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
 	store i32* %0, i32** @ptr, align 8
 	ret void
+; LINUX-64-STATIC: ixd01:
+; LINUX-64-STATIC: leaq    xdst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xdst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ixd01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xdst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ixd01:
+; LINUX-64-PIC: 	shlq	$2, %rdi
+; LINUX-64-PIC-NEXT: 	addq	xdst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ixd01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ixd01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	shll	$2, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ixd01:
+; DARWIN-32-PIC: 	call	L28$pb
+; DARWIN-32-PIC-NEXT: L28$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	shll	$2, %ecx
+; DARWIN-32-PIC-NEXT: 	addl	L_xdst$non_lazy_ptr-L28$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L28$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ixd01:
+; DARWIN-64-STATIC: 	shlq	$2, %rdi
+; DARWIN-64-STATIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ixd01:
+; DARWIN-64-DYNAMIC: 	shlq	$2, %rdi
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ixd01:
+; DARWIN-64-PIC: 	shlq	$2, %rdi
+; DARWIN-64-PIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ind02(i64 %i) nounwind {
@@ -413,6 +1956,85 @@ entry:
 	%3 = getelementptr i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
+; LINUX-64-STATIC: ind02:
+; LINUX-64-STATIC: movl    src(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind02:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind02:
+; DARWIN-32-PIC: 	call	L29$pb
+; DARWIN-32-PIC-NEXT: L29$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L29$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L29$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ixd02(i64 %i) nounwind {
@@ -423,6 +2045,85 @@ entry:
 	%3 = getelementptr i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
+; LINUX-64-STATIC: ixd02:
+; LINUX-64-STATIC: movl    xsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ixd02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ixd02:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ixd02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ixd02:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ixd02:
+; DARWIN-32-PIC: 	call	L30$pb
+; DARWIN-32-PIC-NEXT: L30$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L30$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L30$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ixd02:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ixd02:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ixd02:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ind03(i64 %i) nounwind {
@@ -432,6 +2133,71 @@ entry:
 	%2 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
+; LINUX-64-STATIC: ind03:
+; LINUX-64-STATIC: movl    dsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ddst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind03:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ddst(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind03:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ddst(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind03:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ddst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind03:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ddst(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind03:
+; DARWIN-32-PIC: 	call	L31$pb
+; DARWIN-32-PIC-NEXT: L31$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L31$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, _ddst-L31$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ind04(i64 %i) nounwind {
@@ -439,6 +2205,68 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
 	store i32* %0, i32** @dptr, align 8
 	ret void
+; LINUX-64-STATIC: ind04:
+; LINUX-64-STATIC: leaq    ddst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind04:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ddst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind04:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ddst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind04:
+; LINUX-64-PIC: 	shlq	$2, %rdi
+; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind04:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind04:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind04:
+; DARWIN-32-PIC: 	call	L32$pb
+; DARWIN-32-PIC-NEXT: L32$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L32$pb(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L32$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ind05(i64 %i) nounwind {
@@ -449,6 +2277,78 @@ entry:
 	%3 = getelementptr i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
+; LINUX-64-STATIC: ind05:
+; LINUX-64-STATIC: movl    dsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    dptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind05:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind05:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind05:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind05:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind05:
+; DARWIN-32-PIC: 	call	L33$pb
+; DARWIN-32-PIC-NEXT: L33$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L33$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L33$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind05:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind05:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind05:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ind06(i64 %i) nounwind {
@@ -458,6 +2358,71 @@ entry:
 	%2 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
+; LINUX-64-STATIC: ind06:
+; LINUX-64-STATIC: movl    lsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ldst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind06:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ldst(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind06:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ldst(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind06:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ldst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind06:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ldst(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind06:
+; DARWIN-32-PIC: 	call	L34$pb
+; DARWIN-32-PIC-NEXT: L34$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L34$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, _ldst-L34$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ind07(i64 %i) nounwind {
@@ -465,6 +2430,67 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
 	store i32* %0, i32** @lptr, align 8
 	ret void
+; LINUX-64-STATIC: ind07:
+; LINUX-64-STATIC: leaq    ldst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind07:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ldst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind07:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ldst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind07:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind07:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind07:
+; DARWIN-32-PIC: 	call	L35$pb
+; DARWIN-32-PIC-NEXT: L35$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L35$pb(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L35$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ind08(i64 %i) nounwind {
@@ -475,6 +2501,77 @@ entry:
 	%3 = getelementptr i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
+; LINUX-64-STATIC: ind08:
+; LINUX-64-STATIC: movl    lsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    lptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind08:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind08:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind08:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind08:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind08:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind08:
+; DARWIN-32-PIC: 	call	L36$pb
+; DARWIN-32-PIC-NEXT: L36$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L36$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L36$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind08:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind08:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind08:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @off00(i64 %i) nounwind {
@@ -485,6 +2582,75 @@ entry:
 	%3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
+; LINUX-64-STATIC: off00:
+; LINUX-64-STATIC: movl    src+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, dst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, dst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, dst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _dst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off00:
+; DARWIN-32-PIC: 	call	L37$pb
+; DARWIN-32-PIC-NEXT: L37$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L37$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L37$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @oxf00(i64 %i) nounwind {
@@ -495,6 +2661,75 @@ entry:
 	%3 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
+; LINUX-64-STATIC: oxf00:
+; LINUX-64-STATIC: movl    xsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, xdst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, xdst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: oxf00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, xdst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: oxf00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _oxf00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _xdst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _oxf00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _oxf00:
+; DARWIN-32-PIC: 	call	L38$pb
+; DARWIN-32-PIC-NEXT: L38$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L38$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L38$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _oxf00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _oxf00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _oxf00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @off01(i64 %i) nounwind {
@@ -503,6 +2738,75 @@ entry:
 	%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
 	store i32* %0, i32** @ptr, align 8
 	ret void
+; LINUX-64-STATIC: off01:
+; LINUX-64-STATIC: leaq    dst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off01:
+; DARWIN-32-PIC: 	call	L39$pb
+; DARWIN-32-PIC-NEXT: L39$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L39$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	leal	64(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L39$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @oxf01(i64 %i) nounwind {
@@ -511,6 +2815,75 @@ entry:
 	%0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %.sum
 	store i32* %0, i32** @ptr, align 8
 	ret void
+; LINUX-64-STATIC: oxf01:
+; LINUX-64-STATIC: leaq    xdst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: oxf01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: oxf01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _oxf01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _oxf01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _oxf01:
+; DARWIN-32-PIC: 	call	L40$pb
+; DARWIN-32-PIC-NEXT: L40$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L40$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	leal	64(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L40$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _oxf01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _oxf01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _oxf01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @off02(i64 %i) nounwind {
@@ -522,6 +2895,85 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
+; LINUX-64-STATIC: off02:
+; LINUX-64-STATIC: movl    src+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off02:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off02:
+; DARWIN-32-PIC: 	call	L41$pb
+; DARWIN-32-PIC-NEXT: L41$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L41$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L41$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @oxf02(i64 %i) nounwind {
@@ -533,6 +2985,85 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
+; LINUX-64-STATIC: oxf02:
+; LINUX-64-STATIC: movl    xsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: oxf02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: oxf02:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _oxf02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _oxf02:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _oxf02:
+; DARWIN-32-PIC: 	call	L42$pb
+; DARWIN-32-PIC-NEXT: L42$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L42$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L42$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _oxf02:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _oxf02:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _oxf02:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @off03(i64 %i) nounwind {
@@ -543,6 +3074,71 @@ entry:
 	%3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
+; LINUX-64-STATIC: off03:
+; LINUX-64-STATIC: movl    dsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ddst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off03:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ddst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off03:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ddst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off03:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ddst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off03:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ddst+64(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off03:
+; DARWIN-32-PIC: 	call	L43$pb
+; DARWIN-32-PIC-NEXT: L43$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L43$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ddst-L43$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @off04(i64 %i) nounwind {
@@ -551,6 +3147,68 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
 	store i32* %0, i32** @dptr, align 8
 	ret void
+; LINUX-64-STATIC: off04:
+; LINUX-64-STATIC: leaq    ddst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off04:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off04:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off04:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off04:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off04:
+; DARWIN-32-PIC: 	call	L44$pb
+; DARWIN-32-PIC-NEXT: L44$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L44$pb)+64(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L44$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @off05(i64 %i) nounwind {
@@ -562,6 +3220,78 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
+; LINUX-64-STATIC: off05:
+; LINUX-64-STATIC: movl    dsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    dptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off05:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off05:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off05:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off05:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off05:
+; DARWIN-32-PIC: 	call	L45$pb
+; DARWIN-32-PIC-NEXT: L45$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L45$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L45$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off05:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off05:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off05:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @off06(i64 %i) nounwind {
@@ -572,6 +3302,71 @@ entry:
 	%3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
+; LINUX-64-STATIC: off06:
+; LINUX-64-STATIC: movl    lsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ldst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off06:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ldst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off06:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ldst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off06:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ldst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off06:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ldst+64(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off06:
+; DARWIN-32-PIC: 	call	L46$pb
+; DARWIN-32-PIC-NEXT: L46$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L46$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ldst-L46$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @off07(i64 %i) nounwind {
@@ -580,6 +3375,67 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
 	store i32* %0, i32** @lptr, align 8
 	ret void
+; LINUX-64-STATIC: off07:
+; LINUX-64-STATIC: leaq    ldst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off07:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off07:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off07:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off07:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off07:
+; DARWIN-32-PIC: 	call	L47$pb
+; DARWIN-32-PIC-NEXT: L47$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L47$pb)+64(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L47$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @off08(i64 %i) nounwind {
@@ -591,6 +3447,77 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
+; LINUX-64-STATIC: off08:
+; LINUX-64-STATIC: movl    lsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    lptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off08:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off08:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off08:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off08:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off08:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off08:
+; DARWIN-32-PIC: 	call	L48$pb
+; DARWIN-32-PIC-NEXT: L48$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L48$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L48$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off08:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off08:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off08:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @moo00(i64 %i) nounwind {
@@ -598,12 +3525,136 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), align 4
 	ret void
+; LINUX-64-STATIC: moo00:
+; LINUX-64-STATIC: movl    src+262144(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, dst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo00:
+; LINUX-32-STATIC: 	movl	src+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dst+262144
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo00:
+; LINUX-32-PIC: 	movl	src+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dst+262144
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo00:
+; DARWIN-32-STATIC: 	movl	_src+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dst+262144
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo00:
+; DARWIN-32-PIC: 	call	L49$pb
+; DARWIN-32-PIC-NEXT: L49$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L49$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	262144(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L49$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @moo01(i64 %i) nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), i32** @ptr, align 8
 	ret void
+; LINUX-64-STATIC: moo01:
+; LINUX-64-STATIC: movq    $dst+262144, ptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo01:
+; LINUX-32-STATIC: 	movl	$dst+262144, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo01:
+; LINUX-32-PIC: 	movl	$dst+262144, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo01:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo01:
+; DARWIN-32-STATIC: 	movl	$_dst+262144, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo01:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo01:
+; DARWIN-32-PIC: 	call	L50$pb
+; DARWIN-32-PIC-NEXT: L50$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %ecx
+; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L50$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L50$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo01:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo01:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo01:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @moo02(i64 %i) nounwind {
@@ -613,6 +3664,80 @@ entry:
 	%2 = getelementptr i32* %0, i64 65536
 	store i32 %1, i32* %2, align 4
 	ret void
+; LINUX-64-STATIC: moo02:
+; LINUX-64-STATIC: movl    src+262144(%rip), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo02:
+; LINUX-32-STATIC: 	movl	src+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo02:
+; LINUX-32-PIC: 	movl	src+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo02:
+; DARWIN-32-STATIC: 	movl	_src+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo02:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo02:
+; DARWIN-32-PIC: 	call	L51$pb
+; DARWIN-32-PIC-NEXT: L51$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L51$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	262144(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L51$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @moo03(i64 %i) nounwind {
@@ -620,12 +3745,115 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), align 32
 	ret void
+; LINUX-64-STATIC: moo03:
+; LINUX-64-STATIC: movl    dsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ddst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo03:
+; LINUX-32-STATIC: 	movl	dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ddst+262144
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo03:
+; LINUX-32-PIC: 	movl	dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ddst+262144
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo03:
+; DARWIN-32-STATIC: 	movl	_dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ddst+262144
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo03:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ddst+262144
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo03:
+; DARWIN-32-PIC: 	call	L52$pb
+; DARWIN-32-PIC-NEXT: L52$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L52$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ddst-L52$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo03:
+; DARWIN-64-STATIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ddst+262144(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo03:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ddst+262144(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo03:
+; DARWIN-64-PIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ddst+262144(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @moo04(i64 %i) nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), i32** @dptr, align 8
 	ret void
+; LINUX-64-STATIC: moo04:
+; LINUX-64-STATIC: movq    $ddst+262144, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo04:
+; LINUX-32-STATIC: 	movl	$ddst+262144, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo04:
+; LINUX-32-PIC: 	movl	$ddst+262144, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo04:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo04:
+; DARWIN-32-STATIC: 	movl	$_ddst+262144, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst+262144, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo04:
+; DARWIN-32-PIC: 	call	L53$pb
+; DARWIN-32-PIC-NEXT: L53$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L53$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L53$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo04:
+; DARWIN-64-STATIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo04:
+; DARWIN-64-PIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @moo05(i64 %i) nounwind {
@@ -635,6 +3863,70 @@ entry:
 	%2 = getelementptr i32* %0, i64 65536
 	store i32 %1, i32* %2, align 4
 	ret void
+; LINUX-64-STATIC: moo05:
+; LINUX-64-STATIC: movl    dsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movq    dptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo05:
+; LINUX-32-STATIC: 	movl	dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo05:
+; LINUX-32-PIC: 	movl	dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo05:
+; DARWIN-32-STATIC: 	movl	_dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo05:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo05:
+; DARWIN-32-PIC: 	call	L54$pb
+; DARWIN-32-PIC-NEXT: L54$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L54$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L54$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo05:
+; DARWIN-64-STATIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo05:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo05:
+; DARWIN-64-PIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @moo06(i64 %i) nounwind {
@@ -642,12 +3934,111 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), align 4
 	ret void
+; LINUX-64-STATIC: moo06:
+; LINUX-64-STATIC: movl    lsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ldst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo06:
+; LINUX-32-STATIC: 	movl	lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ldst+262144
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo06:
+; LINUX-32-PIC: 	movl	lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ldst+262144
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo06:
+; LINUX-64-PIC: 	movl	lsrc+262144(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movl	%eax, ldst+262144(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo06:
+; DARWIN-32-STATIC: 	movl	_lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ldst+262144
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo06:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ldst+262144
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo06:
+; DARWIN-32-PIC: 	call	L55$pb
+; DARWIN-32-PIC-NEXT: L55$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L55$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ldst-L55$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo06:
+; DARWIN-64-STATIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ldst+262144(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo06:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ldst+262144(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo06:
+; DARWIN-64-PIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ldst+262144(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @moo07(i64 %i) nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), i32** @lptr, align 8
 	ret void
+; LINUX-64-STATIC: moo07:
+; LINUX-64-STATIC: movq    $ldst+262144, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo07:
+; LINUX-32-STATIC: 	movl	$ldst+262144, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo07:
+; LINUX-32-PIC: 	movl	$ldst+262144, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo07:
+; LINUX-64-PIC: 	leaq	ldst+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo07:
+; DARWIN-32-STATIC: 	movl	$_ldst+262144, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst+262144, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo07:
+; DARWIN-32-PIC: 	call	L56$pb
+; DARWIN-32-PIC-NEXT: L56$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L56$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L56$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo07:
+; DARWIN-64-STATIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo07:
+; DARWIN-64-PIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @moo08(i64 %i) nounwind {
@@ -657,6 +4048,68 @@ entry:
 	%2 = getelementptr i32* %0, i64 65536
 	store i32 %1, i32* %2, align 4
 	ret void
+; LINUX-64-STATIC: moo08:
+; LINUX-64-STATIC: movl    lsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movq    lptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo08:
+; LINUX-32-STATIC: 	movl	lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo08:
+; LINUX-32-PIC: 	movl	lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo08:
+; LINUX-64-PIC: 	movl	lsrc+262144(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo08:
+; DARWIN-32-STATIC: 	movl	_lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo08:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo08:
+; DARWIN-32-PIC: 	call	L57$pb
+; DARWIN-32-PIC-NEXT: L57$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L57$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L57$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo08:
+; DARWIN-64-STATIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo08:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo08:
+; DARWIN-64-PIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @big00(i64 %i) nounwind {
@@ -667,6 +4120,75 @@ entry:
 	%3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
+; LINUX-64-STATIC: big00:
+; LINUX-64-STATIC: movl    src+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, dst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, dst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, dst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _dst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big00:
+; DARWIN-32-PIC: 	call	L58$pb
+; DARWIN-32-PIC-NEXT: L58$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L58$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	262144(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L58$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @big01(i64 %i) nounwind {
@@ -675,6 +4197,75 @@ entry:
 	%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
 	store i32* %0, i32** @ptr, align 8
 	ret void
+; LINUX-64-STATIC: big01:
+; LINUX-64-STATIC: leaq    dst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, ptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big01:
+; DARWIN-32-PIC: 	call	L59$pb
+; DARWIN-32-PIC-NEXT: L59$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L59$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	leal	262144(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L59$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @big02(i64 %i) nounwind {
@@ -686,6 +4277,85 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
+; LINUX-64-STATIC: big02:
+; LINUX-64-STATIC: movl    src+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big02:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big02:
+; DARWIN-32-PIC: 	call	L60$pb
+; DARWIN-32-PIC-NEXT: L60$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L60$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	262144(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L60$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @big03(i64 %i) nounwind {
@@ -696,6 +4366,71 @@ entry:
 	%3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
+; LINUX-64-STATIC: big03:
+; LINUX-64-STATIC: movl    dsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ddst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big03:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ddst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big03:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ddst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big03:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ddst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big03:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ddst+262144(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big03:
+; DARWIN-32-PIC: 	call	L61$pb
+; DARWIN-32-PIC-NEXT: L61$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L61$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ddst-L61$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @big04(i64 %i) nounwind {
@@ -704,6 +4439,68 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
 	store i32* %0, i32** @dptr, align 8
 	ret void
+; LINUX-64-STATIC: big04:
+; LINUX-64-STATIC: leaq    ddst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big04:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big04:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big04:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big04:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big04:
+; DARWIN-32-PIC: 	call	L62$pb
+; DARWIN-32-PIC-NEXT: L62$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L62$pb)+262144(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L62$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @big05(i64 %i) nounwind {
@@ -715,6 +4512,78 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
+; LINUX-64-STATIC: big05:
+; LINUX-64-STATIC: movl    dsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    dptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big05:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big05:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big05:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big05:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big05:
+; DARWIN-32-PIC: 	call	L63$pb
+; DARWIN-32-PIC-NEXT: L63$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L63$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L63$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big05:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big05:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big05:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @big06(i64 %i) nounwind {
@@ -725,6 +4594,71 @@ entry:
 	%3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
+; LINUX-64-STATIC: big06:
+; LINUX-64-STATIC: movl    lsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ldst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big06:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ldst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big06:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ldst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big06:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ldst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big06:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ldst+262144(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big06:
+; DARWIN-32-PIC: 	call	L64$pb
+; DARWIN-32-PIC-NEXT: L64$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L64$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ldst-L64$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @big07(i64 %i) nounwind {
@@ -733,6 +4667,67 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
 	store i32* %0, i32** @lptr, align 8
 	ret void
+; LINUX-64-STATIC: big07:
+; LINUX-64-STATIC: leaq    ldst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big07:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big07:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big07:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big07:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big07:
+; DARWIN-32-PIC: 	call	L65$pb
+; DARWIN-32-PIC-NEXT: L65$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L65$pb)+262144(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L65$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @big08(i64 %i) nounwind {
@@ -744,81 +4739,782 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
+; LINUX-64-STATIC: big08:
+; LINUX-64-STATIC: movl    lsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    lptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big08:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big08:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big08:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big08:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big08:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big08:
+; DARWIN-32-PIC: 	call	L66$pb
+; DARWIN-32-PIC-NEXT: L66$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L66$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L66$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big08:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big08:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big08:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bar00() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @src to i8*)
+; LINUX-64-STATIC: bar00:
+; LINUX-64-STATIC: movl    $src, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar00:
+; LINUX-32-STATIC: 	movl	$src, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar00:
+; LINUX-32-PIC: 	movl	$src, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar00:
+; DARWIN-32-STATIC: 	movl	$_src, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar00:
+; DARWIN-32-PIC: 	call	L67$pb
+; DARWIN-32-PIC-NEXT: L67$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L67$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bxr00() nounwind {
 entry:
 	ret i8* bitcast ([32 x i32]* @xsrc to i8*)
+; LINUX-64-STATIC: bxr00:
+; LINUX-64-STATIC: movl    $xsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxr00:
+; LINUX-32-STATIC: 	movl	$xsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bxr00:
+; LINUX-32-PIC: 	movl	$xsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bxr00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bxr00:
+; DARWIN-32-STATIC: 	movl	$_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bxr00:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bxr00:
+; DARWIN-32-PIC: 	call	L68$pb
+; DARWIN-32-PIC-NEXT: L68$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L68$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bxr00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bxr00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bxr00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bar01() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @dst to i8*)
+; LINUX-64-STATIC: bar01:
+; LINUX-64-STATIC: movl    $dst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar01:
+; LINUX-32-STATIC: 	movl	$dst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar01:
+; LINUX-32-PIC: 	movl	$dst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar01:
+; DARWIN-32-STATIC: 	movl	$_dst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar01:
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar01:
+; DARWIN-32-PIC: 	call	L69$pb
+; DARWIN-32-PIC-NEXT: L69$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L69$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bxr01() nounwind {
 entry:
 	ret i8* bitcast ([32 x i32]* @xdst to i8*)
+; LINUX-64-STATIC: bxr01:
+; LINUX-64-STATIC: movl    $xdst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxr01:
+; LINUX-32-STATIC: 	movl	$xdst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bxr01:
+; LINUX-32-PIC: 	movl	$xdst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bxr01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bxr01:
+; DARWIN-32-STATIC: 	movl	$_xdst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bxr01:
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bxr01:
+; DARWIN-32-PIC: 	call	L70$pb
+; DARWIN-32-PIC-NEXT: L70$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L70$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bxr01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bxr01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bxr01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bar02() nounwind {
 entry:
 	ret i8* bitcast (i32** @ptr to i8*)
+; LINUX-64-STATIC: bar02:
+; LINUX-64-STATIC: movl    $ptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar02:
+; LINUX-32-STATIC: 	movl	$ptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar02:
+; LINUX-32-PIC: 	movl	$ptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar02:
+; DARWIN-32-STATIC: 	movl	$_ptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar02:
+; DARWIN-32-PIC: 	call	L71$pb
+; DARWIN-32-PIC-NEXT: L71$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L71$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bar03() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @dsrc to i8*)
+; LINUX-64-STATIC: bar03:
+; LINUX-64-STATIC: movl    $dsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar03:
+; LINUX-32-STATIC: 	movl	$dsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar03:
+; LINUX-32-PIC: 	movl	$dsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar03:
+; DARWIN-32-STATIC: 	movl	$_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar03:
+; DARWIN-32-DYNAMIC: 	movl	$_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar03:
+; DARWIN-32-PIC: 	call	L72$pb
+; DARWIN-32-PIC-NEXT: L72$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_dsrc-L72$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bar04() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @ddst to i8*)
+; LINUX-64-STATIC: bar04:
+; LINUX-64-STATIC: movl    $ddst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar04:
+; LINUX-32-STATIC: 	movl	$ddst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar04:
+; LINUX-32-PIC: 	movl	$ddst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar04:
+; DARWIN-32-STATIC: 	movl	$_ddst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar04:
+; DARWIN-32-PIC: 	call	L73$pb
+; DARWIN-32-PIC-NEXT: L73$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L73$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bar05() nounwind {
 entry:
 	ret i8* bitcast (i32** @dptr to i8*)
+; LINUX-64-STATIC: bar05:
+; LINUX-64-STATIC: movl    $dptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar05:
+; LINUX-32-STATIC: 	movl	$dptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar05:
+; LINUX-32-PIC: 	movl	$dptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar05:
+; DARWIN-32-STATIC: 	movl	$_dptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar05:
+; DARWIN-32-DYNAMIC: 	movl	$_dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar05:
+; DARWIN-32-PIC: 	call	L74$pb
+; DARWIN-32-PIC-NEXT: L74$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_dptr-L74$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar05:
+; DARWIN-64-STATIC: 	leaq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar05:
+; DARWIN-64-DYNAMIC: 	leaq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar05:
+; DARWIN-64-PIC: 	leaq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bar06() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @lsrc to i8*)
+; LINUX-64-STATIC: bar06:
+; LINUX-64-STATIC: movl    $lsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar06:
+; LINUX-32-STATIC: 	movl	$lsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar06:
+; LINUX-32-PIC: 	movl	$lsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar06:
+; DARWIN-32-STATIC: 	movl	$_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar06:
+; DARWIN-32-DYNAMIC: 	movl	$_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar06:
+; DARWIN-32-PIC: 	call	L75$pb
+; DARWIN-32-PIC-NEXT: L75$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_lsrc-L75$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bar07() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @ldst to i8*)
+; LINUX-64-STATIC: bar07:
+; LINUX-64-STATIC: movl    $ldst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar07:
+; LINUX-32-STATIC: 	movl	$ldst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar07:
+; LINUX-32-PIC: 	movl	$ldst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar07:
+; DARWIN-32-STATIC: 	movl	$_ldst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar07:
+; DARWIN-32-PIC: 	call	L76$pb
+; DARWIN-32-PIC-NEXT: L76$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L76$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bar08() nounwind {
 entry:
 	ret i8* bitcast (i32** @lptr to i8*)
+; LINUX-64-STATIC: bar08:
+; LINUX-64-STATIC: movl    $lptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar08:
+; LINUX-32-STATIC: 	movl	$lptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar08:
+; LINUX-32-PIC: 	movl	$lptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar08:
+; LINUX-64-PIC: 	leaq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar08:
+; DARWIN-32-STATIC: 	movl	$_lptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar08:
+; DARWIN-32-DYNAMIC: 	movl	$_lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar08:
+; DARWIN-32-PIC: 	call	L77$pb
+; DARWIN-32-PIC-NEXT: L77$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_lptr-L77$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar08:
+; DARWIN-64-STATIC: 	leaq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar08:
+; DARWIN-64-DYNAMIC: 	leaq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar08:
+; DARWIN-64-PIC: 	leaq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @har00() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @src to i8*)
+; LINUX-64-STATIC: har00:
+; LINUX-64-STATIC: movl    $src, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har00:
+; LINUX-32-STATIC: 	movl	$src, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har00:
+; LINUX-32-PIC: 	movl	$src, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har00:
+; DARWIN-32-STATIC: 	movl	$_src, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har00:
+; DARWIN-32-PIC: 	call	L78$pb
+; DARWIN-32-PIC-NEXT: L78$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L78$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @hxr00() nounwind {
 entry:
 	ret i8* bitcast ([32 x i32]* @xsrc to i8*)
+; LINUX-64-STATIC: hxr00:
+; LINUX-64-STATIC: movl    $xsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: hxr00:
+; LINUX-32-STATIC: 	movl	$xsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: hxr00:
+; LINUX-32-PIC: 	movl	$xsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: hxr00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _hxr00:
+; DARWIN-32-STATIC: 	movl	$_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _hxr00:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _hxr00:
+; DARWIN-32-PIC: 	call	L79$pb
+; DARWIN-32-PIC-NEXT: L79$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L79$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _hxr00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _hxr00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _hxr00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @har01() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @dst to i8*)
+; LINUX-64-STATIC: har01:
+; LINUX-64-STATIC: movl    $dst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har01:
+; LINUX-32-STATIC: 	movl	$dst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har01:
+; LINUX-32-PIC: 	movl	$dst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har01:
+; DARWIN-32-STATIC: 	movl	$_dst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har01:
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har01:
+; DARWIN-32-PIC: 	call	L80$pb
+; DARWIN-32-PIC-NEXT: L80$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L80$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @hxr01() nounwind {
 entry:
 	ret i8* bitcast ([32 x i32]* @xdst to i8*)
+; LINUX-64-STATIC: hxr01:
+; LINUX-64-STATIC: movl    $xdst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: hxr01:
+; LINUX-32-STATIC: 	movl	$xdst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: hxr01:
+; LINUX-32-PIC: 	movl	$xdst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: hxr01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _hxr01:
+; DARWIN-32-STATIC: 	movl	$_xdst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _hxr01:
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _hxr01:
+; DARWIN-32-PIC: 	call	L81$pb
+; DARWIN-32-PIC-NEXT: L81$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L81$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _hxr01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _hxr01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _hxr01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @har02() nounwind {
@@ -826,16 +5522,148 @@ entry:
 	%0 = load i32** @ptr, align 8
 	%1 = bitcast i32* %0 to i8*
 	ret i8* %1
+; LINUX-64-STATIC: har02:
+; LINUX-64-STATIC: movq    ptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har02:
+; LINUX-32-STATIC: 	movl	ptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har02:
+; LINUX-32-PIC: 	movl	ptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har02:
+; DARWIN-32-STATIC: 	movl	_ptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har02:
+; DARWIN-32-PIC: 	call	L82$pb
+; DARWIN-32-PIC-NEXT: L82$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L82$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @har03() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @dsrc to i8*)
+; LINUX-64-STATIC: har03:
+; LINUX-64-STATIC: movl    $dsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har03:
+; LINUX-32-STATIC: 	movl	$dsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har03:
+; LINUX-32-PIC: 	movl	$dsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har03:
+; DARWIN-32-STATIC: 	movl	$_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har03:
+; DARWIN-32-DYNAMIC: 	movl	$_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har03:
+; DARWIN-32-PIC: 	call	L83$pb
+; DARWIN-32-PIC-NEXT: L83$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_dsrc-L83$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @har04() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @ddst to i8*)
+; LINUX-64-STATIC: har04:
+; LINUX-64-STATIC: movl    $ddst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har04:
+; LINUX-32-STATIC: 	movl	$ddst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har04:
+; LINUX-32-PIC: 	movl	$ddst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har04:
+; DARWIN-32-STATIC: 	movl	$_ddst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har04:
+; DARWIN-32-PIC: 	call	L84$pb
+; DARWIN-32-PIC-NEXT: L84$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L84$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @har05() nounwind {
@@ -843,16 +5671,143 @@ entry:
 	%0 = load i32** @dptr, align 8
 	%1 = bitcast i32* %0 to i8*
 	ret i8* %1
+; LINUX-64-STATIC: har05:
+; LINUX-64-STATIC: movq    dptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har05:
+; LINUX-32-STATIC: 	movl	dptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har05:
+; LINUX-32-PIC: 	movl	dptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har05:
+; DARWIN-32-STATIC: 	movl	_dptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har05:
+; DARWIN-32-DYNAMIC: 	movl	_dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har05:
+; DARWIN-32-PIC: 	call	L85$pb
+; DARWIN-32-PIC-NEXT: L85$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L85$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har05:
+; DARWIN-64-STATIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har05:
+; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har05:
+; DARWIN-64-PIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @har06() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @lsrc to i8*)
+; LINUX-64-STATIC: har06:
+; LINUX-64-STATIC: movl    $lsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har06:
+; LINUX-32-STATIC: 	movl	$lsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har06:
+; LINUX-32-PIC: 	movl	$lsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har06:
+; DARWIN-32-STATIC: 	movl	$_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har06:
+; DARWIN-32-DYNAMIC: 	movl	$_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har06:
+; DARWIN-32-PIC: 	call	L86$pb
+; DARWIN-32-PIC-NEXT: L86$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_lsrc-L86$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @har07() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @ldst to i8*)
+; LINUX-64-STATIC: har07:
+; LINUX-64-STATIC: movl    $ldst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har07:
+; LINUX-32-STATIC: 	movl	$ldst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har07:
+; LINUX-32-PIC: 	movl	$ldst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har07:
+; DARWIN-32-STATIC: 	movl	$_ldst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har07:
+; DARWIN-32-PIC: 	call	L87$pb
+; DARWIN-32-PIC-NEXT: L87$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L87$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @har08() nounwind {
@@ -860,26 +5815,260 @@ entry:
 	%0 = load i32** @lptr, align 8
 	%1 = bitcast i32* %0 to i8*
 	ret i8* %1
+; LINUX-64-STATIC: har08:
+; LINUX-64-STATIC: movq    lptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har08:
+; LINUX-32-STATIC: 	movl	lptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har08:
+; LINUX-32-PIC: 	movl	lptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har08:
+; LINUX-64-PIC: 	movq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har08:
+; DARWIN-32-STATIC: 	movl	_lptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har08:
+; DARWIN-32-DYNAMIC: 	movl	_lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har08:
+; DARWIN-32-PIC: 	call	L88$pb
+; DARWIN-32-PIC-NEXT: L88$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L88$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har08:
+; DARWIN-64-STATIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har08:
+; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har08:
+; DARWIN-64-PIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bat00() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat00:
+; LINUX-64-STATIC: movl    $src+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat00:
+; LINUX-32-STATIC: 	movl	$src+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat00:
+; LINUX-32-PIC: 	movl	$src+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat00:
+; DARWIN-32-STATIC: 	movl	$_src+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat00:
+; DARWIN-32-PIC: 	call	L89$pb
+; DARWIN-32-PIC-NEXT: L89$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L89$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bxt00() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bxt00:
+; LINUX-64-STATIC: movl    $xsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxt00:
+; LINUX-32-STATIC: 	movl	$xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bxt00:
+; LINUX-32-PIC: 	movl	$xsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bxt00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bxt00:
+; DARWIN-32-STATIC: 	movl	$_xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bxt00:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bxt00:
+; DARWIN-32-PIC: 	call	L90$pb
+; DARWIN-32-PIC-NEXT: L90$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L90$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bxt00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bxt00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bxt00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bat01() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat01:
+; LINUX-64-STATIC: movl    $dst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat01:
+; LINUX-32-STATIC: 	movl	$dst+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat01:
+; LINUX-32-PIC: 	movl	$dst+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat01:
+; DARWIN-32-STATIC: 	movl	$_dst+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat01:
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat01:
+; DARWIN-32-PIC: 	call	L91$pb
+; DARWIN-32-PIC-NEXT: L91$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L91$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bxt01() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bxt01:
+; LINUX-64-STATIC: movl    $xdst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxt01:
+; LINUX-32-STATIC: 	movl	$xdst+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bxt01:
+; LINUX-32-PIC: 	movl	$xdst+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bxt01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bxt01:
+; DARWIN-32-STATIC: 	movl	$_xdst+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bxt01:
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bxt01:
+; DARWIN-32-PIC: 	call	L92$pb
+; DARWIN-32-PIC-NEXT: L92$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L92$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bxt01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bxt01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bxt01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bat02() nounwind {
@@ -888,16 +6077,160 @@ entry:
 	%1 = getelementptr i32* %0, i64 16
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: bat02:
+; LINUX-64-STATIC: movq    ptr(%rip), %rax
+; LINUX-64-STATIC: addq    $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat02:
+; LINUX-32-STATIC: 	movl	ptr, %eax
+; LINUX-32-STATIC-NEXT: 	addl	$64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat02:
+; LINUX-32-PIC: 	movl	ptr, %eax
+; LINUX-32-PIC-NEXT: 	addl	$64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat02:
+; DARWIN-32-STATIC: 	movl	_ptr, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat02:
+; DARWIN-32-PIC: 	call	L93$pb
+; DARWIN-32-PIC-NEXT: L93$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L93$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bat03() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat03:
+; LINUX-64-STATIC: movl    $dsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat03:
+; LINUX-32-STATIC: 	movl	$dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat03:
+; LINUX-32-PIC: 	movl	$dsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat03:
+; DARWIN-32-STATIC: 	movl	$_dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat03:
+; DARWIN-32-DYNAMIC: 	movl	$_dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat03:
+; DARWIN-32-PIC: 	call	L94$pb
+; DARWIN-32-PIC-NEXT: L94$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L94$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat03:
+; DARWIN-64-STATIC: 	leaq	_dsrc+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat03:
+; DARWIN-64-PIC: 	leaq	_dsrc+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bat04() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat04:
+; LINUX-64-STATIC: movl    $ddst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat04:
+; LINUX-32-STATIC: 	movl	$ddst+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat04:
+; LINUX-32-PIC: 	movl	$ddst+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat04:
+; DARWIN-32-STATIC: 	movl	$_ddst+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat04:
+; DARWIN-32-PIC: 	call	L95$pb
+; DARWIN-32-PIC-NEXT: L95$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L95$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat04:
+; DARWIN-64-STATIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat04:
+; DARWIN-64-PIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bat05() nounwind {
@@ -906,16 +6239,153 @@ entry:
 	%1 = getelementptr i32* %0, i64 16
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: bat05:
+; LINUX-64-STATIC: movq    dptr(%rip), %rax
+; LINUX-64-STATIC: addq    $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat05:
+; LINUX-32-STATIC: 	movl	dptr, %eax
+; LINUX-32-STATIC-NEXT: 	addl	$64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat05:
+; LINUX-32-PIC: 	movl	dptr, %eax
+; LINUX-32-PIC-NEXT: 	addl	$64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat05:
+; DARWIN-32-STATIC: 	movl	_dptr, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat05:
+; DARWIN-32-DYNAMIC: 	movl	_dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat05:
+; DARWIN-32-PIC: 	call	L96$pb
+; DARWIN-32-PIC-NEXT: L96$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L96$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat05:
+; DARWIN-64-STATIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat05:
+; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat05:
+; DARWIN-64-PIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bat06() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat06:
+; LINUX-64-STATIC: movl    $lsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat06:
+; LINUX-32-STATIC: 	movl	$lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat06:
+; LINUX-32-PIC: 	movl	$lsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat06:
+; LINUX-64-PIC: 	leaq	lsrc+64(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat06:
+; DARWIN-32-STATIC: 	movl	$_lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat06:
+; DARWIN-32-DYNAMIC: 	movl	$_lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat06:
+; DARWIN-32-PIC: 	call	L97$pb
+; DARWIN-32-PIC-NEXT: L97$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L97$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat06:
+; DARWIN-64-STATIC: 	leaq	_lsrc+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat06:
+; DARWIN-64-PIC: 	leaq	_lsrc+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bat07() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat07:
+; LINUX-64-STATIC: movl    $ldst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat07:
+; LINUX-32-STATIC: 	movl	$ldst+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat07:
+; LINUX-32-PIC: 	movl	$ldst+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat07:
+; LINUX-64-PIC: 	leaq	ldst+64(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat07:
+; DARWIN-32-STATIC: 	movl	$_ldst+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat07:
+; DARWIN-32-PIC: 	call	L98$pb
+; DARWIN-32-PIC-NEXT: L98$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L98$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat07:
+; DARWIN-64-STATIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat07:
+; DARWIN-64-PIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bat08() nounwind {
@@ -924,21 +6394,217 @@ entry:
 	%1 = getelementptr i32* %0, i64 16
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: bat08:
+; LINUX-64-STATIC: movq    lptr(%rip), %rax
+; LINUX-64-STATIC: addq    $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat08:
+; LINUX-32-STATIC: 	movl	lptr, %eax
+; LINUX-32-STATIC-NEXT: 	addl	$64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat08:
+; LINUX-32-PIC: 	movl	lptr, %eax
+; LINUX-32-PIC-NEXT: 	addl	$64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat08:
+; LINUX-64-PIC: 	movq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat08:
+; DARWIN-32-STATIC: 	movl	_lptr, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat08:
+; DARWIN-32-DYNAMIC: 	movl	_lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat08:
+; DARWIN-32-PIC: 	call	L99$pb
+; DARWIN-32-PIC-NEXT: L99$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L99$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat08:
+; DARWIN-64-STATIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat08:
+; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat08:
+; DARWIN-64-PIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bam00() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam00:
+; LINUX-64-STATIC: movl    $src+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam00:
+; LINUX-32-STATIC: 	movl	$src+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam00:
+; LINUX-32-PIC: 	movl	$src+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam00:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam00:
+; DARWIN-32-STATIC: 	movl	$_src+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam00:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam00:
+; DARWIN-32-PIC: 	call	L100$pb
+; DARWIN-32-PIC-NEXT: L100$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	L_src$non_lazy_ptr-L100$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam00:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam00:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam00:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bam01() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam01:
+; LINUX-64-STATIC: movl    $dst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam01:
+; LINUX-32-STATIC: 	movl	$dst+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam01:
+; LINUX-32-PIC: 	movl	$dst+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam01:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam01:
+; DARWIN-32-STATIC: 	movl	$_dst+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam01:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam01:
+; DARWIN-32-PIC: 	call	L101$pb
+; DARWIN-32-PIC-NEXT: L101$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L101$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam01:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam01:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam01:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bxm01() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bxm01:
+; LINUX-64-STATIC: movl    $xdst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxm01:
+; LINUX-32-STATIC: 	movl	$xdst+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bxm01:
+; LINUX-32-PIC: 	movl	$xdst+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bxm01:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bxm01:
+; DARWIN-32-STATIC: 	movl	$_xdst+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bxm01:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bxm01:
+; DARWIN-32-PIC: 	call	L102$pb
+; DARWIN-32-PIC-NEXT: L102$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	L_xdst$non_lazy_ptr-L102$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bxm01:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bxm01:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bxm01:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bam02() nounwind {
@@ -947,16 +6613,160 @@ entry:
 	%1 = getelementptr i32* %0, i64 65536
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: bam02:
+; LINUX-64-STATIC: movl    $262144, %eax
+; LINUX-64-STATIC: addq    ptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam02:
+; LINUX-32-STATIC: 	movl	$262144, %eax
+; LINUX-32-STATIC-NEXT: 	addl	ptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam02:
+; LINUX-32-PIC: 	movl	$262144, %eax
+; LINUX-32-PIC-NEXT: 	addl	ptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	(%rcx), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam02:
+; DARWIN-32-STATIC: 	movl	$262144, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	_ptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	(%ecx), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam02:
+; DARWIN-32-PIC: 	call	L103$pb
+; DARWIN-32-PIC-NEXT: L103$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L103$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	(%rcx), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	(%rcx), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	(%rcx), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bam03() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam03:
+; LINUX-64-STATIC: movl    $dsrc+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam03:
+; LINUX-32-STATIC: 	movl	$dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam03:
+; LINUX-32-PIC: 	movl	$dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam03:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam03:
+; DARWIN-32-STATIC: 	movl	$_dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam03:
+; DARWIN-32-DYNAMIC: 	movl	$_dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam03:
+; DARWIN-32-PIC: 	call	L104$pb
+; DARWIN-32-PIC-NEXT: L104$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L104$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam03:
+; DARWIN-64-STATIC: 	leaq	_dsrc+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam03:
+; DARWIN-64-PIC: 	leaq	_dsrc+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bam04() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam04:
+; LINUX-64-STATIC: movl    $ddst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam04:
+; LINUX-32-STATIC: 	movl	$ddst+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam04:
+; LINUX-32-PIC: 	movl	$ddst+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam04:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam04:
+; DARWIN-32-STATIC: 	movl	$_ddst+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam04:
+; DARWIN-32-PIC: 	call	L105$pb
+; DARWIN-32-PIC-NEXT: L105$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L105$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam04:
+; DARWIN-64-STATIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam04:
+; DARWIN-64-PIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bam05() nounwind {
@@ -965,16 +6775,153 @@ entry:
 	%1 = getelementptr i32* %0, i64 65536
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: bam05:
+; LINUX-64-STATIC: movl    $262144, %eax
+; LINUX-64-STATIC: addq    dptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam05:
+; LINUX-32-STATIC: 	movl	$262144, %eax
+; LINUX-32-STATIC-NEXT: 	addl	dptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam05:
+; LINUX-32-PIC: 	movl	$262144, %eax
+; LINUX-32-PIC-NEXT: 	addl	dptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	(%rcx), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam05:
+; DARWIN-32-STATIC: 	movl	$262144, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	_dptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam05:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	_dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam05:
+; DARWIN-32-PIC: 	call	L106$pb
+; DARWIN-32-PIC-NEXT: L106$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	_dptr-L106$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam05:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam05:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam05:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bam06() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam06:
+; LINUX-64-STATIC: movl    $lsrc+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam06:
+; LINUX-32-STATIC: 	movl	$lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam06:
+; LINUX-32-PIC: 	movl	$lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam06:
+; LINUX-64-PIC: 	leaq	lsrc+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam06:
+; DARWIN-32-STATIC: 	movl	$_lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam06:
+; DARWIN-32-DYNAMIC: 	movl	$_lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam06:
+; DARWIN-32-PIC: 	call	L107$pb
+; DARWIN-32-PIC-NEXT: L107$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L107$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam06:
+; DARWIN-64-STATIC: 	leaq	_lsrc+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam06:
+; DARWIN-64-PIC: 	leaq	_lsrc+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bam07() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam07:
+; LINUX-64-STATIC: movl    $ldst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam07:
+; LINUX-32-STATIC: 	movl	$ldst+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam07:
+; LINUX-32-PIC: 	movl	$ldst+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam07:
+; LINUX-64-PIC: 	leaq	ldst+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam07:
+; DARWIN-32-STATIC: 	movl	$_ldst+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam07:
+; DARWIN-32-PIC: 	call	L108$pb
+; DARWIN-32-PIC-NEXT: L108$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L108$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam07:
+; DARWIN-64-STATIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam07:
+; DARWIN-64-PIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @bam08() nounwind {
@@ -983,6 +6930,58 @@ entry:
 	%1 = getelementptr i32* %0, i64 65536
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: bam08:
+; LINUX-64-STATIC: movl    $262144, %eax
+; LINUX-64-STATIC: addq    lptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam08:
+; LINUX-32-STATIC: 	movl	$262144, %eax
+; LINUX-32-STATIC-NEXT: 	addl	lptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam08:
+; LINUX-32-PIC: 	movl	$262144, %eax
+; LINUX-32-PIC-NEXT: 	addl	lptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam08:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam08:
+; DARWIN-32-STATIC: 	movl	$262144, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	_lptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam08:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	_lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam08:
+; DARWIN-32-PIC: 	call	L109$pb
+; DARWIN-32-PIC-NEXT: L109$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	_lptr-L109$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam08:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam08:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam08:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cat00(i64 %i) nounwind {
@@ -991,6 +6990,59 @@ entry:
 	%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cat00:
+; LINUX-64-STATIC: leaq    src+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	src+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	src+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_src+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat00:
+; DARWIN-32-PIC: 	call	L110$pb
+; DARWIN-32-PIC-NEXT: L110$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L110$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cxt00(i64 %i) nounwind {
@@ -999,6 +7051,59 @@ entry:
 	%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cxt00:
+; LINUX-64-STATIC: leaq    xsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxt00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cxt00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cxt00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cxt00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cxt00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cxt00:
+; DARWIN-32-PIC: 	call	L111$pb
+; DARWIN-32-PIC-NEXT: L111$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L111$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cxt00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cxt00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cxt00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cat01(i64 %i) nounwind {
@@ -1007,6 +7112,59 @@ entry:
 	%1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cat01:
+; LINUX-64-STATIC: leaq    dst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat01:
+; DARWIN-32-PIC: 	call	L112$pb
+; DARWIN-32-PIC-NEXT: L112$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L112$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cxt01(i64 %i) nounwind {
@@ -1015,6 +7173,59 @@ entry:
 	%1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cxt01:
+; LINUX-64-STATIC: leaq    xdst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxt01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cxt01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cxt01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cxt01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cxt01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cxt01:
+; DARWIN-32-PIC: 	call	L113$pb
+; DARWIN-32-PIC-NEXT: L113$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L113$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cxt01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cxt01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cxt01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cat02(i64 %i) nounwind {
@@ -1024,6 +7235,69 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
+; LINUX-64-STATIC: cat02:
+; LINUX-64-STATIC: movq    ptr(%rip), %rax
+; LINUX-64-STATIC: leaq    64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat02:
+; DARWIN-32-PIC: 	call	L114$pb
+; DARWIN-32-PIC-NEXT: L114$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L114$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cat03(i64 %i) nounwind {
@@ -1032,6 +7306,57 @@ entry:
 	%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cat03:
+; LINUX-64-STATIC: leaq    dsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat03:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat03:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat03:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat03:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_dsrc+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat03:
+; DARWIN-32-PIC: 	call	L115$pb
+; DARWIN-32-PIC-NEXT: L115$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L115$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cat04(i64 %i) nounwind {
@@ -1040,6 +7365,57 @@ entry:
 	%1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cat04:
+; LINUX-64-STATIC: leaq    ddst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat04:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat04:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat04:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat04:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat04:
+; DARWIN-32-PIC: 	call	L116$pb
+; DARWIN-32-PIC-NEXT: L116$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L116$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cat05(i64 %i) nounwind {
@@ -1049,6 +7425,64 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
+; LINUX-64-STATIC: cat05:
+; LINUX-64-STATIC: movq    dptr(%rip), %rax
+; LINUX-64-STATIC: leaq    64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat05:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat05:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat05:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat05:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat05:
+; DARWIN-32-PIC: 	call	L117$pb
+; DARWIN-32-PIC-NEXT: L117$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L117$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat05:
+; DARWIN-64-STATIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat05:
+; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat05:
+; DARWIN-64-PIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cat06(i64 %i) nounwind {
@@ -1057,6 +7491,57 @@ entry:
 	%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cat06:
+; LINUX-64-STATIC: leaq    lsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat06:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	lsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat06:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	lsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat06:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_lsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat06:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_lsrc+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat06:
+; DARWIN-32-PIC: 	call	L118$pb
+; DARWIN-32-PIC-NEXT: L118$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L118$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cat07(i64 %i) nounwind {
@@ -1065,6 +7550,57 @@ entry:
 	%1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cat07:
+; LINUX-64-STATIC: leaq    ldst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat07:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat07:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat07:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat07:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat07:
+; DARWIN-32-PIC: 	call	L119$pb
+; DARWIN-32-PIC-NEXT: L119$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L119$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cat08(i64 %i) nounwind {
@@ -1074,6 +7610,63 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
+; LINUX-64-STATIC: cat08:
+; LINUX-64-STATIC: movq    lptr(%rip), %rax
+; LINUX-64-STATIC: leaq    64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat08:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat08:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat08:
+; LINUX-64-PIC: 	movq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat08:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat08:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat08:
+; DARWIN-32-PIC: 	call	L120$pb
+; DARWIN-32-PIC-NEXT: L120$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L120$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat08:
+; DARWIN-64-STATIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat08:
+; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat08:
+; DARWIN-64-PIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cam00(i64 %i) nounwind {
@@ -1082,6 +7675,59 @@ entry:
 	%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cam00:
+; LINUX-64-STATIC: leaq    src+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	src+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	src+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_src+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam00:
+; DARWIN-32-PIC: 	call	L121$pb
+; DARWIN-32-PIC-NEXT: L121$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L121$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cxm00(i64 %i) nounwind {
@@ -1090,6 +7736,59 @@ entry:
 	%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cxm00:
+; LINUX-64-STATIC: leaq    xsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxm00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cxm00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cxm00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cxm00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cxm00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cxm00:
+; DARWIN-32-PIC: 	call	L122$pb
+; DARWIN-32-PIC-NEXT: L122$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L122$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cxm00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cxm00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cxm00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cam01(i64 %i) nounwind {
@@ -1098,6 +7797,59 @@ entry:
 	%1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cam01:
+; LINUX-64-STATIC: leaq    dst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam01:
+; DARWIN-32-PIC: 	call	L123$pb
+; DARWIN-32-PIC-NEXT: L123$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L123$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cxm01(i64 %i) nounwind {
@@ -1106,6 +7858,59 @@ entry:
 	%1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cxm01:
+; LINUX-64-STATIC: leaq    xdst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxm01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xdst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cxm01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xdst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cxm01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cxm01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cxm01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cxm01:
+; DARWIN-32-PIC: 	call	L124$pb
+; DARWIN-32-PIC-NEXT: L124$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L124$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cxm01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cxm01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cxm01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cam02(i64 %i) nounwind {
@@ -1115,6 +7920,69 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
+; LINUX-64-STATIC: cam02:
+; LINUX-64-STATIC: movq    ptr(%rip), %rax
+; LINUX-64-STATIC: leaq    262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam02:
+; DARWIN-32-PIC: 	call	L125$pb
+; DARWIN-32-PIC-NEXT: L125$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L125$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cam03(i64 %i) nounwind {
@@ -1123,6 +7991,57 @@ entry:
 	%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cam03:
+; LINUX-64-STATIC: leaq    dsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam03:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam03:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam03:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam03:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_dsrc+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam03:
+; DARWIN-32-PIC: 	call	L126$pb
+; DARWIN-32-PIC-NEXT: L126$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L126$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cam04(i64 %i) nounwind {
@@ -1131,6 +8050,57 @@ entry:
 	%1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cam04:
+; LINUX-64-STATIC: leaq    ddst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam04:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam04:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam04:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam04:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam04:
+; DARWIN-32-PIC: 	call	L127$pb
+; DARWIN-32-PIC-NEXT: L127$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L127$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cam05(i64 %i) nounwind {
@@ -1140,6 +8110,64 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
+; LINUX-64-STATIC: cam05:
+; LINUX-64-STATIC: movq    dptr(%rip), %rax
+; LINUX-64-STATIC: leaq    262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam05:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam05:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam05:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam05:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam05:
+; DARWIN-32-PIC: 	call	L128$pb
+; DARWIN-32-PIC-NEXT: L128$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L128$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam05:
+; DARWIN-64-STATIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam05:
+; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam05:
+; DARWIN-64-PIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cam06(i64 %i) nounwind {
@@ -1148,6 +8176,57 @@ entry:
 	%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cam06:
+; LINUX-64-STATIC: leaq    lsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam06:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	lsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam06:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	lsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam06:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_lsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam06:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_lsrc+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam06:
+; DARWIN-32-PIC: 	call	L129$pb
+; DARWIN-32-PIC-NEXT: L129$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L129$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cam07(i64 %i) nounwind {
@@ -1156,6 +8235,57 @@ entry:
 	%1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
+; LINUX-64-STATIC: cam07:
+; LINUX-64-STATIC: leaq    ldst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam07:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam07:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam07:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam07:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam07:
+; DARWIN-32-PIC: 	call	L130$pb
+; DARWIN-32-PIC-NEXT: L130$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L130$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define i8* @cam08(i64 %i) nounwind {
@@ -1165,6 +8295,63 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
+; LINUX-64-STATIC: cam08:
+; LINUX-64-STATIC: movq    lptr(%rip), %rax
+; LINUX-64-STATIC: leaq    262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam08:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam08:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam08:
+; LINUX-64-PIC: 	movq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam08:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam08:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam08:
+; DARWIN-32-PIC: 	call	L131$pb
+; DARWIN-32-PIC-NEXT: L131$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L131$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam08:
+; DARWIN-64-STATIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam08:
+; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam08:
+; DARWIN-64-PIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @lcallee() nounwind {
@@ -1177,6 +8364,123 @@ entry:
 	tail call void @x() nounwind
 	tail call void @x() nounwind
 	ret void
+; LINUX-64-STATIC: lcallee:
+; LINUX-64-STATIC: call    x
+; LINUX-64-STATIC: call    x
+; LINUX-64-STATIC: call    x
+; LINUX-64-STATIC: call    x
+; LINUX-64-STATIC: call    x
+; LINUX-64-STATIC: call    x
+; LINUX-64-STATIC: call    x
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: lcallee:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: lcallee:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: lcallee:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	call	x@PLT
+; LINUX-64-PIC-NEXT: 	call	x@PLT
+; LINUX-64-PIC-NEXT: 	call	x@PLT
+; LINUX-64-PIC-NEXT: 	call	x@PLT
+; LINUX-64-PIC-NEXT: 	call	x@PLT
+; LINUX-64-PIC-NEXT: 	call	x@PLT
+; LINUX-64-PIC-NEXT: 	call	x@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _lcallee:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _lcallee:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _lcallee:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _lcallee:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	_x
+; DARWIN-64-STATIC-NEXT: 	call	_x
+; DARWIN-64-STATIC-NEXT: 	call	_x
+; DARWIN-64-STATIC-NEXT: 	call	_x
+; DARWIN-64-STATIC-NEXT: 	call	_x
+; DARWIN-64-STATIC-NEXT: 	call	_x
+; DARWIN-64-STATIC-NEXT: 	call	_x
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _lcallee:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	_x
+; DARWIN-64-DYNAMIC-NEXT: 	call	_x
+; DARWIN-64-DYNAMIC-NEXT: 	call	_x
+; DARWIN-64-DYNAMIC-NEXT: 	call	_x
+; DARWIN-64-DYNAMIC-NEXT: 	call	_x
+; DARWIN-64-DYNAMIC-NEXT: 	call	_x
+; DARWIN-64-DYNAMIC-NEXT: 	call	_x
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _lcallee:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	call	_x
+; DARWIN-64-PIC-NEXT: 	call	_x
+; DARWIN-64-PIC-NEXT: 	call	_x
+; DARWIN-64-PIC-NEXT: 	call	_x
+; DARWIN-64-PIC-NEXT: 	call	_x
+; DARWIN-64-PIC-NEXT: 	call	_x
+; DARWIN-64-PIC-NEXT: 	call	_x
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 declare void @x()
@@ -1191,6 +8495,123 @@ entry:
 	tail call void @y() nounwind
 	tail call void @y() nounwind
 	ret void
+; LINUX-64-STATIC: dcallee:
+; LINUX-64-STATIC: call    y
+; LINUX-64-STATIC: call    y
+; LINUX-64-STATIC: call    y
+; LINUX-64-STATIC: call    y
+; LINUX-64-STATIC: call    y
+; LINUX-64-STATIC: call    y
+; LINUX-64-STATIC: call    y
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dcallee:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: dcallee:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: dcallee:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	call	y@PLT
+; LINUX-64-PIC-NEXT: 	call	y@PLT
+; LINUX-64-PIC-NEXT: 	call	y@PLT
+; LINUX-64-PIC-NEXT: 	call	y@PLT
+; LINUX-64-PIC-NEXT: 	call	y@PLT
+; LINUX-64-PIC-NEXT: 	call	y@PLT
+; LINUX-64-PIC-NEXT: 	call	y@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _dcallee:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _dcallee:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _dcallee:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _dcallee:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	_y
+; DARWIN-64-STATIC-NEXT: 	call	_y
+; DARWIN-64-STATIC-NEXT: 	call	_y
+; DARWIN-64-STATIC-NEXT: 	call	_y
+; DARWIN-64-STATIC-NEXT: 	call	_y
+; DARWIN-64-STATIC-NEXT: 	call	_y
+; DARWIN-64-STATIC-NEXT: 	call	_y
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _dcallee:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	_y
+; DARWIN-64-DYNAMIC-NEXT: 	call	_y
+; DARWIN-64-DYNAMIC-NEXT: 	call	_y
+; DARWIN-64-DYNAMIC-NEXT: 	call	_y
+; DARWIN-64-DYNAMIC-NEXT: 	call	_y
+; DARWIN-64-DYNAMIC-NEXT: 	call	_y
+; DARWIN-64-DYNAMIC-NEXT: 	call	_y
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _dcallee:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	call	_y
+; DARWIN-64-PIC-NEXT: 	call	_y
+; DARWIN-64-PIC-NEXT: 	call	_y
+; DARWIN-64-PIC-NEXT: 	call	_y
+; DARWIN-64-PIC-NEXT: 	call	_y
+; DARWIN-64-PIC-NEXT: 	call	_y
+; DARWIN-64-PIC-NEXT: 	call	_y
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 declare void @y()
@@ -1198,6 +8619,48 @@ declare void @y()
 define void ()* @address() nounwind {
 entry:
 	ret void ()* @callee
+; LINUX-64-STATIC: address:
+; LINUX-64-STATIC: movl    $callee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: address:
+; LINUX-32-STATIC: 	movl	$callee, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: address:
+; LINUX-32-PIC: 	movl	$callee, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: address:
+; LINUX-64-PIC: 	movq	callee@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _address:
+; DARWIN-32-STATIC: 	movl	$_callee, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _address:
+; DARWIN-32-DYNAMIC: 	movl	L_callee$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _address:
+; DARWIN-32-PIC: 	call	L134$pb
+; DARWIN-32-PIC-NEXT: L134$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_callee$non_lazy_ptr-L134$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _address:
+; DARWIN-64-STATIC: 	movq	_callee@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _address:
+; DARWIN-64-DYNAMIC: 	movq	_callee@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _address:
+; DARWIN-64-PIC: 	movq	_callee@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 declare void @callee()
@@ -1205,11 +8668,95 @@ declare void @callee()
 define void ()* @laddress() nounwind {
 entry:
 	ret void ()* @lcallee
+; LINUX-64-STATIC: laddress:
+; LINUX-64-STATIC: movl    $lcallee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: laddress:
+; LINUX-32-STATIC: 	movl	$lcallee, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: laddress:
+; LINUX-32-PIC: 	movl	$lcallee, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: laddress:
+; LINUX-64-PIC: 	movq	lcallee@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _laddress:
+; DARWIN-32-STATIC: 	movl	$_lcallee, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _laddress:
+; DARWIN-32-DYNAMIC: 	movl	$_lcallee, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _laddress:
+; DARWIN-32-PIC: 	call	L135$pb
+; DARWIN-32-PIC-NEXT: L135$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_lcallee-L135$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _laddress:
+; DARWIN-64-STATIC: 	leaq	_lcallee(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _laddress:
+; DARWIN-64-DYNAMIC: 	leaq	_lcallee(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _laddress:
+; DARWIN-64-PIC: 	leaq	_lcallee(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void ()* @daddress() nounwind {
 entry:
 	ret void ()* @dcallee
+; LINUX-64-STATIC: daddress:
+; LINUX-64-STATIC: movl    $dcallee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: daddress:
+; LINUX-32-STATIC: 	movl	$dcallee, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: daddress:
+; LINUX-32-PIC: 	movl	$dcallee, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: daddress:
+; LINUX-64-PIC: 	leaq	dcallee(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _daddress:
+; DARWIN-32-STATIC: 	movl	$_dcallee, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _daddress:
+; DARWIN-32-DYNAMIC: 	movl	$_dcallee, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _daddress:
+; DARWIN-32-PIC: 	call	L136$pb
+; DARWIN-32-PIC-NEXT: L136$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_dcallee-L136$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _daddress:
+; DARWIN-64-STATIC: 	leaq	_dcallee(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _daddress:
+; DARWIN-64-DYNAMIC: 	leaq	_dcallee(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _daddress:
+; DARWIN-64-PIC: 	leaq	_dcallee(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @caller() nounwind {
@@ -1217,6 +8764,73 @@ entry:
 	tail call void @callee() nounwind
 	tail call void @callee() nounwind
 	ret void
+; LINUX-64-STATIC: caller:
+; LINUX-64-STATIC: call    callee
+; LINUX-64-STATIC: call    callee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: caller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	callee
+; LINUX-32-STATIC-NEXT: 	call	callee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: caller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	callee
+; LINUX-32-PIC-NEXT: 	call	callee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: caller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	call	callee@PLT
+; LINUX-64-PIC-NEXT: 	call	callee@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _caller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_callee
+; DARWIN-32-STATIC-NEXT: 	call	_callee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _caller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _caller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-PIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _caller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	_callee
+; DARWIN-64-STATIC-NEXT: 	call	_callee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _caller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	_callee
+; DARWIN-64-DYNAMIC-NEXT: 	call	_callee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _caller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	call	_callee
+; DARWIN-64-PIC-NEXT: 	call	_callee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @dcaller() nounwind {
@@ -1224,6 +8838,73 @@ entry:
 	tail call void @dcallee() nounwind
 	tail call void @dcallee() nounwind
 	ret void
+; LINUX-64-STATIC: dcaller:
+; LINUX-64-STATIC: call    dcallee
+; LINUX-64-STATIC: call    dcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	dcallee
+; LINUX-32-STATIC-NEXT: 	call	dcallee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: dcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	dcallee
+; LINUX-32-PIC-NEXT: 	call	dcallee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: dcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	call	dcallee
+; LINUX-64-PIC-NEXT: 	call	dcallee
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _dcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_dcallee
+; DARWIN-32-STATIC-NEXT: 	call	_dcallee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _dcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	_dcallee
+; DARWIN-32-DYNAMIC-NEXT: 	call	_dcallee
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _dcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	_dcallee
+; DARWIN-32-PIC-NEXT: 	call	_dcallee
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _dcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	_dcallee
+; DARWIN-64-STATIC-NEXT: 	call	_dcallee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _dcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	_dcallee
+; DARWIN-64-DYNAMIC-NEXT: 	call	_dcallee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _dcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	call	_dcallee
+; DARWIN-64-PIC-NEXT: 	call	_dcallee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @lcaller() nounwind {
@@ -1231,24 +8912,262 @@ entry:
 	tail call void @lcallee() nounwind
 	tail call void @lcallee() nounwind
 	ret void
+; LINUX-64-STATIC: lcaller:
+; LINUX-64-STATIC: call    lcallee
+; LINUX-64-STATIC: call    lcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: lcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	lcallee
+; LINUX-32-STATIC-NEXT: 	call	lcallee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: lcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	lcallee
+; LINUX-32-PIC-NEXT: 	call	lcallee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: lcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	call	lcallee@PLT
+; LINUX-64-PIC-NEXT: 	call	lcallee@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _lcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_lcallee
+; DARWIN-32-STATIC-NEXT: 	call	_lcallee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _lcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	_lcallee
+; DARWIN-32-DYNAMIC-NEXT: 	call	_lcallee
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _lcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	_lcallee
+; DARWIN-32-PIC-NEXT: 	call	_lcallee
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _lcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	_lcallee
+; DARWIN-64-STATIC-NEXT: 	call	_lcallee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _lcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	_lcallee
+; DARWIN-64-DYNAMIC-NEXT: 	call	_lcallee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _lcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	call	_lcallee
+; DARWIN-64-PIC-NEXT: 	call	_lcallee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @tailcaller() nounwind {
 entry:
 	tail call void @callee() nounwind
 	ret void
+; LINUX-64-STATIC: tailcaller:
+; LINUX-64-STATIC: call    callee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: tailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	callee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: tailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	callee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: tailcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	call	callee@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _tailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_callee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _tailcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _tailcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _tailcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	_callee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _tailcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	_callee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _tailcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	call	_callee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @dtailcaller() nounwind {
 entry:
 	tail call void @dcallee() nounwind
 	ret void
+; LINUX-64-STATIC: dtailcaller:
+; LINUX-64-STATIC: call    dcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dtailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	dcallee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: dtailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	dcallee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: dtailcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	call	dcallee
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _dtailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_dcallee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _dtailcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	_dcallee
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _dtailcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	_dcallee
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _dtailcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	_dcallee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _dtailcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	_dcallee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _dtailcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	call	_dcallee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ltailcaller() nounwind {
 entry:
 	tail call void @lcallee() nounwind
 	ret void
+; LINUX-64-STATIC: ltailcaller:
+; LINUX-64-STATIC: call    lcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ltailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	lcallee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ltailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	lcallee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ltailcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	call	lcallee@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ltailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_lcallee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ltailcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	_lcallee
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ltailcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	_lcallee
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ltailcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	_lcallee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ltailcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	_lcallee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ltailcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	call	_lcallee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @icaller() nounwind {
@@ -1258,6 +9177,86 @@ entry:
 	%1 = load void ()** @ifunc, align 8
 	tail call void %1() nounwind
 	ret void
+; LINUX-64-STATIC: icaller:
+; LINUX-64-STATIC: call    *ifunc
+; LINUX-64-STATIC: call    *ifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: icaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*ifunc
+; LINUX-32-STATIC-NEXT: 	call	*ifunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: icaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*ifunc
+; LINUX-32-PIC-NEXT: 	call	*ifunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: icaller:
+; LINUX-64-PIC: 	pushq	%rbx
+; LINUX-64-PIC-NEXT: 	movq	ifunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: 	call	*(%rbx)
+; LINUX-64-PIC-NEXT: 	call	*(%rbx)
+; LINUX-64-PIC-NEXT: 	popq	%rbx
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _icaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _icaller:
+; DARWIN-32-DYNAMIC: 	pushl	%esi
+; DARWIN-32-DYNAMIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ifunc$non_lazy_ptr, %esi
+; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	popl	%esi
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _icaller:
+; DARWIN-32-PIC: 	pushl	%esi
+; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	call	L143$pb
+; DARWIN-32-PIC-NEXT: L143$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ifunc$non_lazy_ptr-L143$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: 	call	*(%esi)
+; DARWIN-32-PIC-NEXT: 	call	*(%esi)
+; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _icaller:
+; DARWIN-64-STATIC: 	pushq	%rbx
+; DARWIN-64-STATIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-STATIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-STATIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-STATIC-NEXT: 	popq	%rbx
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _icaller:
+; DARWIN-64-DYNAMIC: 	pushq	%rbx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-DYNAMIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: 	popq	%rbx
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _icaller:
+; DARWIN-64-PIC: 	pushq	%rbx
+; DARWIN-64-PIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-PIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-PIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-PIC-NEXT: 	popq	%rbx
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @dicaller() nounwind {
@@ -1267,6 +9266,79 @@ entry:
 	%1 = load void ()** @difunc, align 8
 	tail call void %1() nounwind
 	ret void
+; LINUX-64-STATIC: dicaller:
+; LINUX-64-STATIC: call    *difunc
+; LINUX-64-STATIC: call    *difunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dicaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*difunc
+; LINUX-32-STATIC-NEXT: 	call	*difunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: dicaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*difunc
+; LINUX-32-PIC-NEXT: 	call	*difunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: dicaller:
+; LINUX-64-PIC: 	pushq	%rbx
+; LINUX-64-PIC-NEXT: 	movq	difunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: 	call	*(%rbx)
+; LINUX-64-PIC-NEXT: 	call	*(%rbx)
+; LINUX-64-PIC-NEXT: 	popq	%rbx
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _dicaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_difunc
+; DARWIN-32-STATIC-NEXT: 	call	*_difunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _dicaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_difunc
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_difunc
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _dicaller:
+; DARWIN-32-PIC: 	pushl	%esi
+; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	call	L144$pb
+; DARWIN-32-PIC-NEXT: L144$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	call	*_difunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	call	*_difunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _dicaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	*_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	call	*_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _dicaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	*_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	call	*_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _dicaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	call	*_difunc(%rip)
+; DARWIN-64-PIC-NEXT: 	call	*_difunc(%rip)
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @licaller() nounwind {
@@ -1276,6 +9348,78 @@ entry:
 	%1 = load void ()** @lifunc, align 8
 	tail call void %1() nounwind
 	ret void
+; LINUX-64-STATIC: licaller:
+; LINUX-64-STATIC: call    *lifunc
+; LINUX-64-STATIC: call    *lifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: licaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*lifunc
+; LINUX-32-STATIC-NEXT: 	call	*lifunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: licaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*lifunc
+; LINUX-32-PIC-NEXT: 	call	*lifunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: licaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	call	*lifunc(%rip)
+; LINUX-64-PIC-NEXT: 	call	*lifunc(%rip)
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _licaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_lifunc
+; DARWIN-32-STATIC-NEXT: 	call	*_lifunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _licaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_lifunc
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_lifunc
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _licaller:
+; DARWIN-32-PIC: 	pushl	%esi
+; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	call	L145$pb
+; DARWIN-32-PIC-NEXT: L145$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	call	*_lifunc-L145$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	call	*_lifunc-L145$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _licaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	*_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	call	*_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _licaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	*_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	call	*_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _licaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	call	*_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: 	call	*_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @itailcaller() nounwind {
@@ -1285,6 +9429,86 @@ entry:
 	%1 = load void ()** @ifunc, align 8
 	tail call void %1() nounwind
 	ret void
+; LINUX-64-STATIC: itailcaller:
+; LINUX-64-STATIC: call    *ifunc
+; LINUX-64-STATIC: call    *ifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: itailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*ifunc
+; LINUX-32-STATIC-NEXT: 	call	*ifunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: itailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*ifunc
+; LINUX-32-PIC-NEXT: 	call	*ifunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: itailcaller:
+; LINUX-64-PIC: 	pushq	%rbx
+; LINUX-64-PIC-NEXT: 	movq	ifunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: 	call	*(%rbx)
+; LINUX-64-PIC-NEXT: 	call	*(%rbx)
+; LINUX-64-PIC-NEXT: 	popq	%rbx
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _itailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _itailcaller:
+; DARWIN-32-DYNAMIC: 	pushl	%esi
+; DARWIN-32-DYNAMIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ifunc$non_lazy_ptr, %esi
+; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	popl	%esi
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _itailcaller:
+; DARWIN-32-PIC: 	pushl	%esi
+; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	call	L146$pb
+; DARWIN-32-PIC-NEXT: L146$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ifunc$non_lazy_ptr-L146$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: 	call	*(%esi)
+; DARWIN-32-PIC-NEXT: 	call	*(%esi)
+; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _itailcaller:
+; DARWIN-64-STATIC: 	pushq	%rbx
+; DARWIN-64-STATIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-STATIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-STATIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-STATIC-NEXT: 	popq	%rbx
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _itailcaller:
+; DARWIN-64-DYNAMIC: 	pushq	%rbx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-DYNAMIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: 	popq	%rbx
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _itailcaller:
+; DARWIN-64-PIC: 	pushq	%rbx
+; DARWIN-64-PIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-PIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-PIC-NEXT: 	call	*(%rbx)
+; DARWIN-64-PIC-NEXT: 	popq	%rbx
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @ditailcaller() nounwind {
@@ -1292,6 +9516,66 @@ entry:
 	%0 = load void ()** @difunc, align 8
 	tail call void %0() nounwind
 	ret void
+; LINUX-64-STATIC: ditailcaller:
+; LINUX-64-STATIC: call    *difunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ditailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*difunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ditailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*difunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ditailcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	movq	difunc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	call	*(%rax)
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ditailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_difunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ditailcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_difunc
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ditailcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L147$pb
+; DARWIN-32-PIC-NEXT: L147$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	call	*_difunc-L147$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ditailcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	*_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ditailcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	*_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ditailcaller:
+; DARWIN-64-PIC: 	call	*_difunc(%rip)
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
 
 define void @litailcaller() nounwind {
@@ -1299,4 +9583,64 @@ entry:
 	%0 = load void ()** @lifunc, align 8
 	tail call void %0() nounwind
 	ret void
+; LINUX-64-STATIC: litailcaller:
+; LINUX-64-STATIC: call    *lifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: litailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*lifunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: litailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*lifunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: litailcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	call	*lifunc(%rip)
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _litailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_lifunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _litailcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_lifunc
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _litailcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L148$pb
+; DARWIN-32-PIC-NEXT: L148$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	call	*_lifunc-L148$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _litailcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	call	*_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _litailcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	call	*_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _litailcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	call	*_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
 }
diff --git a/test/CodeGen/X86/add-trick32.ll b/test/CodeGen/X86/add-trick32.ll
index 42909b4b5874a..e86045db0abb0 100644
--- a/test/CodeGen/X86/add-trick32.ll
+++ b/test/CodeGen/X86/add-trick32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: not grep add %t
 ; RUN: grep subl %t | count 1
 
diff --git a/test/CodeGen/X86/add-trick64.ll b/test/CodeGen/X86/add-trick64.ll
index 5466d9d441b16..2f1fceea5ea49 100644
--- a/test/CodeGen/X86/add-trick64.ll
+++ b/test/CodeGen/X86/add-trick64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: not grep add %t
 ; RUN: grep subq %t | count 2
 
diff --git a/test/CodeGen/X86/add-with-overflow.ll b/test/CodeGen/X86/add-with-overflow.ll
index d015cebbbdf22..0f705dc020883 100644
--- a/test/CodeGen/X86/add-with-overflow.ll
+++ b/test/CodeGen/X86/add-with-overflow.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep {jb} | count 2
-; RUN: llvm-as < %s | llc -march=x86 -O0 | grep {jo} | count 2
-; RUN: llvm-as < %s | llc -march=x86 -O0 | grep {jb} | count 2
+; RUN: llc < %s -march=x86 | grep {jo} | count 2
+; RUN: llc < %s -march=x86 | grep {jb} | count 2
+; RUN: llc < %s -march=x86 -O0 | grep {jo} | count 2
+; RUN: llc < %s -march=x86 -O0 | grep {jb} | count 2
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
index 3aadd05d05e91..0b26859b04c73 100644
--- a/test/CodeGen/X86/aliases.ll
+++ b/test/CodeGen/X86/aliases.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=i686-pc-linux-gnu -asm-verbose=false -o %t -f
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false -o %t
 ; RUN: grep set %t   | count 7
 ; RUN: grep globl %t | count 6
 ; RUN: grep weak %t  | count 1
diff --git a/test/CodeGen/X86/aligned-comm.ll b/test/CodeGen/X86/aligned-comm.ll
index b2dc77d8be123..c0f3a81c4d67e 100644
--- a/test/CodeGen/X86/aligned-comm.ll
+++ b/test/CodeGen/X86/aligned-comm.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 | grep {array,16512,7}
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep {array,16512,7}
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin8 | not grep {7}
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep {array,16512,7}
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep {array,16512,7}
+; RUN: llc < %s -mtriple=i386-apple-darwin8 | not grep {7}
 
 ; Darwin 9+ should get alignment on common symbols.  Darwin8 does 
 ; not support this.
diff --git a/test/CodeGen/X86/all-ones-vector.ll b/test/CodeGen/X86/all-ones-vector.ll
index 01c0e36ea244b..10fecadaa0238 100644
--- a/test/CodeGen/X86/all-ones-vector.ll
+++ b/test/CodeGen/X86/all-ones-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 | grep pcmpeqd | count 4
+; RUN: llc < %s -march=x86 -mattr=sse2 | grep pcmpeqd | count 4
 
 define <4 x i32> @ioo() nounwind {
         ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll
index 0bd97c23e87bb..f45e9b84b2640 100644
--- a/test/CodeGen/X86/alloca-align-rounding.ll
+++ b/test/CodeGen/X86/alloca-align-rounding.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
+; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
 
 declare void @bar(<2 x i64>* %n)
 
diff --git a/test/CodeGen/X86/and-or-fold.ll b/test/CodeGen/X86/and-or-fold.ll
index 3501047abc165..7733b8a5baaa4 100644
--- a/test/CodeGen/X86/and-or-fold.ll
+++ b/test/CodeGen/X86/and-or-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep and | count 1
+; RUN: llc < %s -march=x86 | grep and | count 1
 
 ; The dag combiner should fold together (x&127)|(y&16711680) -> (x|y)&c1
 ; in this case.
diff --git a/test/CodeGen/X86/and-su.ll b/test/CodeGen/X86/and-su.ll
index bdc845448f5ff..b5ac23b241289 100644
--- a/test/CodeGen/X86/and-su.ll
+++ b/test/CodeGen/X86/and-su.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {(%} | count 1
+; RUN: llc < %s -march=x86 | grep {(%} | count 1
 
 ; Don't duplicate the load.
 
diff --git a/test/CodeGen/X86/anyext-uses.ll b/test/CodeGen/X86/anyext-uses.ll
index e8c3cf0e71b25..0cf169eb28d8d 100644
--- a/test/CodeGen/X86/anyext-uses.ll
+++ b/test/CodeGen/X86/anyext-uses.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep mov %t | count 8
 ; RUN: not grep implicit %t
 
diff --git a/test/CodeGen/X86/anyext.ll b/test/CodeGen/X86/anyext.ll
new file mode 100644
index 0000000000000..106fe83661b41
--- /dev/null
+++ b/test/CodeGen/X86/anyext.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86-64 | grep movzbl | count 2
+
+; Use movzbl to avoid partial-register updates.
+
+define i32 @foo(i32 %p, i8 zeroext %x) nounwind {
+  %q = trunc i32 %p to i8
+  %r = udiv i8 %q, %x
+  %s = zext i8 %r to i32
+  %t = and i32 %s, 1
+  ret i32 %t
+}
+define i32 @bar(i32 %p, i16 zeroext %x) nounwind {
+  %q = trunc i32 %p to i16
+  %r = udiv i16 %q, %x
+  %s = zext i16 %r to i32
+  %t = and i32 %s, 1
+  ret i32 %t
+}
diff --git a/test/CodeGen/X86/arg-cast.ll b/test/CodeGen/X86/arg-cast.ll
index 2e2bc3cc8f21d..c11151446bc5d 100644
--- a/test/CodeGen/X86/arg-cast.ll
+++ b/test/CodeGen/X86/arg-cast.ll
@@ -1,7 +1,7 @@
 ; This should compile to movl $2147483647, %eax + andl only.
-; RUN: llvm-as < %s | llc | grep andl
-; RUN: llvm-as < %s | llc | not grep movsd
-; RUN: llvm-as < %s | llc | grep esp | not grep add
+; RUN: llc < %s | grep andl
+; RUN: llc < %s | not grep movsd
+; RUN: llc < %s | grep esp | not grep add
 ; rdar://5736574
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/asm-block-labels.ll b/test/CodeGen/X86/asm-block-labels.ll
index 284a9fb00fde7..a43d43023196d 100644
--- a/test/CodeGen/X86/asm-block-labels.ll
+++ b/test/CodeGen/X86/asm-block-labels.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llc
+; RUN: opt < %s -std-compile-opts | llc
 ; ModuleID = 'block12.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/asm-global-imm.ll b/test/CodeGen/X86/asm-global-imm.ll
index 333c7689ab4ab..96da224c8521f 100644
--- a/test/CodeGen/X86/asm-global-imm.ll
+++ b/test/CodeGen/X86/asm-global-imm.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
 ; RUN:   grep {test1 \$_GV}
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
 ; RUN:   grep {test2 _GV}
 ; PR882
 
diff --git a/test/CodeGen/X86/asm-indirect-mem.ll b/test/CodeGen/X86/asm-indirect-mem.ll
index 7f3353f6be657..c57aa995e8a8d 100644
--- a/test/CodeGen/X86/asm-indirect-mem.ll
+++ b/test/CodeGen/X86/asm-indirect-mem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc 
+; RUN: llc < %s 
 ; PR2267
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/asm-modifier-P.ll b/test/CodeGen/X86/asm-modifier-P.ll
new file mode 100644
index 0000000000000..6139da8c3685c
--- /dev/null
+++ b/test/CodeGen/X86/asm-modifier-P.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu -relocation-model=pic    | FileCheck %s -check-prefix=CHECK-PIC-32
+; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu -relocation-model=static | FileCheck %s -check-prefix=CHECK-STATIC-32
+; RUN: llc < %s -march=x86-64 -relocation-model=static | FileCheck %s -check-prefix=CHECK-STATIC-64
+; RUN: llc < %s -march=x86-64 -relocation-model=pic    | FileCheck %s -check-prefix=CHECK-PIC-64
+; PR3379
+; XFAIL: *
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@G = external global i32              ; <i32*> [#uses=1]
+
+declare void @bar(...)
+
+; extern int G;
+; void test1() {
+;  asm("frob %0 x" : : "m"(G));
+;  asm("frob %P0 x" : : "m"(G));
+;}
+
+define void @test1() nounwind {
+entry:
+; P suffix removes (rip) in -static 64-bit mode.
+
+; CHECK-PIC-64: test1:
+; CHECK-PIC-64: movq	G@GOTPCREL(%rip), %rax
+; CHECK-PIC-64: frob (%rax) x
+; CHECK-PIC-64: frob (%rax) x
+
+; CHECK-STATIC-64: test1:
+; CHECK-STATIC-64: frob G(%rip) x
+; CHECK-STATIC-64: frob G x
+
+; CHECK-PIC-32: test1:
+; CHECK-PIC-32: frob G x
+; CHECK-PIC-32: frob G x
+
+; CHECK-STATIC-32: test1:
+; CHECK-STATIC-32: frob G x
+; CHECK-STATIC-32: frob G x
+
+        call void asm "frob $0 x", "*m"(i32* @G) nounwind
+        call void asm "frob ${0:P} x", "*m"(i32* @G) nounwind
+        ret void
+}
+
+define void @test3() nounwind {
+entry:
+; CHECK-STATIC-64: test3:
+; CHECK-STATIC-64: call bar
+; CHECK-STATIC-64: call test3
+; CHECK-STATIC-64: call $bar
+; CHECK-STATIC-64: call $test3
+
+; CHECK-STATIC-32: test3:
+; CHECK-STATIC-32: call bar
+; CHECK-STATIC-32: call test3
+; CHECK-STATIC-32: call $bar
+; CHECK-STATIC-32: call $test3
+
+; CHECK-PIC-64: test3:
+; CHECK-PIC-64: call bar@PLT
+; CHECK-PIC-64: call test3@PLT
+; CHECK-PIC-64: call $bar
+; CHECK-PIC-64: call $test3
+
+; CHECK-PIC-32: test3:
+; CHECK-PIC-32: call bar@PLT
+; CHECK-PIC-32: call test3@PLT
+; CHECK-PIC-32: call $bar
+; CHECK-PIC-32: call $test3
+
+
+; asm(" blah %P0" : : "X"(bar));
+  tail call void asm sideeffect "call ${0:P}", "X"(void (...)* @bar) nounwind
+  tail call void asm sideeffect "call ${0:P}", "X"(void (...)* bitcast (void ()* @test3 to void (...)*)) nounwind
+  tail call void asm sideeffect "call $0", "X"(void (...)* @bar) nounwind
+  tail call void asm sideeffect "call $0", "X"(void (...)* bitcast (void ()* @test3 to void (...)*)) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/asm-modifier.ll b/test/CodeGen/X86/asm-modifier.ll
new file mode 100644
index 0000000000000..44f972ec7198c
--- /dev/null
+++ b/test/CodeGen/X86/asm-modifier.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = 'asm.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+define i32 @test1() nounwind {
+entry:
+; CHECK: test1:
+; CHECK: movw	%gs:6, %ax
+  %asmtmp.i = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 6) nounwind ; <i16> [#uses=1]
+  %0 = zext i16 %asmtmp.i to i32                  ; <i32> [#uses=1]
+  ret i32 %0
+}
+
+define zeroext i16 @test2(i32 %address) nounwind {
+entry:
+; CHECK: test2:
+; CHECK: movw	%gs:(%eax), %ax
+  %asmtmp = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 %address) nounwind ; <i16> [#uses=1]
+  ret i16 %asmtmp
+}
+
+@n = global i32 42                                ; <i32*> [#uses=3]
+@y = common global i32 0                          ; <i32*> [#uses=3]
+
+define void @test3() nounwind {
+entry:
+; CHECK: test3:
+; CHECK: movl _n, %eax
+  call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @n) nounwind
+  ret void
+}
+
+define void @test4() nounwind {
+entry:
+; CHECK: test4:
+; CHECK: movl	L_y$non_lazy_ptr, %ecx
+; CHECK: movl (%ecx), %eax
+  call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @y) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll
new file mode 100644
index 0000000000000..d00f8e861c21e
--- /dev/null
+++ b/test/CodeGen/X86/atomic_add.ll
@@ -0,0 +1,217 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; rdar://7103704
+
+define void @sub1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub1:
+; CHECK: subl
+	%0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)		; <i32> [#uses=0]
+	ret void
+}
+
+define void @inc4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc4:
+; CHECK: incq
+	%0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)		; <i64> [#uses=0]
+	ret void
+}
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @add8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add8:
+; CHECK: addq $2
+	%0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 2)		; <i64> [#uses=0]
+	ret void
+}
+
+define void @add4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add4:
+; CHECK: addq
+	%0 = sext i32 %v to i64		; <i64> [#uses=1]
+	%1 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 %0)		; <i64> [#uses=0]
+	ret void
+}
+
+define void @inc3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc3:
+; CHECK: incb
+	%0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 1)		; <i8> [#uses=0]
+	ret void
+}
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @add7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add7:
+; CHECK: addb $2
+	%0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 2)		; <i8> [#uses=0]
+	ret void
+}
+
+define void @add3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add3:
+; CHECK: addb
+	%0 = trunc i32 %v to i8		; <i8> [#uses=1]
+	%1 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 %0)		; <i8> [#uses=0]
+	ret void
+}
+
+define void @inc2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc2:
+; CHECK: incw
+	%0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 1)		; <i16> [#uses=0]
+	ret void
+}
+
+declare i16 @llvm.atomic.load.add.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @add6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add6:
+; CHECK: addw $2
+	%0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 2)		; <i16> [#uses=0]
+	ret void
+}
+
+define void @add2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add2:
+; CHECK: addw
+	%0 = trunc i32 %v to i16		; <i16> [#uses=1]
+	%1 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 %0)		; <i16> [#uses=0]
+	ret void
+}
+
+define void @inc1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc1:
+; CHECK: incl
+	%0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 1)		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @add5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add5:
+; CHECK: addl $2
+	%0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 2)		; <i32> [#uses=0]
+	ret void
+}
+
+define void @add1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add1:
+; CHECK: addl
+	%0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 %v)		; <i32> [#uses=0]
+	ret void
+}
+
+define void @dec4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec4:
+; CHECK: decq
+	%0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 1)		; <i64> [#uses=0]
+	ret void
+}
+
+declare i64 @llvm.atomic.load.sub.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @sub8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub8:
+; CHECK: subq $2
+	%0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 2)		; <i64> [#uses=0]
+	ret void
+}
+
+define void @sub4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub4:
+; CHECK: subq
+	%0 = sext i32 %v to i64		; <i64> [#uses=1]
+	%1 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 %0)		; <i64> [#uses=0]
+	ret void
+}
+
+define void @dec3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec3:
+; CHECK: decb
+	%0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 1)		; <i8> [#uses=0]
+	ret void
+}
+
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @sub7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub7:
+; CHECK: subb $2
+	%0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 2)		; <i8> [#uses=0]
+	ret void
+}
+
+define void @sub3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub3:
+; CHECK: subb
+	%0 = trunc i32 %v to i8		; <i8> [#uses=1]
+	%1 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 %0)		; <i8> [#uses=0]
+	ret void
+}
+
+define void @dec2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec2:
+; CHECK: decw
+	%0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 1)		; <i16> [#uses=0]
+	ret void
+}
+
+declare i16 @llvm.atomic.load.sub.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @sub6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub6:
+; CHECK: subw $2
+	%0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 2)		; <i16> [#uses=0]
+	ret void
+}
+
+define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub2:
+; CHECK: subw
+	%0 = trunc i32 %v to i16		; <i16> [#uses=1]
+	%1 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 %0)		; <i16> [#uses=0]
+	ret void
+}
+
+define void @dec1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec1:
+; CHECK: decl
+	%0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 1)		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @sub5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub5:
+; CHECK: subl $2
+	%0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 2)		; <i32> [#uses=0]
+	ret void
+}
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 6871a08b29e6a..3ef1887083d02 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -o %t1 -f
+; RUN: llc < %s -march=x86 -o %t1
 ; RUN: grep "lock" %t1 | count 17
 ; RUN: grep "xaddl" %t1 | count 4 
 ; RUN: grep "cmpxchgl"  %t1 | count 13 
diff --git a/test/CodeGen/X86/attribute-sections.ll b/test/CodeGen/X86/attribute-sections.ll
new file mode 100644
index 0000000000000..30353346b5c96
--- /dev/null
+++ b/test/CodeGen/X86/attribute-sections.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+
+declare i32 @foo()
+@G0 = global i32 ()* @foo, section ".init_array"
+
+; LINUX:  .section  .init_array,"aw"
+; LINUX:  .globl G0
+
+@G1 = global i32 ()* @foo, section ".fini_array"
+
+; LINUX:  .section  .fini_array,"aw"
+; LINUX:  .globl G1
+
+@G2 = global i32 ()* @foo, section ".preinit_array"
+
+; LINUX:  .section .preinit_array,"aw"
+; LINUX:  .globl G2
+
diff --git a/test/CodeGen/X86/avoid-lea-scale2.ll b/test/CodeGen/X86/avoid-lea-scale2.ll
new file mode 100644
index 0000000000000..8003de262d2cf
--- /dev/null
+++ b/test/CodeGen/X86/avoid-lea-scale2.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86-64 | grep {leal.*-2(\[%\]rdi,\[%\]rdi)}
+
+define i32 @foo(i32 %x) nounwind readnone {
+  %t0 = shl i32 %x, 1
+  %t1 = add i32 %t0, -2
+  ret i32 %t1
+}
+
diff --git a/test/CodeGen/X86/avoid-loop-align-2.ll b/test/CodeGen/X86/avoid-loop-align-2.ll
index 9f0aeb32c417f..03e69e7a1a499 100644
--- a/test/CodeGen/X86/avoid-loop-align-2.ll
+++ b/test/CodeGen/X86/avoid-loop-align-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep align | count 3
+; RUN: llc < %s -march=x86 | grep align | count 3
 
 @x = external global i32*		; <i32**> [#uses=1]
 
diff --git a/test/CodeGen/X86/avoid-loop-align.ll b/test/CodeGen/X86/avoid-loop-align.ll
index dfc58181d9040..3e68f9486cfa4 100644
--- a/test/CodeGen/X86/avoid-loop-align.ll
+++ b/test/CodeGen/X86/avoid-loop-align.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep align | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep align | count 1
 
 @A = common global [100 x i32] zeroinitializer, align 32		; <[100 x i32]*> [#uses=1]
 
diff --git a/test/CodeGen/X86/bitcast-int-to-vector.ll b/test/CodeGen/X86/bitcast-int-to-vector.ll
index 370bec09848fc..4c25979dcd5e7 100644
--- a/test/CodeGen/X86/bitcast-int-to-vector.ll
+++ b/test/CodeGen/X86/bitcast-int-to-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define i1 @foo(i64 %a)
 {
diff --git a/test/CodeGen/X86/bitcast.ll b/test/CodeGen/X86/bitcast.ll
index f575409f2149b..c34c6753bfedc 100644
--- a/test/CodeGen/X86/bitcast.ll
+++ b/test/CodeGen/X86/bitcast.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
 ; PR1033
 
 define i64 @test1(double %t) {
diff --git a/test/CodeGen/X86/bitcast2.ll b/test/CodeGen/X86/bitcast2.ll
index 3e26931578023..48922b5f5a132 100644
--- a/test/CodeGen/X86/bitcast2.ll
+++ b/test/CodeGen/X86/bitcast2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movd | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep rsp
+; RUN: llc < %s -march=x86-64 | grep movd | count 2
+; RUN: llc < %s -march=x86-64 | not grep rsp
 
 define i64 @test1(double %A) {
    %B = bitcast double %A to i64
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index b9ce10f44198d..6b245c103e204 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -disable-post-RA-scheduler=false -break-anti-dependencies=false > %t
+; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=false > %t
 ; RUN:   grep {%xmm0} %t | count 14
 ; RUN:   not grep {%xmm1} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -disable-post-RA-scheduler=false -break-anti-dependencies > %t
+; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies > %t
 ; RUN:   grep {%xmm0} %t | count 7
 ; RUN:   grep {%xmm1} %t | count 7
 
diff --git a/test/CodeGen/X86/bss_pagealigned.ll b/test/CodeGen/X86/bss_pagealigned.ll
new file mode 100644
index 0000000000000..4a1049bc560d4
--- /dev/null
+++ b/test/CodeGen/X86/bss_pagealigned.ll
@@ -0,0 +1,21 @@
+; RUN: llc --code-model=kernel -march=x86-64 <%s | FileCheck %s
+; PR4933
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+%struct.kmem_cache_order_objects = type { i64 }
+declare i8* @memset(i8*, i32, i64)
+define void @unxlate_dev_mem_ptr(i64 %phis, i8* %addr) nounwind {
+  %pte.addr.i = alloca %struct.kmem_cache_order_objects*
+  %call8 = call i8* @memset(i8* bitcast ([512 x %struct.kmem_cache_order_objects]* @bm_pte to i8*), i32 0, i64 4096)
+; CHECK: movq    $bm_pte, %rdi
+; CHECK-NEXT: xorl    %esi, %esi
+; CHECK-NEXT: movl    $4096, %edx
+; CHECK-NEXT: call    memset
+  ret void
+}
+@bm_pte = internal global [512 x %struct.kmem_cache_order_objects] zeroinitializer, section ".bss.page_aligned", align 4096
+; CHECK: .section        .bss.page_aligned,"aw",@nobits
+; CHECK-NEXT: .align  4096
+; CHECK-NEXT: bm_pte:
+; CHECK-NEXT: .zero   4096
+; CHECK-NEXT: .size   bm_pte, 4096
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
index 91f8310361ad5..5bf58fa1d5054 100644
--- a/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: not grep APP %t
 ; RUN: grep bswapq %t | count 2
 ; RUN: grep bswapl %t | count 1
diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll
index 592e25bae331a..0a72c1c47845b 100644
--- a/test/CodeGen/X86/bswap.ll
+++ b/test/CodeGen/X86/bswap.ll
@@ -1,8 +1,8 @@
 ; bswap should be constant folded when it is passed a constant argument
 
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
 ; RUN:   grep bswapl | count 3
-; RUN: llvm-as < %s | llc -march=x86 | grep rolw | count 1
+; RUN: llc < %s -march=x86 | grep rolw | count 1
 
 declare i16 @llvm.bswap.i16(i16)
 
diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll
index a76242c977ced..ec447e5e9c813 100644
--- a/test/CodeGen/X86/bt.ll
+++ b/test/CodeGen/X86/bt.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep btl | count 28
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium4 | grep btl | not grep esp
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn   | grep btl | not grep esp
+; RUN: llc < %s -march=x86 | grep btl | count 28
+; RUN: llc < %s -march=x86 -mcpu=pentium4 | grep btl | not grep esp
+; RUN: llc < %s -march=x86 -mcpu=penryn   | grep btl | not grep esp
 ; PR3253
 
 ; The register+memory form of the BT instruction should be usable on
diff --git a/test/CodeGen/X86/byval.ll b/test/CodeGen/X86/byval.ll
index a75214a6b084b..af36e1bb8cb4c 100644
--- a/test/CodeGen/X86/byval.ll
+++ b/test/CodeGen/X86/byval.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq	8(%rsp), %rax}
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86-64 | grep {movq	8(%rsp), %rax}
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep {movl	8(%esp), %edx} %t
 ; RUN: grep {movl	4(%esp), %eax} %t
 
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
index f85c8ffbe4fe7..71129f5f6c9bc 100644
--- a/test/CodeGen/X86/byval2.ll
+++ b/test/CodeGen/X86/byval2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86    | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86    | grep rep.movsl | count 2
 
 %struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
                    i64, i64, i64, i64, i64, i64, i64, i64,
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
index 707a4c5d2785d..504e0bed79168 100644
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
 
 %struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
                    i32, i32, i32, i32, i32, i32, i32, i32,
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index 5576c361ae163..4db9d650b439c 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl	 | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl	 | count 2
 
 %struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
                    i16, i16, i16, i16, i16, i16, i16, i16,
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index c6f4588dd45df..69c115b97326e 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl	 | count 2
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl	 | count 2
 
 %struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
                    i8, i8, i8, i8, i8, i8, i8, i8,
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
index 47269d21d930d..b060369a182ec 100644
--- a/test/CodeGen/X86/byval6.ll
+++ b/test/CodeGen/X86/byval6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep add | not grep 16
+; RUN: llc < %s -march=x86 | grep add | not grep 16
 
 	%struct.W = type { x86_fp80, x86_fp80 }
 @B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
index 6b64c6ce4dabd..0da93bad04e10 100644
--- a/test/CodeGen/X86/byval7.ll
+++ b/test/CodeGen/X86/byval7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16
+; RUN: llc < %s -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16
 
 	%struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
                            <2 x i64> }
diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll
index 6e9c70dd42fe8..87785bc3f3f40 100644
--- a/test/CodeGen/X86/call-imm.ll
+++ b/test/CodeGen/X86/call-imm.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-darwin-apple -relocation-model=static | grep {call.*12345678}
-; RUN: llvm-as < %s | llc -mtriple=i386-darwin-apple -relocation-model=pic | not grep {call.*12345678}
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-darwin-apple -relocation-model=static | grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-darwin-apple -relocation-model=pic | not grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678}
 
 ; Call to immediate is not safe on x86-64 unless we *know* that the
 ; call will be within 32-bits pcrel from the dest immediate.
 
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {call.*\*%rax}
+; RUN: llc < %s -march=x86-64 | grep {call.*\*%rax}
 
 ; PR3666
 ; PR3773
diff --git a/test/CodeGen/X86/call-push.ll b/test/CodeGen/X86/call-push.ll
index ad9b796a85d01..7bae5cd2464d5 100644
--- a/test/CodeGen/X86/call-push.ll
+++ b/test/CodeGen/X86/call-push.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -disable-fp-elim | grep subl | count 1
+; RUN: llc < %s -march=x86 -disable-fp-elim | grep subl | count 1
 
         %struct.decode_t = type { i8, i8, i8, i8, i16, i8, i8, %struct.range_t** }
         %struct.range_t = type { float, float, i32, i32, i32, [0 x i8] }
diff --git a/test/CodeGen/X86/change-compare-stride-0.ll b/test/CodeGen/X86/change-compare-stride-0.ll
index 87194d61c37a6..d520a6ff13b24 100644
--- a/test/CodeGen/X86/change-compare-stride-0.ll
+++ b/test/CodeGen/X86/change-compare-stride-0.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
-; RUN: grep {cmpl	\$4294966818,} %t
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep {cmpl	\$-478,} %t
 ; RUN: not grep inc %t
 ; RUN: not grep {leal	1(} %t
 ; RUN: not grep {leal	-1(} %t
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll
index 49b691f4a75bf..a9ddbdb7f745a 100644
--- a/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep {cmpq	\$-478,} %t
 ; RUN: not grep inc %t
 ; RUN: not grep {leal	1(} %t
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index c3b3b412f2a9d..3f27187d44a89 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep bsr | count 2
-; RUN: llvm-as < %s | llc -march=x86 | grep bsf
-; RUN: llvm-as < %s | llc -march=x86 | grep cmov | count 3
+; RUN: llc < %s -march=x86 | grep bsr | count 2
+; RUN: llc < %s -march=x86 | grep bsf
+; RUN: llc < %s -march=x86 | grep cmov | count 3
 
 define i32 @t1(i32 %x) nounwind  {
 	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
new file mode 100644
index 0000000000000..f3c9a7addf83b
--- /dev/null
+++ b/test/CodeGen/X86/cmov.ll
@@ -0,0 +1,157 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
+entry:
+; CHECK: test1:
+; CHECK: btl
+; CHECK-NEXT: movl	$12, %eax
+; CHECK-NEXT: cmovae	(%rcx), %eax
+; CHECK-NEXT: ret
+
+	%0 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%1 = and i32 %0, 1		; <i32> [#uses=1]
+	%toBool = icmp eq i32 %1, 0		; <i1> [#uses=1]
+        %v = load i32* %vp
+	%.0 = select i1 %toBool, i32 %v, i32 12		; <i32> [#uses=1]
+	ret i32 %.0
+}
+define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
+entry:
+; CHECK: test2:
+; CHECK: btl
+; CHECK-NEXT: movl	$12, %eax
+; CHECK-NEXT: cmovb	(%rcx), %eax
+; CHECK-NEXT: ret
+
+	%0 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%1 = and i32 %0, 1		; <i32> [#uses=1]
+	%toBool = icmp eq i32 %1, 0		; <i1> [#uses=1]
+        %v = load i32* %vp
+	%.0 = select i1 %toBool, i32 12, i32 %v		; <i32> [#uses=1]
+	ret i32 %.0
+}
+
+
+; x86's 32-bit cmov doesn't clobber the high 32 bits of the destination
+; if the condition is false. An explicit zero-extend (movl) is needed
+; after the cmov.
+
+declare void @bar(i64) nounwind
+
+define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
+; CHECK: test3:
+; CHECK:      cmovne  %edi, %esi
+; CHECK-NEXT: movl    %esi, %edi
+
+  %c = trunc i64 %a to i32
+  %d = trunc i64 %b to i32
+  %e = select i1 %p, i32 %c, i32 %d
+  %f = zext i32 %e to i64
+  call void @bar(i64 %f)
+  ret void
+}
+
+
+
+; CodeGen shouldn't try to do a setne after an expanded 8-bit conditional
+; move without recomputing EFLAGS, because the expansion of the conditional
+; move with control flow may clobber EFLAGS (e.g., with xor, to set the
+; register to zero).
+
+; The test is a little awkward; the important part is that there's a test before the
+; setne.
+; PR4814
+
+
+@g_3 = external global i8                         ; <i8*> [#uses=1]
+@g_96 = external global i8                        ; <i8*> [#uses=2]
+@g_100 = external global i8                       ; <i8*> [#uses=2]
+@_2E_str = external constant [15 x i8], align 1   ; <[15 x i8]*> [#uses=1]
+
+define i32 @test4() nounwind {
+entry:
+  %0 = load i8* @g_3, align 1                     ; <i8> [#uses=2]
+  %1 = sext i8 %0 to i32                          ; <i32> [#uses=1]
+  %.lobit.i = lshr i8 %0, 7                       ; <i8> [#uses=1]
+  %tmp.i = zext i8 %.lobit.i to i32               ; <i32> [#uses=1]
+  %tmp.not.i = xor i32 %tmp.i, 1                  ; <i32> [#uses=1]
+  %iftmp.17.0.i.i = ashr i32 %1, %tmp.not.i       ; <i32> [#uses=1]
+  %retval56.i.i = trunc i32 %iftmp.17.0.i.i to i8 ; <i8> [#uses=1]
+  %2 = icmp eq i8 %retval56.i.i, 0                ; <i1> [#uses=2]
+  %g_96.promoted.i = load i8* @g_96               ; <i8> [#uses=3]
+  %3 = icmp eq i8 %g_96.promoted.i, 0             ; <i1> [#uses=2]
+  br i1 %3, label %func_4.exit.i, label %bb.i.i.i
+
+bb.i.i.i:                                         ; preds = %entry
+  %4 = volatile load i8* @g_100, align 1          ; <i8> [#uses=0]
+  br label %func_4.exit.i
+
+; CHECK: test4:
+; CHECK: g_100
+; CHECK: testb
+; CHECK: testb %al, %al
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: testb
+
+func_4.exit.i:                                    ; preds = %bb.i.i.i, %entry
+  %.not.i = xor i1 %2, true                       ; <i1> [#uses=1]
+  %brmerge.i = or i1 %3, %.not.i                  ; <i1> [#uses=1]
+  %.mux.i = select i1 %2, i8 %g_96.promoted.i, i8 0 ; <i8> [#uses=1]
+  br i1 %brmerge.i, label %func_1.exit, label %bb.i.i
+
+bb.i.i:                                           ; preds = %func_4.exit.i
+  %5 = volatile load i8* @g_100, align 1          ; <i8> [#uses=0]
+  br label %func_1.exit
+
+func_1.exit:                                      ; preds = %bb.i.i, %func_4.exit.i
+  %g_96.tmp.0.i = phi i8 [ %g_96.promoted.i, %bb.i.i ], [ %.mux.i, %func_4.exit.i ] ; <i8> [#uses=2]
+  store i8 %g_96.tmp.0.i, i8* @g_96
+  %6 = zext i8 %g_96.tmp.0.i to i32               ; <i32> [#uses=1]
+  %7 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([15 x i8]* @_2E_str, i64 0, i64 0), i32 %6) nounwind ; <i32> [#uses=0]
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+
+; Should compile to setcc | -2.
+; rdar://6668608
+define i32 @test5(i32* nocapture %P) nounwind readonly {
+entry:
+; CHECK: test5:
+; CHECK: 	setg	%al
+; CHECK:	movzbl	%al, %eax
+; CHECK:	orl	$-2, %eax
+; CHECK:	ret
+
+	%0 = load i32* %P, align 4		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 41		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %1, i32 -1, i32 -2		; <i32> [#uses=1]
+	ret i32 %iftmp.0.0
+}
+
+define i32 @test6(i32* nocapture %P) nounwind readonly {
+entry:
+; CHECK: test6:
+; CHECK: 	setl	%al
+; CHECK:	movzbl	%al, %eax
+; CHECK:	leal	4(%rax,%rax,8), %eax
+; CHECK:        ret
+	%0 = load i32* %P, align 4		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 41		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %1, i32 4, i32 13		; <i32> [#uses=1]
+	ret i32 %iftmp.0.0
+}
+
+
+; Don't try to use a 16-bit conditional move to do an 8-bit select,
+; because it isn't worth it. Just use a branch instead.
+define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind {
+; CHECK: test7:
+; CHECK:     testb	$1, %dil
+; CHECK-NEXT:     jne	LBB
+
+  %d = select i1 %c, i8 %a, i8 %b
+  ret i8 %d
+}
diff --git a/test/CodeGen/X86/cmp-test.ll b/test/CodeGen/X86/cmp-test.ll
index 91c8a87ea5419..898c09b82f5e2 100644
--- a/test/CodeGen/X86/cmp-test.ll
+++ b/test/CodeGen/X86/cmp-test.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep test | count 1
+; RUN: llc < %s -march=x86 | grep cmp | count 1
+; RUN: llc < %s -march=x86 | grep test | count 1
 
 define i32 @f1(i32 %X, i32* %y) {
 	%tmp = load i32* %y		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll
index f66f90c0b0f3a..de893745bae9c 100644
--- a/test/CodeGen/X86/cmp0.ll
+++ b/test/CodeGen/X86/cmp0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep -v cmp
+; RUN: llc < %s -march=x86-64 | grep -v cmp
 
 define i64 @foo(i64 %x) {
   %t = icmp eq i64 %x, 0
diff --git a/test/CodeGen/X86/cmp1.ll b/test/CodeGen/X86/cmp1.ll
index 241618c531ab5..d4aa399ae95d5 100644
--- a/test/CodeGen/X86/cmp1.ll
+++ b/test/CodeGen/X86/cmp1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep -v cmp
+; RUN: llc < %s -march=x86-64 | grep -v cmp
 
 define i64 @foo(i64 %x) {
   %t = icmp slt i64 %x, 1
diff --git a/test/CodeGen/X86/cmp2.ll b/test/CodeGen/X86/cmp2.ll
index 2c046ffc08413..9a8e00c8bca09 100644
--- a/test/CodeGen/X86/cmp2.ll
+++ b/test/CodeGen/X86/cmp2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep ucomisd | grep CPI | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep ucomisd | grep CPI | count 2
 
 define i32 @test(double %A) nounwind  {
  entry:
diff --git a/test/CodeGen/X86/coalesce-esp.ll b/test/CodeGen/X86/coalesce-esp.ll
new file mode 100644
index 0000000000000..0fe4e56c97cab
--- /dev/null
+++ b/test/CodeGen/X86/coalesce-esp.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | grep {movl	%esp, %eax}
+; PR4572
+
+; Don't coalesce with %esp if it would end up putting %esp in
+; the index position of an address, because that can't be
+; encoded on x86. It would actually be slightly better to
+; swap the address operands though, since there's no scale.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-mingw32"
+	%"struct.std::valarray<unsigned int>" = type { i32, i32* }
+
+define void @_ZSt17__gslice_to_indexjRKSt8valarrayIjES2_RS0_(i32 %__o, %"struct.std::valarray<unsigned int>"* nocapture %__l, %"struct.std::valarray<unsigned int>"* nocapture %__s, %"struct.std::valarray<unsigned int>"* nocapture %__i) nounwind {
+entry:
+	%0 = alloca i32, i32 undef, align 4		; <i32*> [#uses=1]
+	br i1 undef, label %return, label %bb4
+
+bb4:		; preds = %bb7.backedge, %entry
+	%indvar = phi i32 [ %indvar.next, %bb7.backedge ], [ 0, %entry ]		; <i32> [#uses=2]
+	%scevgep24.sum = sub i32 undef, %indvar		; <i32> [#uses=2]
+	%scevgep25 = getelementptr i32* %0, i32 %scevgep24.sum		; <i32*> [#uses=1]
+	%scevgep27 = getelementptr i32* undef, i32 %scevgep24.sum		; <i32*> [#uses=1]
+	%1 = load i32* %scevgep27, align 4		; <i32> [#uses=0]
+	br i1 undef, label %bb7.backedge, label %bb5
+
+bb5:		; preds = %bb4
+	store i32 0, i32* %scevgep25, align 4
+	br label %bb7.backedge
+
+bb7.backedge:		; preds = %bb5, %bb4
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br label %bb4
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
index 99394240c7c81..8aa0bfdd51fbd 100644
--- a/test/CodeGen/X86/coalescer-commute1.ll
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
 ; PR1877
 
 @NNTOT = weak global i32 0		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
index c67e0f582496a..5d10bbad09ef0 100644
--- a/test/CodeGen/X86/coalescer-commute2.ll
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep paddw | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep mov
+; RUN: llc < %s -march=x86-64 | grep paddw | count 2
+; RUN: llc < %s -march=x86-64 | not grep mov
 
 ; The 2-addr pass should ensure that identical code is produced for these functions
 ; no extra copy should be generated.
diff --git a/test/CodeGen/X86/coalescer-commute3.ll b/test/CodeGen/X86/coalescer-commute3.ll
index 7d4a80ab70f23..e5bd448a4158a 100644
--- a/test/CodeGen/X86/coalescer-commute3.ll
+++ b/test/CodeGen/X86/coalescer-commute3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 6
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 6
 
 	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
 
diff --git a/test/CodeGen/X86/coalescer-commute4.ll b/test/CodeGen/X86/coalescer-commute4.ll
index 9628f93e7916f..02a97813fdcd0 100644
--- a/test/CodeGen/X86/coalescer-commute4.ll
+++ b/test/CodeGen/X86/coalescer-commute4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
 ; PR1501
 
 define float @foo(i32* %x, float* %y, i32 %c) nounwind  {
diff --git a/test/CodeGen/X86/coalescer-commute5.ll b/test/CodeGen/X86/coalescer-commute5.ll
index c730ea76e9837..510d115f4ad76 100644
--- a/test/CodeGen/X86/coalescer-commute5.ll
+++ b/test/CodeGen/X86/coalescer-commute5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
 
 define i32 @t() {
 entry:
diff --git a/test/CodeGen/X86/coalescer-cross.ll b/test/CodeGen/X86/coalescer-cross.ll
new file mode 100644
index 0000000000000..7d6f399930fd4
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-cross.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | not grep movaps
+; rdar://6509240
+
+	type { %struct.TValue }		; type %0
+	type { %struct.L_Umaxalign, i32, %struct.Node* }		; type %1
+	%struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 }
+	%struct.GCObject = type { %struct.lua_State }
+	%struct.L_Umaxalign = type { double }
+	%struct.Mbuffer = type { i8*, i32, i32 }
+	%struct.Node = type { %struct.TValue, %struct.TKey }
+	%struct.TKey = type { %1 }
+	%struct.TString = type { %struct.anon }
+	%struct.TValue = type { %struct.L_Umaxalign, i32 }
+	%struct.Table = type { %struct.GCObject*, i8, i8, i8, i8, %struct.Table*, %struct.TValue*, %struct.Node*, %struct.Node*, %struct.GCObject*, i32 }
+	%struct.UpVal = type { %struct.GCObject*, i8, i8, %struct.TValue*, %0 }
+	%struct.anon = type { %struct.GCObject*, i8, i8, i8, i32, i32 }
+	%struct.global_State = type { %struct.stringtable, i8* (i8*, i8*, i32, i32)*, i8*, i8, i8, i32, %struct.GCObject*, %struct.GCObject**, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.Mbuffer, i32, i32, i32, i32, i32, i32, i32 (%struct.lua_State*)*, %struct.TValue, %struct.lua_State*, %struct.UpVal, [9 x %struct.Table*], [17 x %struct.TString*] }
+	%struct.lua_Debug = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 }
+	%struct.lua_State = type { %struct.GCObject*, i8, i8, i8, %struct.TValue*, %struct.TValue*, %struct.global_State*, %struct.CallInfo*, i32*, %struct.TValue*, %struct.TValue*, %struct.CallInfo*, %struct.CallInfo*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State*, %struct.lua_Debug*)*, %struct.TValue, %struct.TValue, %struct.GCObject*, %struct.GCObject*, %struct.lua_longjmp*, i32 }
+	%struct.lua_longjmp = type { %struct.lua_longjmp*, [18 x i32], i32 }
+	%struct.stringtable = type { %struct.GCObject**, i32, i32 }
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.lua_State*)* @os_clock to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define i32 @os_clock(%struct.lua_State* nocapture %L) nounwind ssp {
+entry:
+	%0 = tail call i32 @"\01_clock$UNIX2003"() nounwind		; <i32> [#uses=1]
+	%1 = uitofp i32 %0 to double		; <double> [#uses=1]
+	%2 = fdiv double %1, 1.000000e+06		; <double> [#uses=1]
+	%3 = getelementptr %struct.lua_State* %L, i32 0, i32 4		; <%struct.TValue**> [#uses=3]
+	%4 = load %struct.TValue** %3, align 4		; <%struct.TValue*> [#uses=2]
+	%5 = getelementptr %struct.TValue* %4, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	store double %2, double* %5, align 4
+	%6 = getelementptr %struct.TValue* %4, i32 0, i32 1		; <i32*> [#uses=1]
+	store i32 3, i32* %6, align 4
+	%7 = load %struct.TValue** %3, align 4		; <%struct.TValue*> [#uses=1]
+	%8 = getelementptr %struct.TValue* %7, i32 1		; <%struct.TValue*> [#uses=1]
+	store %struct.TValue* %8, %struct.TValue** %3, align 4
+	ret i32 1
+}
+
+declare i32 @"\01_clock$UNIX2003"()
diff --git a/test/CodeGen/X86/coalescer-remat.ll b/test/CodeGen/X86/coalescer-remat.ll
index ab029f45658ce..4db520fee7477 100644
--- a/test/CodeGen/X86/coalescer-remat.ll
+++ b/test/CodeGen/X86/coalescer-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep xor | count 3
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep xor | count 3
 
 @val = internal global i64 0		; <i64*> [#uses=1]
 @"\01LC" = internal constant [7 x i8] c"0x%lx\0A\00"		; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/code_placement.ll b/test/CodeGen/X86/code_placement.ll
index 55167950d1a01..97471835a4c9e 100644
--- a/test/CodeGen/X86/code_placement.ll
+++ b/test/CodeGen/X86/code_placement.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | %prcontext jmp 1 | grep align
+; RUN: llc -march=x86 < %s | FileCheck %s
 
 @Te0 = external global [256 x i32]		; <[256 x i32]*> [#uses=5]
 @Te1 = external global [256 x i32]		; <[256 x i32]*> [#uses=4]
@@ -12,6 +12,8 @@ entry:
 	%tmp15 = add i32 %r, -1		; <i32> [#uses=1]
 	%tmp.16 = zext i32 %tmp15 to i64		; <i64> [#uses=2]
 	br label %bb
+; CHECK: jmp
+; CHECK-NEXT: align
 
 bb:		; preds = %bb1, %entry
 	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ]		; <i64> [#uses=3]
diff --git a/test/CodeGen/X86/codegen-prepare-cast.ll b/test/CodeGen/X86/codegen-prepare-cast.ll
index ae3eb5f6d68d5..2a8ead8c49097 100644
--- a/test/CodeGen/X86/codegen-prepare-cast.ll
+++ b/test/CodeGen/X86/codegen-prepare-cast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 ; PR4297
 
 target datalayout =
diff --git a/test/CodeGen/X86/codemodel.ll b/test/CodeGen/X86/codemodel.ll
new file mode 100644
index 0000000000000..b6ca1cedc22ee
--- /dev/null
+++ b/test/CodeGen/X86/codemodel.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -code-model=small  | FileCheck -check-prefix CHECK-SMALL %s
+; RUN: llc < %s -code-model=kernel | FileCheck -check-prefix CHECK-KERNEL %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@data = external global [0 x i32]		; <[0 x i32]*> [#uses=5]
+
+define i32 @foo() nounwind readonly {
+entry:
+; CHECK-SMALL:  foo:
+; CHECK-SMALL:   movl data(%rip), %eax
+; CHECK-KERNEL: foo:
+; CHECK-KERNEL:  movl data, %eax
+	%0 = load i32* getelementptr ([0 x i32]* @data, i64 0, i64 0), align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
+
+define i32 @foo2() nounwind readonly {
+entry:
+; CHECK-SMALL:  foo2:
+; CHECK-SMALL:   movl data+40(%rip), %eax
+; CHECK-KERNEL: foo2:
+; CHECK-KERNEL:  movl data+40, %eax
+	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 10), align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
+
+define i32 @foo3() nounwind readonly {
+entry:
+; CHECK-SMALL:  foo3:
+; CHECK-SMALL:   movl data-40(%rip), %eax
+; CHECK-KERNEL: foo3:
+; CHECK-KERNEL:  movq $-40, %rax
+	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -10), align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
+
+define i32 @foo4() nounwind readonly {
+entry:
+; FIXME: We really can use movabsl here!
+; CHECK-SMALL:  foo4:
+; CHECK-SMALL:   movl $16777216, %eax
+; CHECK-SMALL:   movl data(%rax), %eax
+; CHECK-KERNEL: foo4:
+; CHECK-KERNEL:  movl data+16777216, %eax
+	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194304), align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
+
+define i32 @foo1() nounwind readonly {
+entry:
+; CHECK-SMALL:  foo1:
+; CHECK-SMALL:   movl data+16777212(%rip), %eax
+; CHECK-KERNEL: foo1:
+; CHECK-KERNEL:  movl data+16777212, %eax
+        %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194303), align 4            ; <i32> [#uses=1]
+        ret i32 %0
+}
+define i32 @foo5() nounwind readonly {
+entry:
+; CHECK-SMALL:  foo5:
+; CHECK-SMALL:   movl data-16777216(%rip), %eax
+; CHECK-KERNEL: foo5:
+; CHECK-KERNEL:  movq $-16777216, %rax
+	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -4194304), align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
diff --git a/test/CodeGen/X86/combine-lds.ll b/test/CodeGen/X86/combine-lds.ll
index a78a042d7ec37..b49d081a64f15 100644
--- a/test/CodeGen/X86/combine-lds.ll
+++ b/test/CodeGen/X86/combine-lds.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep fldl | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep fldl | count 1
 
 define double @doload64(i64 %x) nounwind  {
 	%tmp717 = bitcast i64 %x to double
diff --git a/test/CodeGen/X86/combiner-aa-0.ll b/test/CodeGen/X86/combiner-aa-0.ll
new file mode 100644
index 0000000000000..a61ef7acd13c7
--- /dev/null
+++ b/test/CodeGen/X86/combiner-aa-0.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 -combiner-global-alias-analysis -combiner-alias-analysis
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+	%struct.Hash_Key = type { [4 x i32], i32 }
+@g_flipV_hashkey = external global %struct.Hash_Key, align 16		; <%struct.Hash_Key*> [#uses=1]
+
+define void @foo() nounwind {
+	%t0 = load i32* undef, align 16		; <i32> [#uses=1]
+	%t1 = load i32* null, align 4		; <i32> [#uses=1]
+	%t2 = srem i32 %t0, 32		; <i32> [#uses=1]
+	%t3 = shl i32 1, %t2		; <i32> [#uses=1]
+	%t4 = xor i32 %t3, %t1		; <i32> [#uses=1]
+	store i32 %t4, i32* null, align 4
+	%t5 = getelementptr %struct.Hash_Key* @g_flipV_hashkey, i64 0, i32 0, i64 0		; <i32*> [#uses=2]
+	%t6 = load i32* %t5, align 4		; <i32> [#uses=1]
+	%t7 = shl i32 1, undef		; <i32> [#uses=1]
+	%t8 = xor i32 %t7, %t6		; <i32> [#uses=1]
+	store i32 %t8, i32* %t5, align 4
+	unreachable
+}
diff --git a/test/CodeGen/X86/combiner-aa-1.ll b/test/CodeGen/X86/combiner-aa-1.ll
new file mode 100644
index 0000000000000..58a7129b6005f
--- /dev/null
+++ b/test/CodeGen/X86/combiner-aa-1.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s --combiner-alias-analysis --combiner-global-alias-analysis
+; PR4880
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+%struct.alst_node = type { %struct.node }
+%struct.arg_node = type { %struct.node, i8*, %struct.alst_node* }
+%struct.arglst_node = type { %struct.alst_node, %struct.arg_node*, %struct.arglst_node* }
+%struct.lam_node = type { %struct.alst_node, %struct.arg_node*, %struct.alst_node* }
+%struct.node = type { i32 (...)**, %struct.node* }
+
+define i32 @._ZN8lam_node18resolve_name_clashEP8arg_nodeP9alst_node._ZNK8lam_nodeeqERK8exp_node._ZN11arglst_nodeD0Ev(%struct.lam_node* %this.this, %struct.arg_node* %outer_arg, %struct.alst_node* %env.cmp, %struct.arglst_node* %this, i32 %functionID) {
+comb_entry:
+  %.SV59 = alloca %struct.node*                   ; <%struct.node**> [#uses=1]
+  %0 = load i32 (...)*** null, align 4            ; <i32 (...)**> [#uses=1]
+  %1 = getelementptr inbounds i32 (...)** %0, i32 3 ; <i32 (...)**> [#uses=1]
+  %2 = load i32 (...)** %1, align 4               ; <i32 (...)*> [#uses=1]
+  store %struct.node* undef, %struct.node** %.SV59
+  %3 = bitcast i32 (...)* %2 to i32 (%struct.node*)* ; <i32 (%struct.node*)*> [#uses=1]
+  %4 = tail call i32 %3(%struct.node* undef)      ; <i32> [#uses=0]
+  unreachable
+}
diff --git a/test/CodeGen/X86/commute-intrinsic.ll b/test/CodeGen/X86/commute-intrinsic.ll
index 12c0e03f6f481..d810cb1eff784 100644
--- a/test/CodeGen/X86/commute-intrinsic.ll
+++ b/test/CodeGen/X86/commute-intrinsic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps
 
 @a = external global <2 x i64>		; <<2 x i64>*> [#uses=1]
 
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index 224f5d5e5c54c..56ea26b658d81 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -2,7 +2,7 @@
 ; insertion of register-register copies.
 
 ; Make sure there are only 3 mov's for each testcase
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {\\\<mov\\\>} | count 6
 
 
diff --git a/test/CodeGen/X86/compare-add.ll b/test/CodeGen/X86/compare-add.ll
index aa69a31a48fc2..358ee59c95a51 100644
--- a/test/CodeGen/X86/compare-add.ll
+++ b/test/CodeGen/X86/compare-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep add
+; RUN: llc < %s -march=x86 | not grep add
 
 define i1 @X(i32 %X) {
         %Y = add i32 %X, 14             ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/compare-inf.ll b/test/CodeGen/X86/compare-inf.ll
new file mode 100644
index 0000000000000..2be90c9764c2b
--- /dev/null
+++ b/test/CodeGen/X86/compare-inf.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; Convert oeq and une to ole/oge/ule/uge when comparing with infinity
+; and negative infinity, because those are more efficient on x86.
+
+; CHECK: oeq_inff:
+; CHECK: ucomiss
+; CHECK: jae
+define float @oeq_inff(float %x, float %y) nounwind readonly {
+  %t0 = fcmp oeq float %x, 0x7FF0000000000000
+  %t1 = select i1 %t0, float 1.0, float %y
+  ret float %t1
+}
+
+; CHECK: oeq_inf:
+; CHECK: ucomisd
+; CHECK: jae
+define double @oeq_inf(double %x, double %y) nounwind readonly {
+  %t0 = fcmp oeq double %x, 0x7FF0000000000000
+  %t1 = select i1 %t0, double 1.0, double %y
+  ret double %t1
+}
+
+; CHECK: une_inff:
+; CHECK: ucomiss
+; CHECK: jb
+define float @une_inff(float %x, float %y) nounwind readonly {
+  %t0 = fcmp une float %x, 0x7FF0000000000000
+  %t1 = select i1 %t0, float 1.0, float %y
+  ret float %t1
+}
+
+; CHECK: une_inf:
+; CHECK: ucomisd
+; CHECK: jb
+define double @une_inf(double %x, double %y) nounwind readonly {
+  %t0 = fcmp une double %x, 0x7FF0000000000000
+  %t1 = select i1 %t0, double 1.0, double %y
+  ret double %t1
+}
+
+; CHECK: oeq_neg_inff:
+; CHECK: ucomiss
+; CHECK: jae
+define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
+  %t0 = fcmp oeq float %x, 0xFFF0000000000000
+  %t1 = select i1 %t0, float 1.0, float %y
+  ret float %t1
+}
+
+; CHECK: oeq_neg_inf:
+; CHECK: ucomisd
+; CHECK: jae
+define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
+  %t0 = fcmp oeq double %x, 0xFFF0000000000000
+  %t1 = select i1 %t0, double 1.0, double %y
+  ret double %t1
+}
+
+; CHECK: une_neg_inff:
+; CHECK: ucomiss
+; CHECK: jb
+define float @une_neg_inff(float %x, float %y) nounwind readonly {
+  %t0 = fcmp une float %x, 0xFFF0000000000000
+  %t1 = select i1 %t0, float 1.0, float %y
+  ret float %t1
+}
+
+; CHECK: une_neg_inf:
+; CHECK: ucomisd
+; CHECK: jb
+define double @une_neg_inf(double %x, double %y) nounwind readonly {
+  %t0 = fcmp une double %x, 0xFFF0000000000000
+  %t1 = select i1 %t0, double 1.0, double %y
+  ret double %t1
+}
diff --git a/test/CodeGen/X86/compare_folding.ll b/test/CodeGen/X86/compare_folding.ll
index c6cda4a5b9792..84c152d77215a 100644
--- a/test/CodeGen/X86/compare_folding.ll
+++ b/test/CodeGen/X86/compare_folding.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | \
+; RUN: llc < %s -march=x86 -mcpu=yonah | \
 ; RUN:   grep movsd | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | \
+; RUN: llc < %s -march=x86 -mcpu=yonah | \
 ; RUN:   grep ucomisd
 declare i1 @llvm.isunordered.f64(double, double)
 
diff --git a/test/CodeGen/X86/compiler_used.ll b/test/CodeGen/X86/compiler_used.ll
new file mode 100644
index 0000000000000..be8de5e09f8a0
--- /dev/null
+++ b/test/CodeGen/X86/compiler_used.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep no_dead_strip | count 1
+; We should have a .no_dead_strip directive for Z but not for X/Y.
+
+@X = internal global i8 4
+@Y = internal global i32 123
+@Z = internal global i8 4
+
+@llvm.used = appending global [1 x i8*] [ i8* @Z ], section "llvm.metadata"
+@llvm.compiler_used = appending global [2 x i8*] [ i8* @X, i8* bitcast (i32* @Y to i8*)], section "llvm.metadata"
diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll
index 05adb50b294f2..7e7acaa98a764 100644
--- a/test/CodeGen/X86/complex-fca.ll
+++ b/test/CodeGen/X86/complex-fca.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 2
+; RUN: llc < %s -march=x86 | grep mov | count 2
 
 define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind {
 entry:
diff --git a/test/CodeGen/X86/const-select.ll b/test/CodeGen/X86/const-select.ll
index 6e3156beb0f61..ca8cc1464c770 100644
--- a/test/CodeGen/X86/const-select.ll
+++ b/test/CodeGen/X86/const-select.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
 
-; RUN: llvm-as < %s | llc | grep {LCPI1_0(,%eax,4)}
+; RUN: llc < %s | grep {LCPI1_0(,%eax,4)}
 define float @f(i32 %x) nounwind readnone {
 entry:
 	%0 = icmp eq i32 %x, 0		; <i1> [#uses=1]
@@ -10,7 +10,7 @@ entry:
 	ret float %iftmp.0.0
 }
 
-; RUN: llvm-as < %s | llc | grep {movsbl.*(%e.x,%e.x,4), %eax}
+; RUN: llc < %s | grep {movsbl.*(%e.x,%e.x,4), %eax}
 define signext i8 @test(i8* nocapture %P, double %F) nounwind readonly {
 entry:
 	%0 = fcmp olt double %F, 4.200000e+01		; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll
index 80be8545d59c8..05388f9b2a96e 100644
--- a/test/CodeGen/X86/constant-pool-remat-0.ll
+++ b/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep LCPI | count 3
-; RUN: llvm-as < %s | llc -march=x86-64 -stats  -info-output-file - | grep asm-printer | grep 6
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep LCPI | count 3
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats  -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -march=x86-64 | grep LCPI | count 3
+; RUN: llc < %s -march=x86-64 -stats  -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep LCPI | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats  -info-output-file - | grep asm-printer | grep 12
 
 declare float @qux(float %y)
 
diff --git a/test/CodeGen/X86/constpool.ll b/test/CodeGen/X86/constpool.ll
index 60d51e56c3b40..2aac486323a84 100644
--- a/test/CodeGen/X86/constpool.ll
+++ b/test/CodeGen/X86/constpool.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc 
-; RUN: llvm-as < %s | llc -fast-isel
-; RUN: llvm-as < %s | llc -march=x86-64
-; RUN: llvm-as < %s | llc -fast-isel -march=x86-64
+; RUN: llc < %s 
+; RUN: llc < %s -fast-isel
+; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -fast-isel -march=x86-64
 ; PR4466
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index 579e30ceadd09..2b4b83259b82a 100644
--- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -o %t -f -stats -info-output-file - | \
+; RUN: llc < %s -march=x86-64 -o %t -stats -info-output-file - | \
 ; RUN:   grep {asm-printer} | grep {Number of machine instrs printed} | grep 5
 ; RUN: grep {leal	1(\%rsi),} %t
 
diff --git a/test/CodeGen/X86/copysign-zero.ll b/test/CodeGen/X86/copysign-zero.ll
index a08fa6519d71b..47522d8080587 100644
--- a/test/CodeGen/X86/copysign-zero.ll
+++ b/test/CodeGen/X86/copysign-zero.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | not grep orpd
-; RUN: llvm-as < %s | llc | grep andpd | count 1
+; RUN: llc < %s | not grep orpd
+; RUN: llc < %s | grep andpd | count 1
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll
index 4539ef623de57..4fe554de75a02 100644
--- a/test/CodeGen/X86/critical-edge-split.ll
+++ b/test/CodeGen/X86/critical-edge-split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -tailcallopt=false -stats -info-output-file - | grep asm-printer | grep 31
+; RUN: llc < %s -mtriple=i386-apple-darwin -tailcallopt=false -stats -info-output-file - | grep asm-printer | grep 31
 
 	%CC = type { %Register }
 	%II = type { %"struct.XX::II::$_74" }
diff --git a/test/CodeGen/X86/cstring.ll b/test/CodeGen/X86/cstring.ll
index 27d6181db8bc7..5b5a7662ffff9 100644
--- a/test/CodeGen/X86/cstring.ll
+++ b/test/CodeGen/X86/cstring.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep comm
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep comm
 ; rdar://6479858
 
 @str1 = internal constant [1 x i8] zeroinitializer
diff --git a/test/CodeGen/X86/dag-rauw-cse.ll b/test/CodeGen/X86/dag-rauw-cse.ll
index ba84711c03eb1..edcfeb78a4d0c 100644
--- a/test/CodeGen/X86/dag-rauw-cse.ll
+++ b/test/CodeGen/X86/dag-rauw-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {orl	\$1}
+; RUN: llc < %s -march=x86 | grep {orl	\$1}
 ; PR3018
 
 define i32 @test(i32 %A) nounwind {
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index b96fdfc03c68d..c0ee2ac3386b1 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx -o %t
 ; RUN: grep unpcklpd %t | count 1
 ; RUN: grep movapd %t | count 1
 ; RUN: grep movaps %t | count 1
diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll
index a673ebf47de5d..c3c7990d19ebb 100644
--- a/test/CodeGen/X86/dagcombine-cse.ll
+++ b/test/CodeGen/X86/dagcombine-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 14
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 14
 
 define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll
index c292140e108de..a3c1e6f0c5549 100644
--- a/test/CodeGen/X86/darwin-bzero.ll
+++ b/test/CodeGen/X86/darwin-bzero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 | grep __bzero
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep __bzero
 
 declare void @llvm.memset.i32(i8*, i8, i32, i32)
 
diff --git a/test/CodeGen/X86/darwin-no-dead-strip.ll b/test/CodeGen/X86/darwin-no-dead-strip.ll
index 63325b7a6ae0b..452d1f8ce3922 100644
--- a/test/CodeGen/X86/darwin-no-dead-strip.ll
+++ b/test/CodeGen/X86/darwin-no-dead-strip.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep no_dead_strip
+; RUN: llc < %s | grep no_dead_strip
 
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin8.7.2"
diff --git a/test/CodeGen/X86/darwin-quote.ll b/test/CodeGen/X86/darwin-quote.ll
new file mode 100644
index 0000000000000..8fddc118f61ec
--- /dev/null
+++ b/test/CodeGen/X86/darwin-quote.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin  | FileCheck %s
+
+
+define internal i64 @baz() nounwind {
+  %tmp = load i64* @"+x"
+  ret i64 %tmp
+; CHECK: _baz:
+; CHECK:    movl "L_+x$non_lazy_ptr", %ecx
+}
+
+
+@"+x" = external global i64
+
+; CHECK: "L_+x$non_lazy_ptr":
+; CHECK:	.indirect_symbol "_+x"
diff --git a/test/CodeGen/X86/darwin-stub.ll b/test/CodeGen/X86/darwin-stub.ll
index 79eb31ac0fd47..b4d2e1aa566dd 100644
--- a/test/CodeGen/X86/darwin-stub.ll
+++ b/test/CodeGen/X86/darwin-stub.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin  |     grep stub
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | not grep stub
+; RUN: llc < %s -mtriple=i386-apple-darwin  |     grep stub
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | not grep stub
 
 @"\01LC" = internal constant [13 x i8] c"Hello World!\00"		; <[13 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/X86/div_const.ll b/test/CodeGen/X86/div_const.ll
index aa690f7f48575..f0ada41338b27 100644
--- a/test/CodeGen/X86/div_const.ll
+++ b/test/CodeGen/X86/div_const.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 365384439
+; RUN: llc < %s -march=x86 | grep 365384439
 
 define i32 @f9188_mul365384439_shift27(i32 %A) {
         %tmp1 = udiv i32 %A, 1577682821         ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/divrem.ll b/test/CodeGen/X86/divrem.ll
index a611eddc7682e..e86b52fe82d59 100644
--- a/test/CodeGen/X86/divrem.ll
+++ b/test/CodeGen/X86/divrem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 8
+; RUN: llc < %s -march=x86-64 | grep div | count 8
 
 define void @si64(i64 %x, i64 %y, i64* %p, i64* %q) {
 	%r = sdiv i64 %x, %y
diff --git a/test/CodeGen/X86/dll-linkage.ll b/test/CodeGen/X86/dll-linkage.ll
new file mode 100644
index 0000000000000..c634c7e1fd427
--- /dev/null
+++ b/test/CodeGen/X86/dll-linkage.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=i386-pc-mingw32 | FileCheck %s
+
+declare dllimport void @foo()
+
+define void @bar() nounwind {
+; CHECK: call	*__imp__foo
+  call void @foo()
+  ret void
+}
diff --git a/test/CodeGen/X86/dollar-name.ll b/test/CodeGen/X86/dollar-name.ll
index 885700ef82a53..3b263194a5a8d 100644
--- a/test/CodeGen/X86/dollar-name.ll
+++ b/test/CodeGen/X86/dollar-name.ll
@@ -1,12 +1,13 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux | grep {(\$bar)} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux | grep {(\$qux)} | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux | grep {(\$hen)} | count 1
+; RUN: llc < %s -march=x86 -mtriple=i386-linux | FileCheck %s
 ; PR1339
 
 @"$bar" = global i32 zeroinitializer
 @"$qux" = external global i32
 
 define i32 @"$foo"() nounwind {
+; CHECK: movl	($bar),
+; CHECK: addl	($qux),
+; CHECK: call	($hen)
   %m = load i32* @"$bar"
   %n = load i32* @"$qux"
   %t = add i32 %m, %n
diff --git a/test/CodeGen/X86/dyn-stackalloc.ll b/test/CodeGen/X86/dyn-stackalloc.ll
index 049a32cea717a..1df092018dd83 100644
--- a/test/CodeGen/X86/dyn-stackalloc.ll
+++ b/test/CodeGen/X86/dyn-stackalloc.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 | not egrep {\\\$4294967289|-7\\(}
-; RUN: llvm-as < %s | llc -march=x86 | egrep {\\\$4294967280|-16\\(}
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {\\-16}
+; RUN: llc < %s -march=x86 | not egrep {\\\$4294967289|-7}
+; RUN: llc < %s -march=x86 | egrep {\\\$4294967280|-16}
+; RUN: llc < %s -march=x86-64 | grep {\\-16}
 
-define void @t() {
+define void @t() nounwind {
 A:
 	br label %entry
 
diff --git a/test/CodeGen/X86/empty-struct-return-type.ll b/test/CodeGen/X86/empty-struct-return-type.ll
new file mode 100644
index 0000000000000..34cd5d925052f
--- /dev/null
+++ b/test/CodeGen/X86/empty-struct-return-type.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 | grep call
+; PR4688
+
+; Return types can be empty structs, which can be awkward.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @_ZN15QtSharedPointer22internalSafetyCheckAddEPVKv(i8* %ptr) {
+entry:
+	%0 = call { } @_ZNK5QHashIPv15QHashDummyValueE5valueERKS0_(i8** undef)		; <{ }> [#uses=0]
+        ret void
+}
+
+declare hidden { } @_ZNK5QHashIPv15QHashDummyValueE5valueERKS0_(i8** nocapture) nounwind
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll
index 5a378e19c49a8..52dcb61d87f8f 100644
--- a/test/CodeGen/X86/epilogue.ll
+++ b/test/CodeGen/X86/epilogue.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl	%ebp}
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 | grep {movl	%ebp}
 
 declare void @bar(<2 x i64>* %n)
 
diff --git a/test/CodeGen/X86/extend.ll b/test/CodeGen/X86/extend.ll
index a54b6f112d88e..9553b1b578b15 100644
--- a/test/CodeGen/X86/extend.ll
+++ b/test/CodeGen/X86/extend.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | grep movzx | count 1
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | grep movsx | count 1
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep movzx | count 1
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep movsx | count 1
 
 @G1 = internal global i8 0              ; <i8*> [#uses=1]
 @G2 = internal global i8 0              ; <i8*> [#uses=1]
diff --git a/test/CodeGen/X86/extern_weak.ll b/test/CodeGen/X86/extern_weak.ll
index 0cc56302b70fc..01e32aae08ca4 100644
--- a/test/CodeGen/X86/extern_weak.ll
+++ b/test/CodeGen/X86/extern_weak.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep weak_reference | count 2
+; RUN: llc < %s -mtriple=i686-apple-darwin | grep weak_reference | count 2
 
 @Y = global i32 (i8*)* @X               ; <i32 (i8*)**> [#uses=0]
 
diff --git a/test/CodeGen/X86/extmul128.ll b/test/CodeGen/X86/extmul128.ll
index df487659edb52..9b598299e5360 100644
--- a/test/CodeGen/X86/extmul128.ll
+++ b/test/CodeGen/X86/extmul128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mul | count 2
+; RUN: llc < %s -march=x86-64 | grep mul | count 2
 
 define i128 @i64_sext_i128(i64 %a, i64 %b) {
   %aa = sext i64 %a to i128
diff --git a/test/CodeGen/X86/extmul64.ll b/test/CodeGen/X86/extmul64.ll
index 635da48133b63..9e20ded1111f4 100644
--- a/test/CodeGen/X86/extmul64.ll
+++ b/test/CodeGen/X86/extmul64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mul | count 2
+; RUN: llc < %s -march=x86 | grep mul | count 2
 
 define i64 @i32_sext_i64(i32 %a, i32 %b) {
   %aa = sext i32 %a to i64
diff --git a/test/CodeGen/X86/extract-combine.ll b/test/CodeGen/X86/extract-combine.ll
index 842ec24e0ec86..2040e872f7fe3 100644
--- a/test/CodeGen/X86/extract-combine.ll
+++ b/test/CodeGen/X86/extract-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -o %t
 ; RUN: not grep unpcklps %t
 
 define i32 @foo() nounwind {
diff --git a/test/CodeGen/X86/extract-extract.ll b/test/CodeGen/X86/extract-extract.ll
new file mode 100644
index 0000000000000..ad79ab9ae20ff
--- /dev/null
+++ b/test/CodeGen/X86/extract-extract.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 >/dev/null
+; PR4699
+
+; Handle this extractvalue-of-extractvalue case without getting in
+; trouble with CSE in DAGCombine.
+
+        %cc = type { %crd }
+        %cr = type { i32 }
+        %crd = type { i64, %cr* }
+        %pp = type { %cc }
+
+define fastcc void @foo(%pp* nocapture byval %p_arg) {
+entry:
+        %tmp2 = getelementptr %pp* %p_arg, i64 0, i32 0         ; <%cc*> [#uses=
+        %tmp3 = load %cc* %tmp2         ; <%cc> [#uses=1]
+        %tmp34 = extractvalue %cc %tmp3, 0              ; <%crd> [#uses=1]
+        %tmp345 = extractvalue %crd %tmp34, 0           ; <i64> [#uses=1]
+        %.ptr.i = load %cr** undef              ; <%cr*> [#uses=0]
+        %tmp15.i = shl i64 %tmp345, 3           ; <i64> [#uses=0]
+        store %cr* undef, %cr** undef
+        ret void
+}
+
+
diff --git a/test/CodeGen/X86/extractelement-from-arg.ll b/test/CodeGen/X86/extractelement-from-arg.ll
index 44704b6adb396..4ea37f0c46d3e 100644
--- a/test/CodeGen/X86/extractelement-from-arg.ll
+++ b/test/CodeGen/X86/extractelement-from-arg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2
+; RUN: llc < %s -march=x86-64 -mattr=+sse2
 
 define void @test(float* %R, <4 x float> %X) nounwind {
 	%tmp = extractelement <4 x float> %X, i32 3
diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll
index 601690ef7cabb..ee57d9b762952 100644
--- a/test/CodeGen/X86/extractelement-load.ll
+++ b/test/CodeGen/X86/extractelement-load.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as %s -o - | llc -march=x86 -mattr=+sse2 -mcpu=yonah | not grep movd
-; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2 -mcpu=core2 | not grep movd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=yonah | not grep movd
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 -mcpu=core2 | not grep movd
 
 define i32 @t(<2 x i64>* %val) nounwind  {
 	%tmp2 = load <2 x i64>* %val, align 16		; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/extractelement-shuffle.ll b/test/CodeGen/X86/extractelement-shuffle.ll
index b00c8e49e1c85..12a2ef30e17ed 100644
--- a/test/CodeGen/X86/extractelement-shuffle.ll
+++ b/test/CodeGen/X86/extractelement-shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ; Examples that exhibits a bug in DAGCombine.  The case is triggered by the
 ; following program.  The bug is DAGCombine assumes that the bit convert
diff --git a/test/CodeGen/X86/extractps.ll b/test/CodeGen/X86/extractps.ll
index 484d2c4e5e100..14778f097ef53 100644
--- a/test/CodeGen/X86/extractps.ll
+++ b/test/CodeGen/X86/extractps.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn > %t
+; RUN: llc < %s -march=x86 -mcpu=penryn > %t
 ; RUN: not grep movd %t
 ; RUN: grep {movss	%xmm} %t | count 1
 ; RUN: grep {extractps	\\\$1, %xmm0, } %t | count 1
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
index 7ac8e048edbc4..54947c394b5e8 100644
--- a/test/CodeGen/X86/fabs.ll
+++ b/test/CodeGen/X86/fabs.ll
@@ -1,8 +1,7 @@
 ; Make sure this testcase codegens to the fabs instruction, not a call to fabsf
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
 ; RUN:   count 2
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
 ; RUN:   grep fabs\$ | count 3
 
 declare float @fabsf(float)
diff --git a/test/CodeGen/X86/fast-cc-callee-pops.ll b/test/CodeGen/X86/fast-cc-callee-pops.ll
index 941f7087f6241..5e88ed7f00d6f 100644
--- a/test/CodeGen/X86/fast-cc-callee-pops.ll
+++ b/test/CodeGen/X86/fast-cc-callee-pops.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=x86 -x86-asm-syntax=intel -mcpu=yonah | grep {ret	20}
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=yonah | grep {ret	20}
 
 ; Check that a fastcc function pops its stack variables before returning.
 
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index 3f3aa468675b3..e15182120094b 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {add	ESP, 8}
 
 target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/fast-cc-pass-in-regs.ll b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
index c8621a7780bd8..fe96c0c8be2a1 100644
--- a/test/CodeGen/X86/fast-cc-pass-in-regs.ll
+++ b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {mov	EDX, 1}
 ; check that fastcc is passing stuff in regs.
 
diff --git a/test/CodeGen/X86/fast-isel-bail.ll b/test/CodeGen/X86/fast-isel-bail.ll
index fb4f37ef90be9..9072c5c7b5937 100644
--- a/test/CodeGen/X86/fast-isel-bail.ll
+++ b/test/CodeGen/X86/fast-isel-bail.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -O0
+; RUN: llc < %s -march=x86 -O0
 
 ; This file is for regression tests for cases where FastISel needs
 ; to gracefully bail out and let SelectionDAGISel take over.
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
new file mode 100644
index 0000000000000..f2696ce814da1
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -O0 -march=x86-64 -mattr=+mmx | FileCheck %s
+; PR4684
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.8"
+
+declare void @func2(<1 x i64>)
+
+define void @func1() nounwind {
+
+; This isn't spectacular, but it's MMX code at -O0...
+; CHECK: movl $2, %eax
+; CHECK: movd %rax, %mm0
+; CHECK: movd %mm0, %rdi
+
+        call void @func2(<1 x i64> <i64 2>)
+        ret void
+}
diff --git a/test/CodeGen/X86/fast-isel-call.ll b/test/CodeGen/X86/fast-isel-call.ll
index 9945746807cfa..5fcdbbbe53b25 100644
--- a/test/CodeGen/X86/fast-isel-call.ll
+++ b/test/CodeGen/X86/fast-isel-call.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -march=x86 | grep and
+; RUN: llc < %s -fast-isel -march=x86 | grep and
 
 define i32 @t() nounwind {
 tak:
diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll
index ac2595a7461d7..84d10f32c294c 100644
--- a/test/CodeGen/X86/fast-isel-constpool.ll
+++ b/test/CodeGen/X86/fast-isel-constpool.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel | grep {LCPI1_0(%rip)}
+; RUN: llc < %s -fast-isel | grep {LCPI1_0(%rip)}
 ; Make sure fast isel uses rip-relative addressing when required.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.0"
diff --git a/test/CodeGen/X86/fast-isel-fneg.ll b/test/CodeGen/X86/fast-isel-fneg.ll
new file mode 100644
index 0000000000000..5ffd48bce6553
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-fneg.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86-64 | FileCheck %s
+; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | grep xor | count 2
+
+; CHECK: doo:
+; CHECK: xor
+define double @doo(double %x) nounwind {
+  %y = fsub double -0.0, %x
+  ret double %y
+}
+
+; CHECK: foo:
+; CHECK: xor
+define float @foo(float %x) nounwind {
+  %y = fsub float -0.0, %x
+  ret float %y
+}
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
new file mode 100644
index 0000000000000..5b8acecc3c18c
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86 -O0 | FileCheck %s --check-prefix=X32
+
+; GEP indices are interpreted as signed integers, so they
+; should be sign-extended to 64 bits on 64-bit targets.
+; PR3181
+define i32 @test1(i32 %t3, i32* %t1) nounwind {
+       %t9 = getelementptr i32* %t1, i32 %t3           ; <i32*> [#uses=1]
+       %t15 = load i32* %t9            ; <i32> [#uses=1]
+       ret i32 %t15
+; X32: test1:
+; X32:  	movl	(%ecx,%eax,4), %eax
+; X32:  	ret
+
+; X64: test1:
+; X64:  	movslq	%edi, %rax
+; X64:  	movl	(%rsi,%rax,4), %eax
+; X64:  	ret
+
+}
+define i32 @test2(i64 %t3, i32* %t1) nounwind {
+       %t9 = getelementptr i32* %t1, i64 %t3           ; <i32*> [#uses=1]
+       %t15 = load i32* %t9            ; <i32> [#uses=1]
+       ret i32 %t15
+; X32: test2:
+; X32:  	movl	(%eax,%ecx,4), %eax
+; X32:  	ret
+
+; X64: test2:
+; X64:  	movl	(%rsi,%rdi,4), %eax
+; X64:  	ret
+}
+
+
+
+; PR4984
+define i8 @test3(i8* %start) nounwind {
+entry:
+  %A = getelementptr i8* %start, i64 -2               ; <i8*> [#uses=1]
+  %B = load i8* %A, align 1                       ; <i8> [#uses=1]
+  ret i8 %B
+  
+  
+; X32: test3:
+; X32:  	movl	4(%esp), %eax
+; X32:  	movb	-2(%eax), %al
+; X32:  	ret
+
+; X64: test3:
+; X64:  	movb	-2(%rdi), %al
+; X64:  	ret
+
+}
diff --git a/test/CodeGen/X86/fast-isel-gv.ll b/test/CodeGen/X86/fast-isel-gv.ll
index b2f885095eceb..34f8b382522fc 100644
--- a/test/CodeGen/X86/fast-isel-gv.ll
+++ b/test/CodeGen/X86/fast-isel-gv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel | grep {_kill@GOTPCREL(%rip)}
+; RUN: llc < %s -fast-isel | grep {_kill@GOTPCREL(%rip)}
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin10.0"
 @f = global i8 (...)* @kill		; <i8 (...)**> [#uses=1]
diff --git a/test/CodeGen/X86/fast-isel-i1.ll b/test/CodeGen/X86/fast-isel-i1.ll
index e1ff7921a11a8..d0665783ce646 100644
--- a/test/CodeGen/X86/fast-isel-i1.ll
+++ b/test/CodeGen/X86/fast-isel-i1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -fast-isel | grep {andb	\$1, %}
+; RUN: llc < %s -march=x86 -fast-isel | grep {andb	\$1, %}
 
 declare i64 @bar(i64)
 
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index dfee4f2a11ea1..35ec1e7115b24 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -fast-isel -mtriple=i386-apple-darwin | \
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \
 ; RUN:   grep lazy_ptr, | count 2
-; RUN: llvm-as < %s | llc -fast-isel -march=x86 -relocation-model=static | \
+; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \
 ; RUN:   grep lea
 
 @src = external global i32
diff --git a/test/CodeGen/X86/fast-isel-phys.ll b/test/CodeGen/X86/fast-isel-phys.ll
index 91dcca57cc2b7..158ef551ce42d 100644
--- a/test/CodeGen/X86/fast-isel-phys.ll
+++ b/test/CodeGen/X86/fast-isel-phys.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -fast-isel-abort -march=x86
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86
 
 define i8 @t2(i8 %a, i8 %c) nounwind {
        %tmp = shl i8 %a, %c
diff --git a/test/CodeGen/X86/fast-isel-shift-imm.ll b/test/CodeGen/X86/fast-isel-shift-imm.ll
index 7d8c9f5e002c5..35f7a72a285c0 100644
--- a/test/CodeGen/X86/fast-isel-shift-imm.ll
+++ b/test/CodeGen/X86/fast-isel-shift-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -O0 | grep {sarl	\$80, %eax}
+; RUN: llc < %s -march=x86 -O0 | grep {sarl	\$80, %eax}
 ; PR3242
 
 define i32 @foo(i32 %x) nounwind {
diff --git a/test/CodeGen/X86/fast-isel-tailcall.ll b/test/CodeGen/X86/fast-isel-tailcall.ll
index 6f4d2026814f9..c3e527c4e5b48 100644
--- a/test/CodeGen/X86/fast-isel-tailcall.ll
+++ b/test/CodeGen/X86/fast-isel-tailcall.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -tailcallopt -march=x86 | not grep add
+; RUN: llc < %s -fast-isel -tailcallopt -march=x86 | not grep add
 ; PR4154
 
 ; On x86, -tailcallopt changes the ABI so the caller shouldn't readjust
diff --git a/test/CodeGen/X86/fast-isel-tls.ll b/test/CodeGen/X86/fast-isel-tls.ll
index 4dd14e6b21631..a5e6642e09c13 100644
--- a/test/CodeGen/X86/fast-isel-tls.ll
+++ b/test/CodeGen/X86/fast-isel-tls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic -mtriple=i686-unknown-linux-gnu -fast-isel | grep __tls_get_addr
+; RUN: llc < %s -march=x86 -relocation-model=pic -mtriple=i686-unknown-linux-gnu -fast-isel | grep __tls_get_addr
 ; PR3654
 
 @v = thread_local global i32 0
diff --git a/test/CodeGen/X86/fast-isel-trunc.ll b/test/CodeGen/X86/fast-isel-trunc.ll
index 039f114737bb1..69b26c5442e4d 100644
--- a/test/CodeGen/X86/fast-isel-trunc.ll
+++ b/test/CodeGen/X86/fast-isel-trunc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -fast-isel -fast-isel-abort
-; RUN: llvm-as < %s | llc -march=x86-64 -fast-isel -fast-isel-abort
+; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort
+; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort
 
 define i8 @t1(i32 %x) signext nounwind  {
 	%tmp1 = trunc i32 %x to i8
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index a9a016b7d0f34..3dcd736a14047 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
 
 ; This tests very minimal fast-isel functionality.
 
@@ -64,3 +64,12 @@ define i8* @inttoptr_i32(i32 %p) nounwind {
   %t = inttoptr i32 %p to i8*
   ret i8* %t
 }
+
+define void @store_i1(i1* %p, i1 %t) nounwind {
+  store i1 %t, i1* %p
+  ret void
+}
+define i1 @load_i1(i1* %p) nounwind {
+  %t = load i1* %p
+  ret i1 %t
+}
diff --git a/test/CodeGen/X86/fastcall-correct-mangling.ll b/test/CodeGen/X86/fastcall-correct-mangling.ll
index d2db2795512d1..2b48f5f371d9d 100644
--- a/test/CodeGen/X86/fastcall-correct-mangling.ll
+++ b/test/CodeGen/X86/fastcall-correct-mangling.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=mingw32 | \
+; RUN: llc < %s -mtriple=i386-unknown-mingw32 | \
 ; RUN:   grep {@12}
 
 ; Check that a fastcall function gets correct mangling
diff --git a/test/CodeGen/X86/fastcc-2.ll b/test/CodeGen/X86/fastcc-2.ll
index 40c753ee3041a..d044a2ad9e840 100644
--- a/test/CodeGen/X86/fastcc-2.ll
+++ b/test/CodeGen/X86/fastcc-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep movsd
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 1
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep movsd
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 1
 
 define i32 @foo() nounwind {
 entry:
diff --git a/test/CodeGen/X86/fastcc-byval.ll b/test/CodeGen/X86/fastcc-byval.ll
index 94da50584c7b6..52b3e57b96bc5 100644
--- a/test/CodeGen/X86/fastcc-byval.ll
+++ b/test/CodeGen/X86/fastcc-byval.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -tailcallopt=false | grep {movl\[\[:space:\]\]*8(%esp), %eax} | count 2
+; RUN: llc < %s -tailcallopt=false | grep {movl\[\[:space:\]\]*8(%esp), %eax} | count 2
 ; PR3122
 ; rdar://6400815
 
diff --git a/test/CodeGen/X86/fastcc-sret.ll b/test/CodeGen/X86/fastcc-sret.ll
index 7fc111bbc2997..d45741884c7db 100644
--- a/test/CodeGen/X86/fastcc-sret.ll
+++ b/test/CodeGen/X86/fastcc-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt=false | grep ret | not grep 4
+; RUN: llc < %s -march=x86 -tailcallopt=false | grep ret | not grep 4
 
 	%struct.foo = type { [4 x i32] }
 
diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll
index f18f34deb1907..d538264c6d7c0 100644
--- a/test/CodeGen/X86/fastcc.ll
+++ b/test/CodeGen/X86/fastcc.ll
@@ -1,5 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | grep ecx | grep 0
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | grep xmm0 | grep 8
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
+; CHECK: movsd %xmm0, 8(%esp)
+; CHECK: xorl %ecx, %ecx
 
 @d = external global double		; <double*> [#uses=1]
 @c = external global double		; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/field-extract-use-trunc.ll b/test/CodeGen/X86/field-extract-use-trunc.ll
index c4f9587335e79..60205305a977f 100644
--- a/test/CodeGen/X86/field-extract-use-trunc.ll
+++ b/test/CodeGen/X86/field-extract-use-trunc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep sar | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep sar
+; RUN: llc < %s -march=x86 | grep sar | count 1
+; RUN: llc < %s -march=x86-64 | not grep sar
 
 define i32 @test(i32 %f12) {
 	%tmp7.25 = lshr i32 %f12, 16		
diff --git a/test/CodeGen/X86/fildll.ll b/test/CodeGen/X86/fildll.ll
index 65944fdaee4ce..c5a3765c717b6 100644
--- a/test/CodeGen/X86/fildll.ll
+++ b/test/CodeGen/X86/fildll.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att -mattr=-sse2 | grep fildll | count 2
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att -mattr=-sse2 | grep fildll | count 2
 
 define fastcc double @sint64_to_fp(i64 %X) {
         %R = sitofp i64 %X to double            ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/fmul-zero.ll b/test/CodeGen/X86/fmul-zero.ll
index 73aa713de52ef..03bad6594128b 100644
--- a/test/CodeGen/X86/fmul-zero.ll
+++ b/test/CodeGen/X86/fmul-zero.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -enable-unsafe-fp-math | not grep mulps
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mulps
+; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | not grep mulps
+; RUN: llc < %s -march=x86-64 | grep mulps
 
 define void @test14(<4 x float>*) nounwind {
         load <4 x float>* %0, align 1
diff --git a/test/CodeGen/X86/fold-add.ll b/test/CodeGen/X86/fold-add.ll
index 2828ad22efbd2..5e80ea5478909 100644
--- a/test/CodeGen/X86/fold-add.ll
+++ b/test/CodeGen/X86/fold-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {cmpb	\$0, (%r.\*,%r.\*)}
+; RUN: llc < %s -march=x86-64 | grep {cmpb	\$0, (%r.\*,%r.\*)}
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.6"
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
index 705b795496446..9f79f7723b33d 100644
--- a/test/CodeGen/X86/fold-and-shift.ll
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | not grep and
 
 define i32 @t1(i8* %X, i32 %i) {
 entry:
diff --git a/test/CodeGen/X86/fold-call-2.ll b/test/CodeGen/X86/fold-call-2.ll
index 349f986830a05..7a2b03833ae9c 100644
--- a/test/CodeGen/X86/fold-call-2.ll
+++ b/test/CodeGen/X86/fold-call-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep mov | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 1
 
 @f = external global void ()*		; <void ()**> [#uses=1]
 
diff --git a/test/CodeGen/X86/fold-call-3.ll b/test/CodeGen/X86/fold-call-3.ll
index 824ae003da25f..337a7edb17360 100644
--- a/test/CodeGen/X86/fold-call-3.ll
+++ b/test/CodeGen/X86/fold-call-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep call | grep 560
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep call | grep 560
 ; rdar://6522427
 
 	%"struct.clang::Action" = type { %"struct.clang::ActionBase" }
diff --git a/test/CodeGen/X86/fold-call.ll b/test/CodeGen/X86/fold-call.ll
index 53991717c6746..603e9ad66caaf 100644
--- a/test/CodeGen/X86/fold-call.ll
+++ b/test/CodeGen/X86/fold-call.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep mov
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep mov
+; RUN: llc < %s -march=x86 | not grep mov
+; RUN: llc < %s -march=x86-64 | not grep mov
 
 declare void @bar()
 
diff --git a/test/CodeGen/X86/fold-imm.ll b/test/CodeGen/X86/fold-imm.ll
index 1623f31d74024..f1fcbcfd13b45 100644
--- a/test/CodeGen/X86/fold-imm.ll
+++ b/test/CodeGen/X86/fold-imm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep inc
-; RUN: llvm-as < %s | llc -march=x86 | grep add | grep 4
+; RUN: llc < %s -march=x86 | grep inc
+; RUN: llc < %s -march=x86 | grep add | grep 4
 
 define i32 @test(i32 %X) nounwind {
 entry:
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index 6e3da5c5ee827..eb182da10129e 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
 	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }
 @stmt_obstack = external global %struct.obstack		; <%struct.obstack*> [#uses=1]
diff --git a/test/CodeGen/X86/fold-mul-lohi.ll b/test/CodeGen/X86/fold-mul-lohi.ll
index 312427af7096e..0351ecab117b2 100644
--- a/test/CodeGen/X86/fold-mul-lohi.ll
+++ b/test/CodeGen/X86/fold-mul-lohi.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
 
 @B = external global [1000 x i8], align 32
 @A = external global [1000 x i8], align 32
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index f558aca420059..ef5202f554c59 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mcpu=yonah  | not grep pcmpeqd
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mcpu=yonah  | grep orps | grep CPI1_2  | count 2
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah  | not grep pcmpeqd
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah  | grep orps | grep CPI1_2  | count 2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
 
 ; This testcase shouldn't need to spill the -1 value,
 ; so it should just use pcmpeqd to materialize an all-ones vector.
diff --git a/test/CodeGen/X86/fold-pcmpeqd-1.ll b/test/CodeGen/X86/fold-pcmpeqd-1.ll
index e2141ebf68515..cc4198d7caf00 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-1.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
 ; RUN: grep pcmpeqd %t | count 1
 ; RUN: grep xor %t | count 1
 ; RUN: not grep LCP %t
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 2b75781218bcf..49f879504e063 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mcpu=yonah  | not grep pcmpeqd
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah  | not grep pcmpeqd
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
 
 ; This testcase should need to spill the -1 value on x86-32,
 ; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
diff --git a/test/CodeGen/X86/fold-sext-trunc.ll b/test/CodeGen/X86/fold-sext-trunc.ll
index 1016b1081aaeb..2605123d6dd49 100644
--- a/test/CodeGen/X86/fold-sext-trunc.ll
+++ b/test/CodeGen/X86/fold-sext-trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movslq | count 1
+; RUN: llc < %s -march=x86-64 | grep movslq | count 1
 ; PR4050
 
 	type { i64 }		; type %0
diff --git a/test/CodeGen/X86/fp-immediate-shorten.ll b/test/CodeGen/X86/fp-immediate-shorten.ll
index 32ba2171450be..cafc61a41ff29 100644
--- a/test/CodeGen/X86/fp-immediate-shorten.ll
+++ b/test/CodeGen/X86/fp-immediate-shorten.ll
@@ -1,6 +1,6 @@
 ;; Test that this FP immediate is stored in the constant pool as a float.
 
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3 | \
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | \
 ; RUN:   grep {.long.1123418112}
 
 define double @D() {
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll
index 15606c34886b9..08ea77d75f26c 100644
--- a/test/CodeGen/X86/fp-in-intregs.ll
+++ b/test/CodeGen/X86/fp-in-intregs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not egrep {\(\(xor\|and\)ps\|movd\)}
+; RUN: llc < %s -march=x86 -mcpu=yonah | not egrep {\(\(xor\|and\)ps\|movd\)}
 
 ; These operations should be done in integer registers, eliminating constant
 ; pool loads, movd's etc.
diff --git a/test/CodeGen/X86/fp-stack-2results.ll b/test/CodeGen/X86/fp-stack-2results.ll
index f47fd7472ecb4..321e267cb2fa0 100644
--- a/test/CodeGen/X86/fp-stack-2results.ll
+++ b/test/CodeGen/X86/fp-stack-2results.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fldz
-; RUN: llvm-as < %s | llc -march=x86-64 | grep fld1
+; RUN: llc < %s -march=x86 | grep fldz
+; RUN: llc < %s -march=x86-64 | grep fld1
 
 ; This is basically this code on x86-64:
 ; _Complex long double test() { return 1.0; }
diff --git a/test/CodeGen/X86/fp-stack-O0-crash.ll b/test/CodeGen/X86/fp-stack-O0-crash.ll
new file mode 100644
index 0000000000000..4768ea2019d17
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-O0-crash.ll
@@ -0,0 +1,30 @@
+; RUN: llc %s -O0 -fast-isel -regalloc=local -o -
+; PR4767
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10"
+
+define void @fn(x86_fp80 %x) nounwind ssp {
+entry:
+  %x.addr = alloca x86_fp80                       ; <x86_fp80*> [#uses=5]
+  store x86_fp80 %x, x86_fp80* %x.addr
+  br i1 false, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %tmp = load x86_fp80* %x.addr                   ; <x86_fp80> [#uses=1]
+  %tmp1 = load x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
+  %cmp = fcmp oeq x86_fp80 %tmp, %tmp1            ; <i1> [#uses=1]
+  br i1 %cmp, label %if.then, label %if.end
+
+cond.false:                                       ; preds = %entry
+  %tmp2 = load x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
+  %tmp3 = load x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
+  %cmp4 = fcmp une x86_fp80 %tmp2, %tmp3          ; <i1> [#uses=1]
+  br i1 %cmp4, label %if.then, label %if.end
+
+if.then:                                          ; preds = %cond.false, %cond.true
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %cond.false, %cond.true
+  ret void
+}
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index 4e61d0fbe7dcb..4bdf4590b07c5 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 | \
+; RUN: llc < %s -march=x86 -mcpu=i386 | \
 ; RUN:   grep {fucomi.*st.\[12\]}
 ; PR1012
 
diff --git a/test/CodeGen/X86/fp-stack-direct-ret.ll b/test/CodeGen/X86/fp-stack-direct-ret.ll
index 78be2a39defb1..5a28bb50a3435 100644
--- a/test/CodeGen/X86/fp-stack-direct-ret.ll
+++ b/test/CodeGen/X86/fp-stack-direct-ret.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep fstp
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep movsd
+; RUN: llc < %s -march=x86 | not grep fstp
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep movsd
 
 declare double @foo()
 
diff --git a/test/CodeGen/X86/fp-stack-ret-conv.ll b/test/CodeGen/X86/fp-stack-ret-conv.ll
index 5254e1c89f612..f220b24f90b07 100644
--- a/test/CodeGen/X86/fp-stack-ret-conv.ll
+++ b/test/CodeGen/X86/fp-stack-ret-conv.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah | grep cvtss2sd
-; RUN: llvm-as < %s | llc -mcpu=yonah | grep fstps
-; RUN: llvm-as < %s | llc -mcpu=yonah | not grep cvtsd2ss
+; RUN: llc < %s -mcpu=yonah | grep cvtss2sd
+; RUN: llc < %s -mcpu=yonah | grep fstps
+; RUN: llc < %s -mcpu=yonah | not grep cvtsd2ss
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/fp-stack-ret-store.ll b/test/CodeGen/X86/fp-stack-ret-store.ll
index 56392deb300d2..05dfc545db17b 100644
--- a/test/CodeGen/X86/fp-stack-ret-store.ll
+++ b/test/CodeGen/X86/fp-stack-ret-store.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah | not grep movss
+; RUN: llc < %s -mcpu=yonah | not grep movss
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
 
diff --git a/test/CodeGen/X86/fp-stack-ret.ll b/test/CodeGen/X86/fp-stack-ret.ll
index 3e6ad54e73b3d..c83a0cbf69e0c 100644
--- a/test/CodeGen/X86/fp-stack-ret.ll
+++ b/test/CodeGen/X86/fp-stack-ret.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 > %t
+; RUN: llc < %s -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 > %t
 ; RUN: grep fldl %t | count 1
 ; RUN: not grep xmm %t
 ; RUN: grep {sub.*esp} %t | count 1
diff --git a/test/CodeGen/X86/fp-stack-retcopy.ll b/test/CodeGen/X86/fp-stack-retcopy.ll
index 997f8df20fea6..67dcb1871df49 100644
--- a/test/CodeGen/X86/fp-stack-retcopy.ll
+++ b/test/CodeGen/X86/fp-stack-retcopy.ll
@@ -1,5 +1,5 @@
 ; This should not copy the result of foo into an xmm register.
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin9 | not grep xmm
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin9 | not grep xmm
 ; rdar://5689903
 
 declare double @foo()
diff --git a/test/CodeGen/X86/fp-stack-set-st1.ll b/test/CodeGen/X86/fp-stack-set-st1.ll
index 00a73aeb416f9..894897a2a5f0f 100644
--- a/test/CodeGen/X86/fp-stack-set-st1.ll
+++ b/test/CodeGen/X86/fp-stack-set-st1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fxch | count 2
+; RUN: llc < %s -march=x86 | grep fxch | count 2
 
 define i32 @main() nounwind {
 entry:
diff --git a/test/CodeGen/X86/fp2sint.ll b/test/CodeGen/X86/fp2sint.ll
index 80f7efbe4dec0..1675444887138 100644
--- a/test/CodeGen/X86/fp2sint.ll
+++ b/test/CodeGen/X86/fp2sint.ll
@@ -1,6 +1,6 @@
 ;; LowerFP_TO_SINT should not create a stack object if it's not needed.
 
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep add
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep add
 
 define i32 @main(i32 %argc, i8** %argv) {
 cond_false.i.i.i:               ; preds = %bb.i5
diff --git a/test/CodeGen/X86/fp_constant_op.ll b/test/CodeGen/X86/fp_constant_op.ll
index f2017b961fb57..8e823ede56a03 100644
--- a/test/CodeGen/X86/fp_constant_op.ll
+++ b/test/CodeGen/X86/fp_constant_op.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel -mcpu=i486 | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=i486 | \
 ; RUN:   grep {fadd\\|fsub\\|fdiv\\|fmul} | not grep -i ST
 
 ; Test that the load of the constant is folded into the operation.
diff --git a/test/CodeGen/X86/fp_load_cast_fold.ll b/test/CodeGen/X86/fp_load_cast_fold.ll
index 54523265e91e9..a160ac6944292 100644
--- a/test/CodeGen/X86/fp_load_cast_fold.ll
+++ b/test/CodeGen/X86/fp_load_cast_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fild | not grep ESP
+; RUN: llc < %s -march=x86 | grep fild | not grep ESP
 
 define double @short(i16* %P) {
         %V = load i16* %P               ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/fp_load_fold.ll b/test/CodeGen/X86/fp_load_fold.ll
index 655ad3df32386..0145069b8cd63 100644
--- a/test/CodeGen/X86/fp_load_fold.ll
+++ b/test/CodeGen/X86/fp_load_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep -i ST | not grep {fadd\\|fsub\\|fdiv\\|fmul}
 
 ; Test that the load of the memory location is folded into the operation.
diff --git a/test/CodeGen/X86/fsxor-alignment.ll b/test/CodeGen/X86/fsxor-alignment.ll
index 4d25fca1eb11b..6a8dbcfaa7c3b 100644
--- a/test/CodeGen/X86/fsxor-alignment.ll
+++ b/test/CodeGen/X86/fsxor-alignment.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -enable-unsafe-fp-math | \
 ; RUN:  grep -v sp | grep xorps | count 2
 
 ; Don't fold the incoming stack arguments into the xorps instructions used
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 4a85779ebf0a7..68575bc401d75 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -enable-full-lsr >%t
+; RUN: llc < %s -march=x86 -enable-full-lsr >%t
 ; RUN: grep {addl	\\\$4,} %t | count 3
 ; RUN: not grep {,%} %t
 
diff --git a/test/CodeGen/X86/ga-offset.ll b/test/CodeGen/X86/ga-offset.ll
index aaa2f84b88c94..9f6d3f75cf843 100644
--- a/test/CodeGen/X86/ga-offset.ll
+++ b/test/CodeGen/X86/ga-offset.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: not grep lea %t
 ; RUN: not grep add %t
 ; RUN: grep mov %t | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static > %t
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static > %t
 ; RUN: not grep lea %t
 ; RUN: not grep add %t
 ; RUN: grep mov %t | count 1
diff --git a/test/CodeGen/X86/global-sections-tls.ll b/test/CodeGen/X86/global-sections-tls.ll
new file mode 100644
index 0000000000000..2c2303042bc45
--- /dev/null
+++ b/test/CodeGen/X86/global-sections-tls.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+
+; PR4639
+@G1 = internal thread_local global i32 0		; <i32*> [#uses=1]
+; LINUX: .section		.tbss,"awT",@nobits
+; LINUX: G1:
+
+
+define i32* @foo() nounwind readnone {
+entry:
+	ret i32* @G1
+}
+
+
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
new file mode 100644
index 0000000000000..38948a7dc9124
--- /dev/null
+++ b/test/CodeGen/X86/global-sections.ll
@@ -0,0 +1,123 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN
+
+
+; int G1;
+@G1 = common global i32 0
+
+; LINUX: .type   G1,@object
+; LINUX: .section .gnu.linkonce.b.G1,"aw",@nobits
+; LINUX: .comm  G1,4,4
+
+; DARWIN: .comm	_G1,4,2
+
+
+
+
+; const int G2 __attribute__((weak)) = 42;
+@G2 = weak_odr constant i32 42	
+
+
+; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2"
+
+; DARWIN: .section __TEXT,__const_coal,coalesced
+; DARWIN: _G2:
+; DARWIN:    .long 42
+
+
+; int * const G3 = &G1;
+@G3 = constant i32* @G1
+
+; DARWIN: .section        __DATA,__const
+; DARWIN: .globl _G3
+; DARWIN: _G3:
+; DARWIN:     .long _G1
+
+
+; _Complex long long const G4 = 34;
+@G4 = constant {i64,i64} { i64 34, i64 0 }
+
+; DARWIN: .section        __TEXT,__const
+; DARWIN: _G4:
+; DARWIN:     .long 34
+
+
+; int G5 = 47;
+@G5 = global i32 47
+
+; LINUX: .data
+; LINUX: .globl G5
+; LINUX: G5:
+; LINUX:    .long 47
+
+; DARWIN: .section        __DATA,__data
+; DARWIN: .globl _G5
+; DARWIN: _G5:
+; DARWIN:    .long 47
+
+
+; PR4584
+@"foo bar" = linkonce global i32 42
+
+; LINUX: .type	foo_20_bar,@object
+; LINUX:.section	.gnu.linkonce.d.foo_20_bar,"aw",@progbits
+; LINUX: .weak	foo_20_bar
+; LINUX: foo_20_bar:
+
+; DARWIN: .section		__DATA,__datacoal_nt,coalesced
+; DARWIN: .globl	"_foo bar"
+; DARWIN:	.weak_definition "_foo bar"
+; DARWIN: "_foo bar":
+
+; PR4650
+@G6 = weak_odr constant [1 x i8] c"\01"
+
+; LINUX:   .type	G6,@object
+; LINUX:   .section	.gnu.linkonce.r.G6,"a",@progbits
+; LINUX:   .weak	G6
+; LINUX: G6:
+; LINUX:   .ascii	"\001"
+; LINUX:   .size	G6, 1
+
+; DARWIN:  .section __TEXT,__const_coal,coalesced
+; DARWIN:  .globl _G6
+; DARWIN:  .weak_definition _G6
+; DARWIN:_G6:
+; DARWIN:  .ascii "\001"
+
+
+@G7 = constant [10 x i8] c"abcdefghi\00"
+
+; DARWIN:	__TEXT,__cstring,cstring_literals
+; DARWIN:	.globl _G7
+; DARWIN: _G7:
+; DARWIN:	.asciz	"abcdefghi"
+
+; LINUX:	.section		.rodata.str1.1,"aMS",@progbits,1
+; LINUX:	.globl G7
+; LINUX: G7:
+; LINUX:	.asciz	"abcdefghi"
+
+
+@G8 = constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
+
+; DARWIN:	.section	__TEXT,__ustring
+; DARWIN:	.globl _G8
+; DARWIN: _G8:
+
+; LINUX:	.section		.rodata.str2.2,"aMS",@progbits,2
+; LINUX:	.globl G8
+; LINUX:G8:
+
+@G9 = constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
+
+; DARWIN:	.section        __TEXT,__const
+; DARWIN:	.globl _G9
+; DARWIN: _G9:
+
+; LINUX:	.section		.rodata.str4.4,"aMS",@progbits,4
+; LINUX:	.globl G9
+; LINUX:G9
+
+
+
diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll
index 41d91285ddbe1..76ffd66524b94 100644
--- a/test/CodeGen/X86/h-register-addressing-32.ll
+++ b/test/CodeGen/X86/h-register-addressing-32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {movzbl	%\[abcd\]h,} | count 7
+; RUN: llc < %s -march=x86 | grep {movzbl	%\[abcd\]h,} | count 7
 
 ; Use h-register extract and zero-extend.
 
diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll
index b38e0e478e99b..98817f3fb59f5 100644
--- a/test/CodeGen/X86/h-register-addressing-64.ll
+++ b/test/CodeGen/X86/h-register-addressing-64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl	%\[abcd\]h,} | count 7
+; RUN: llc < %s -march=x86-64 | grep {movzbl	%\[abcd\]h,} | count 7
 
 ; Use h-register extract and zero-extend.
 
diff --git a/test/CodeGen/X86/h-register-store.ll b/test/CodeGen/X86/h-register-store.ll
index e8672422a7b09..d30e6b334e8b1 100644
--- a/test/CodeGen/X86/h-register-store.ll
+++ b/test/CodeGen/X86/h-register-store.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep mov %t | count 6
 ; RUN: grep {movb	%ah, (%rsi)} %t | count 3
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep mov %t | count 3
 ; RUN: grep {movb	%ah, (%e} %t | count 3
 
diff --git a/test/CodeGen/X86/h-registers-0.ll b/test/CodeGen/X86/h-registers-0.ll
index 2777be9cc3e0a..878fd93b737c7 100644
--- a/test/CodeGen/X86/h-registers-0.ll
+++ b/test/CodeGen/X86/h-registers-0.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl	%\[abcd\]h,} | count 4
-; RUN: llvm-as < %s | llc -march=x86    > %t
+; RUN: llc < %s -march=x86-64 | grep {movzbl	%\[abcd\]h,} | count 4
+; RUN: llc < %s -march=x86    > %t
 ; RUN: grep {incb	%ah} %t | count 3
 ; RUN: grep {movzbl	%ah,} %t | count 3
 
diff --git a/test/CodeGen/X86/h-registers-1.ll b/test/CodeGen/X86/h-registers-1.ll
index 789f3dd18f08b..e97ebab69712b 100644
--- a/test/CodeGen/X86/h-registers-1.ll
+++ b/test/CodeGen/X86/h-registers-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep {movzbl	%\[abcd\]h,} %t | count 8
 ; RUN: grep {%\[abcd\]h} %t | not grep {%r\[\[:digit:\]\]*d}
 
diff --git a/test/CodeGen/X86/h-registers-2.ll b/test/CodeGen/X86/h-registers-2.ll
index 5541583239742..16e13f8396645 100644
--- a/test/CodeGen/X86/h-registers-2.ll
+++ b/test/CodeGen/X86/h-registers-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep {movzbl	%\[abcd\]h,} %t | count 1
 ; RUN: grep {shll	\$3,} %t | count 1
 
diff --git a/test/CodeGen/X86/h-registers-3.ll b/test/CodeGen/X86/h-registers-3.ll
index d353a2233797b..8a0b07b31c278 100644
--- a/test/CodeGen/X86/h-registers-3.ll
+++ b/test/CodeGen/X86/h-registers-3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86    | grep mov | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 1
+; RUN: llc < %s -march=x86    | grep mov | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
 
 define zeroext i8 @foo() nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/hidden-vis-2.ll b/test/CodeGen/X86/hidden-vis-2.ll
index e000547f44f2b..74554d15e2f6d 100644
--- a/test/CodeGen/X86/hidden-vis-2.ll
+++ b/test/CodeGen/X86/hidden-vis-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9   | grep mov | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 | not grep GOT
+; RUN: llc < %s -mtriple=i386-apple-darwin9   | grep mov | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | not grep GOT
 
 @x = weak hidden global i32 0		; <i32*> [#uses=1]
 
diff --git a/test/CodeGen/X86/hidden-vis-3.ll b/test/CodeGen/X86/hidden-vis-3.ll
index 81dc76e14889a..4be881e84d682 100644
--- a/test/CodeGen/X86/hidden-vis-3.ll
+++ b/test/CodeGen/X86/hidden-vis-3.ll
@@ -1,13 +1,17 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9   | grep mov | count 3
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9   | grep non_lazy_ptr
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9   | grep long | count 2
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 | not grep GOT
+; RUN: llc < %s -mtriple=i386-apple-darwin9   | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | FileCheck %s -check-prefix=X64
 
 @x = external hidden global i32		; <i32*> [#uses=1]
 @y = extern_weak hidden global i32	; <i32*> [#uses=1]
 
 define i32 @t() nounwind readonly {
 entry:
+; X32: _t:
+; X32: movl _y, %eax
+
+; X64: _t:
+; X64: movl _y(%rip), %eax
+
 	%0 = load i32* @x, align 4		; <i32> [#uses=1]
 	%1 = load i32* @y, align 4		; <i32> [#uses=1]
 	%2 = add i32 %1, %0		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/hidden-vis-4.ll b/test/CodeGen/X86/hidden-vis-4.ll
index e6936de103607..a8aede52accdf 100644
--- a/test/CodeGen/X86/hidden-vis-4.ll
+++ b/test/CodeGen/X86/hidden-vis-4.ll
@@ -1,11 +1,12 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep non_lazy_ptr
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep long
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 | grep comm
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s
 
 @x = common hidden global i32 0		; <i32*> [#uses=1]
 
 define i32 @t() nounwind readonly {
 entry:
+; CHECK: t:
+; CHECK: movl _x, %eax
+; CHECK: .comm _x,4
 	%0 = load i32* @x, align 4		; <i32> [#uses=1]
 	ret i32 %0
 }
diff --git a/test/CodeGen/X86/hidden-vis.ll b/test/CodeGen/X86/hidden-vis.ll
index 058850c7b75ca..a948bdfe68757 100644
--- a/test/CodeGen/X86/hidden-vis.ll
+++ b/test/CodeGen/X86/hidden-vis.ll
@@ -1,20 +1,24 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | \
-; RUN:   grep .hidden | count 2
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin8.8.0 | \
-; RUN:   grep .private_extern | count 2
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN
 
-%struct.Person = type { i32 }
 @a = hidden global i32 0
 @b = external global i32
 
+define weak hidden void @t1() nounwind {
+; LINUX: .hidden t1
+; LINUX: t1:
 
-define weak hidden void @_ZN6Person13privateMethodEv(%struct.Person* %this) {
+; DARWIN: .private_extern _t1
+; DARWIN: t1:
   ret void
 }
 
-declare void @function(i32)
+define weak void @t2() nounwind {
+; LINUX: t2:
+; LINUX: .hidden a
 
-define weak void @_ZN6PersonC1Ei(%struct.Person* %this, i32 %_c) {
+; DARWIN: t2:
+; DARWIN: .private_extern _a
   ret void
 }
 
diff --git a/test/CodeGen/X86/i128-and-beyond.ll b/test/CodeGen/X86/i128-and-beyond.ll
index db94b0ec05e6f..907a6b8de2fe2 100644
--- a/test/CodeGen/X86/i128-and-beyond.ll
+++ b/test/CodeGen/X86/i128-and-beyond.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep 18446744073709551615 | count 14
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep 18446744073709551615 | count 14
 
 ; These static initializers are too big to hand off to assemblers
 ; as monolithic blobs.
diff --git a/test/CodeGen/X86/i128-immediate.ll b/test/CodeGen/X86/i128-immediate.ll
index 69399336e30e0..c47569e700f52 100644
--- a/test/CodeGen/X86/i128-immediate.ll
+++ b/test/CodeGen/X86/i128-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movq | count 2
+; RUN: llc < %s -march=x86-64 | grep movq | count 2
 
 define i128 @__addvti3() {
           ret i128 -1
diff --git a/test/CodeGen/X86/i128-mul.ll b/test/CodeGen/X86/i128-mul.ll
index f8c732ec68e45..e9d30d67019e2 100644
--- a/test/CodeGen/X86/i128-mul.ll
+++ b/test/CodeGen/X86/i128-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 ; PR1198
 
 define i64 @foo(i64 %x, i64 %y) {
diff --git a/test/CodeGen/X86/i128-ret.ll b/test/CodeGen/X86/i128-ret.ll
index 179a0134331b1..277f4283328b6 100644
--- a/test/CodeGen/X86/i128-ret.ll
+++ b/test/CodeGen/X86/i128-ret.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq	8(%rdi), %rdx}
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq	(%rdi), %rax}
+; RUN: llc < %s -march=x86-64 | grep {movq	8(%rdi), %rdx}
+; RUN: llc < %s -march=x86-64 | grep {movq	(%rdi), %rax}
 
 define i128 @test(i128 *%P) {
         %A = load i128* %P
diff --git a/test/CodeGen/X86/i256-add.ll b/test/CodeGen/X86/i256-add.ll
index 280ed6b1b33ba..5a7a7a7fe84a9 100644
--- a/test/CodeGen/X86/i256-add.ll
+++ b/test/CodeGen/X86/i256-add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep adcl %t | count 7
 ; RUN: grep sbbl %t | count 7
 
diff --git a/test/CodeGen/X86/i2k.ll b/test/CodeGen/X86/i2k.ll
index 712302da76d00..6116c2e716586 100644
--- a/test/CodeGen/X86/i2k.ll
+++ b/test/CodeGen/X86/i2k.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define void @foo(i2011* %x, i2011* %y, i2011* %p) nounwind {
   %a = load i2011* %x
diff --git a/test/CodeGen/X86/i64-mem-copy.ll b/test/CodeGen/X86/i64-mem-copy.ll
index 0d2b29c0b4201..847e2095f4c53 100644
--- a/test/CodeGen/X86/i64-mem-copy.ll
+++ b/test/CodeGen/X86/i64-mem-copy.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64           | grep {movq.*(%rsi), %rax}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movsd.*(%eax),}
+; RUN: llc < %s -march=x86-64           | grep {movq.*(%rsi), %rax}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.*(%eax),}
 
 ; Uses movsd to load / store i64 values if sse2 is available.
 
diff --git a/test/CodeGen/X86/iabs.ll b/test/CodeGen/X86/iabs.ll
index ca751ae1d2e4e..6a79ee879253f 100644
--- a/test/CodeGen/X86/iabs.ll
+++ b/test/CodeGen/X86/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -stats  |& \
+; RUN: llc < %s -march=x86-64 -stats  |& \
 ; RUN:   grep {6 .*Number of machine instrs printed}
 
 ;; Integer absolute value, should produce something at least as good as:
diff --git a/test/CodeGen/X86/illegal-asm.ll b/test/CodeGen/X86/illegal-asm.ll
index 03cc507f23f21..43128dcf010b6 100644
--- a/test/CodeGen/X86/illegal-asm.ll
+++ b/test/CodeGen/X86/illegal-asm.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim
-; RUN: llvm-as < %s | llc -mtriple=i386-linux        -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-linux        -disable-fp-elim
 ; XFAIL: *
 ; Expected to run out of registers during allocation.
 ; PR3864
diff --git a/test/CodeGen/X86/illegal-insert.ll b/test/CodeGen/X86/illegal-insert.ll
index 59773b2491049..dbf1b14684c23 100644
--- a/test/CodeGen/X86/illegal-insert.ll
+++ b/test/CodeGen/X86/illegal-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 
 define <4 x double> @foo0(<4 x double> %t) {
   %r = insertelement <4 x double> %t, double 2.3, i32 0
diff --git a/test/CodeGen/X86/illegal-vector-args-return.ll b/test/CodeGen/X86/illegal-vector-args-return.ll
index 5ed6ddb55129d..cecf77af4de12 100644
--- a/test/CodeGen/X86/illegal-vector-args-return.ll
+++ b/test/CodeGen/X86/illegal-vector-args-return.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {mulpd	%xmm3, %xmm1}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {mulpd	%xmm2, %xmm0}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {addps	%xmm3, %xmm1}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {addps	%xmm2, %xmm0}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd	%xmm3, %xmm1}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd	%xmm2, %xmm0}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps	%xmm3, %xmm1}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps	%xmm2, %xmm0}
 
 define <4 x double> @foo(<4 x double> %x, <4 x double> %z) {
   %y = fmul <4 x double> %x, %z
diff --git a/test/CodeGen/X86/imp-def-copies.ll b/test/CodeGen/X86/imp-def-copies.ll
index 3d2f65653e7a4..91178403876f1 100644
--- a/test/CodeGen/X86/imp-def-copies.ll
+++ b/test/CodeGen/X86/imp-def-copies.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep mov
+; RUN: llc < %s -march=x86 | not grep mov
 
 	%struct.active_line = type { %struct.gs_fixed_point, %struct.gs_fixed_point, i32, i32, i32, %struct.line_segment*, i32, i16, i16, %struct.active_line*, %struct.active_line* }
 	%struct.gs_fixed_point = type { i32, i32 }
diff --git a/test/CodeGen/X86/imul-lea-2.ll b/test/CodeGen/X86/imul-lea-2.ll
index 0a2df1c977bcb..1cb54b37b0e1a 100644
--- a/test/CodeGen/X86/imul-lea-2.ll
+++ b/test/CodeGen/X86/imul-lea-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 3
-; RUN: llvm-as < %s | llc -march=x86-64 | grep shl | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep imul
+; RUN: llc < %s -march=x86-64 | grep lea | count 3
+; RUN: llc < %s -march=x86-64 | grep shl | count 1
+; RUN: llc < %s -march=x86-64 | not grep imul
 
 define i64 @t1(i64 %a) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/imul-lea.ll b/test/CodeGen/X86/imul-lea.ll
index 6403a2668a39e..4e8e2af0f2fec 100644
--- a/test/CodeGen/X86/imul-lea.ll
+++ b/test/CodeGen/X86/imul-lea.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | grep lea
 
 declare i32 @foo()
 
diff --git a/test/CodeGen/X86/inline-asm-2addr.ll b/test/CodeGen/X86/inline-asm-2addr.ll
index 619629407fe6b..4a2c7fc5ebac5 100644
--- a/test/CodeGen/X86/inline-asm-2addr.ll
+++ b/test/CodeGen/X86/inline-asm-2addr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movq
+; RUN: llc < %s -march=x86-64 | not grep movq
 
 define i64 @t(i64 %a, i64 %b) nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/inline-asm-R-constraint.ll b/test/CodeGen/X86/inline-asm-R-constraint.ll
new file mode 100644
index 0000000000000..66c27ac877121
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-R-constraint.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+; 7282062
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+
+define void @udiv8(i8* %quotient, i16 zeroext %a, i8 zeroext %b, i8 zeroext %c, i8* %remainder) nounwind ssp {
+entry:
+; CHECK: udiv8:
+; CHECK-NOT: movb %ah, (%r8)
+  %a_addr = alloca i16, align 2                   ; <i16*> [#uses=2]
+  %b_addr = alloca i8, align 1                    ; <i8*> [#uses=2]
+  store i16 %a, i16* %a_addr
+  store i8 %b, i8* %b_addr
+  call void asm "\09\09movw\09$2, %ax\09\09\0A\09\09divb\09$3\09\09\09\0A\09\09movb\09%al, $0\09\0A\09\09movb %ah, ($4)", "=*m,=*m,*m,*m,R,~{dirflag},~{fpsr},~{flags},~{ax}"(i8* %quotient, i8* %remainder, i16* %a_addr, i8* %b_addr, i8* %remainder) nounwind
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/inline-asm-flag-clobber.ll b/test/CodeGen/X86/inline-asm-flag-clobber.ll
index 3c536b716f624..51ea843712d19 100644
--- a/test/CodeGen/X86/inline-asm-flag-clobber.ll
+++ b/test/CodeGen/X86/inline-asm-flag-clobber.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | %prcontext test 1 | grep j
+; RUN: llc -march=x86-64 < %s | FileCheck %s
 ; PR3701
 
 define i64 @t(i64* %arg) nounwind {
@@ -7,6 +7,8 @@ define i64 @t(i64* %arg) nounwind {
 ; <label>:1		; preds = %0
 	%2 = icmp eq i64* null, %arg		; <i1> [#uses=1]
 	%3 = tail call i64* asm sideeffect "movl %fs:0,$0", "=r,~{dirflag},~{fpsr},~{flags}"() nounwind		; <%struct.thread*> [#uses=0]
+; CHECK: test
+; CHECK-NEXT: j
 	br i1 %2, label %4, label %5
 
 ; <label>:4		; preds = %1
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index 31d94d89c3765..09b09295153ef 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define x86_fp80 @test1() {
         %tmp85 = call x86_fp80 asm sideeffect "fld0", "={st(0)}"()
diff --git a/test/CodeGen/X86/inline-asm-fpstack2.ll b/test/CodeGen/X86/inline-asm-fpstack2.ll
index 968561826b5c8..ffa6ee6e019ef 100644
--- a/test/CodeGen/X86/inline-asm-fpstack2.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep {fld	%%st(0)} %t
 ; PR4185
 
diff --git a/test/CodeGen/X86/inline-asm-fpstack3.ll b/test/CodeGen/X86/inline-asm-fpstack3.ll
index ac89a1d9ad51a..17945fe4149e8 100644
--- a/test/CodeGen/X86/inline-asm-fpstack3.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep {fld	%%st(0)} %t
 ; PR4459
 
diff --git a/test/CodeGen/X86/inline-asm-fpstack4.ll b/test/CodeGen/X86/inline-asm-fpstack4.ll
index c9122fad6cf77..bae2970db4abc 100644
--- a/test/CodeGen/X86/inline-asm-fpstack4.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR4484
 
 declare x86_fp80 @ceil()
diff --git a/test/CodeGen/X86/inline-asm-fpstack5.ll b/test/CodeGen/X86/inline-asm-fpstack5.ll
index 64f3788f45dd4..8b219cf927733 100644
--- a/test/CodeGen/X86/inline-asm-fpstack5.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR4485
 
 define void @test(x86_fp80* %a) {
diff --git a/test/CodeGen/X86/inline-asm-modifier-n.ll b/test/CodeGen/X86/inline-asm-modifier-n.ll
index 97eac388677b3..5e76b6c0580e9 100644
--- a/test/CodeGen/X86/inline-asm-modifier-n.ll
+++ b/test/CodeGen/X86/inline-asm-modifier-n.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep { 37}
+; RUN: llc < %s -march=x86 | grep { 37}
 ; rdar://7008959
 
 define void @bork() nounwind {
diff --git a/test/CodeGen/X86/inline-asm-mrv.ll b/test/CodeGen/X86/inline-asm-mrv.ll
index ca39c120585a6..78d7e776cf227 100644
--- a/test/CodeGen/X86/inline-asm-mrv.ll
+++ b/test/CodeGen/X86/inline-asm-mrv.ll
@@ -1,8 +1,8 @@
 ; PR2094
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movslq
-; RUN: llvm-as < %s | llc -march=x86-64 | grep addps
-; RUN: llvm-as < %s | llc -march=x86-64 | grep paddd
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movq
+; RUN: llc < %s -march=x86-64 | grep movslq
+; RUN: llc < %s -march=x86-64 | grep addps
+; RUN: llc < %s -march=x86-64 | grep paddd
+; RUN: llc < %s -march=x86-64 | not grep movq
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/inline-asm-out-regs.ll b/test/CodeGen/X86/inline-asm-out-regs.ll
index 01f1397830a4b..46966f5370d38 100644
--- a/test/CodeGen/X86/inline-asm-out-regs.ll
+++ b/test/CodeGen/X86/inline-asm-out-regs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-unknown-linux-gnu
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu
 ; PR3391
 
 @pci_indirect = external global { }             ; <{ }*> [#uses=1]
diff --git a/test/CodeGen/X86/inline-asm-pic.ll b/test/CodeGen/X86/inline-asm-pic.ll
index 04ad48d292115..0b5ff08c3f326 100644
--- a/test/CodeGen/X86/inline-asm-pic.ll
+++ b/test/CodeGen/X86/inline-asm-pic.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic | grep lea
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic | grep call
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep lea
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep call
 
 @main_q = internal global i8* null		; <i8**> [#uses=1]
 
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
new file mode 100644
index 0000000000000..ab44206f8065a
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64
+; rdar://7066579
+
+	type { i64, i64, i64, i64, i64 }		; type %0
+
+define void @t() nounwind {
+entry:
+	%asmtmp = call %0 asm sideeffect "mov    %cr0, $0       \0Amov    %cr2, $1       \0Amov    %cr3, $2       \0Amov    %cr4, $3       \0Amov    %cr8, $0       \0A", "=q,=q,=q,=q,=q,~{dirflag},~{fpsr},~{flags}"() nounwind		; <%0> [#uses=0]
+	ret void
+}
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
index 6df2c48415bc7..1f4a13f54b75d 100644
--- a/test/CodeGen/X86/inline-asm-tied.ll
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 -O0 | grep {movl	%edx, 4(%esp)} | count 2
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 | grep {movl	%edx, 12(%esp)} | count 2
 ; rdar://6992609
 
 target triple = "i386-apple-darwin9.0"
diff --git a/test/CodeGen/X86/inline-asm-x-scalar.ll b/test/CodeGen/X86/inline-asm-x-scalar.ll
index aafbbd1fd0257..5a9628b3df74a 100644
--- a/test/CodeGen/X86/inline-asm-x-scalar.ll
+++ b/test/CodeGen/X86/inline-asm-x-scalar.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
 
 define void @test1() {
         tail call void asm sideeffect "ucomiss $0", "x"( float 0x41E0000000000000)
diff --git a/test/CodeGen/X86/inline-asm.ll b/test/CodeGen/X86/inline-asm.ll
index 02988fcc29f31..c66d7a8bd11bd 100644
--- a/test/CodeGen/X86/inline-asm.ll
+++ b/test/CodeGen/X86/inline-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define i32 @test1() nounwind {
 	; Dest is AX, dest type = i32.
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
index 863cda94c5fcb..2243f93f3ddde 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-1.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 2
+; RUN: llc < %s -march=x86 | grep mov | count 3
 
 define fastcc i32 @sqlite3ExprResolveNames() nounwind  {
 entry:
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-2.ll b/test/CodeGen/X86/ins_subreg_coalesce-2.ll
index 5c0b0d3d3e954..f2c9cc72719c4 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movw
+; RUN: llc < %s -march=x86-64 | not grep movw
 
 define i16 @test5(i16 %f12) nounwind {
 	%f11 = shl i16 %f12, 2		; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/test/CodeGen/X86/ins_subreg_coalesce-3.ll
index ee3ac66abef1a..e44308583297c 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 11
+; RUN: llc < %s -march=x86-64 | grep mov | count 11
 
 	%struct.COMPOSITE = type { i8, i16, i16 }
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
@@ -17,7 +17,7 @@
 	%struct.metrics = type { i16, i16, i16, i16, i16 }
 	%struct.rec = type { %struct.head_type }
 
-define void @FontChange(i1 %foo) {
+define void @FontChange(i1 %foo) nounwind {
 entry:
 	br i1 %foo, label %bb298, label %bb49
 bb49:		; preds = %entry
diff --git a/test/CodeGen/X86/insertelement-copytoregs.ll b/test/CodeGen/X86/insertelement-copytoregs.ll
index 0eef5173b858f..34a29ca7d939c 100644
--- a/test/CodeGen/X86/insertelement-copytoregs.ll
+++ b/test/CodeGen/X86/insertelement-copytoregs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep -v IMPLICIT_DEF
+; RUN: llc < %s -march=x86-64 | grep -v IMPLICIT_DEF
 
 define void @foo(<2 x float>* %p) {
   %t = insertelement <2 x float> undef, float 0.0, i32 0
diff --git a/test/CodeGen/X86/insertelement-legalize.ll b/test/CodeGen/X86/insertelement-legalize.ll
index 95e17b40bc8b3..18aade2bb3027 100644
--- a/test/CodeGen/X86/insertelement-legalize.ll
+++ b/test/CodeGen/X86/insertelement-legalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -disable-mmx
+; RUN: llc < %s -march=x86 -disable-mmx
 
 ; Test to check that we properly legalize an insert vector element
 define void @test(<2 x i64> %val, <2 x i64>* %dst, i64 %x) nounwind {
diff --git a/test/CodeGen/X86/invalid-shift-immediate.ll b/test/CodeGen/X86/invalid-shift-immediate.ll
index 5c47f5ee685f1..77a9f7eda783a 100644
--- a/test/CodeGen/X86/invalid-shift-immediate.ll
+++ b/test/CodeGen/X86/invalid-shift-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR2098
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/isel-sink.ll b/test/CodeGen/X86/isel-sink.ll
index 4e68b7757ff5a..0f94b233bcfb5 100644
--- a/test/CodeGen/X86/isel-sink.ll
+++ b/test/CodeGen/X86/isel-sink.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 | \
 ; RUN:   grep {movl	\$4, (.*,.*,4)}
 
 define i32 @test(i32* %X, i32 %B) {
diff --git a/test/CodeGen/X86/isel-sink2.ll b/test/CodeGen/X86/isel-sink2.ll
index 9d9c747fa4959..5ed0e00fd8736 100644
--- a/test/CodeGen/X86/isel-sink2.ll
+++ b/test/CodeGen/X86/isel-sink2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep {movb.7(%...)} %t
 ; RUN: not grep leal %t
 
diff --git a/test/CodeGen/X86/isel-sink3.ll b/test/CodeGen/X86/isel-sink3.ll
index 4e678c42cf771..8d3d97a930bef 100644
--- a/test/CodeGen/X86/isel-sink3.ll
+++ b/test/CodeGen/X86/isel-sink3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep {addl.\$4, %ecx}
-; RUN: llvm-as < %s | llc | not grep leal
+; RUN: llc < %s | grep {addl.\$4, %ecx}
+; RUN: llc < %s | not grep leal
 ; this should not sink %1 into bb1, that would increase reg pressure.
 
 ; rdar://6399178
diff --git a/test/CodeGen/X86/isint.ll b/test/CodeGen/X86/isint.ll
index 7acc5ccf20e7e..507a328c3ffd8 100644
--- a/test/CodeGen/X86/isint.ll
+++ b/test/CodeGen/X86/isint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
 ; RUN: not grep cmp %t
 ; RUN: not grep xor %t
 ; RUN: grep jne %t | count 1
diff --git a/test/CodeGen/X86/isnan.ll b/test/CodeGen/X86/isnan.ll
index 65916ff577247..4d465c0c7aa8f 100644
--- a/test/CodeGen/X86/isnan.ll
+++ b/test/CodeGen/X86/isnan.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep call
+; RUN: llc < %s -march=x86 | not grep call
 
 declare i1 @llvm.isunordered.f64(double)
 
diff --git a/test/CodeGen/X86/isnan2.ll b/test/CodeGen/X86/isnan2.ll
index 18fe29a883e0b..7753346fd9402 100644
--- a/test/CodeGen/X86/isnan2.ll
+++ b/test/CodeGen/X86/isnan2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep pxor
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep pxor
 
 ; This should not need to materialize 0.0 to evaluate the condition.
 
diff --git a/test/CodeGen/X86/ispositive.ll b/test/CodeGen/X86/ispositive.ll
index 3799b9c70b071..8adf723aabc38 100644
--- a/test/CodeGen/X86/ispositive.ll
+++ b/test/CodeGen/X86/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {shrl.*31}
+; RUN: llc < %s -march=x86 | grep {shrl.*31}
 
 define i32 @test1(i32 %X) {
 entry:
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index a48f0616291fd..c695c29e068f5 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -f -o %t
+; RUN: llc < %s -march=x86-64 -o %t
 ; RUN: grep inc %t | count 1
 ; RUN: grep dec %t | count 2
 ; RUN: grep addq %t | count 13
 ; RUN: not grep addb %t
-; RUN: grep leaq %t | count 8
-; RUN: grep leal %t | count 4
+; RUN: grep leaq %t | count 9
+; RUN: grep leal %t | count 3
 ; RUN: grep movq %t | count 5
 
 ; IV users in each of the loops from other loops shouldn't cause LSR
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index cb7d6271f9580..5e8e16217363e 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep jns
+; RUN: llc < %s -march=x86 | grep jns
 
 define i32 @f(i32 %X) {
 entry:
diff --git a/test/CodeGen/X86/ldzero.ll b/test/CodeGen/X86/ldzero.ll
index 2db78a2145b6a..dab04bc353c66 100644
--- a/test/CodeGen/X86/ldzero.ll
+++ b/test/CodeGen/X86/ldzero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; verify PR 1700 is still fixed
 ; ModuleID = 'hh.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll
index a33b71c851ecc..69303507d6e6c 100644
--- a/test/CodeGen/X86/lea-2.ll
+++ b/test/CodeGen/X86/lea-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {lea	EAX, DWORD PTR \\\[... + 4\\*... - 5\\\]}
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   not grep add
 
 define i32 @test1(i32 %A, i32 %B) {
diff --git a/test/CodeGen/X86/lea-3.ll b/test/CodeGen/X86/lea-3.ll
index 39122bbdf5f37..44413d60785ea 100644
--- a/test/CodeGen/X86/lea-3.ll
+++ b/test/CodeGen/X86/lea-3.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {leal	(%rdi,%rdi,2), %eax}
+; RUN: llc < %s -march=x86-64 | grep {leal	(%rdi,%rdi,2), %eax}
 define i32 @test(i32 %a) {
         %tmp2 = mul i32 %a, 3           ; <i32> [#uses=1]
         ret i32 %tmp2
 }
 
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {leaq	(,%rdi,4), %rax}
+; RUN: llc < %s -march=x86-64 | grep {leaq	(,%rdi,4), %rax}
 define i64 @test2(i64 %a) {
         %tmp2 = shl i64 %a, 2
 	%tmp3 = or i64 %tmp2, %a
diff --git a/test/CodeGen/X86/lea-4.ll b/test/CodeGen/X86/lea-4.ll
index 8f0835f642fd0..2171204c01d12 100644
--- a/test/CodeGen/X86/lea-4.ll
+++ b/test/CodeGen/X86/lea-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 2
+; RUN: llc < %s -march=x86-64 | grep lea | count 2
 
 define zeroext i16 @t1(i32 %on_off) nounwind {
 entry:
diff --git a/test/CodeGen/X86/lea-recursion.ll b/test/CodeGen/X86/lea-recursion.ll
index 390e35adfaf55..3f32fd27c5c11 100644
--- a/test/CodeGen/X86/lea-recursion.ll
+++ b/test/CodeGen/X86/lea-recursion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 12
+; RUN: llc < %s -march=x86-64 | grep lea | count 12
 
 ; This testcase was written to demonstrate an instruction-selection problem,
 ; however it also happens to expose a limitation in the DAGCombiner's
diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll
index 30a477ad120cc..22a96448f029a 100644
--- a/test/CodeGen/X86/lea.ll
+++ b/test/CodeGen/X86/lea.ll
@@ -1,9 +1,34 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86 | not grep orl
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 
-define i32 @test(i32 %x) {
-        %tmp1 = shl i32 %x, 3           ; <i32> [#uses=1]
-        %tmp2 = add i32 %tmp1, 7                ; <i32> [#uses=1]
+define i32 @test1(i32 %x) nounwind {
+        %tmp1 = shl i32 %x, 3
+        %tmp2 = add i32 %tmp1, 7
         ret i32 %tmp2
+; CHECK: test1:
+; CHECK:    leal 7(,%rdi,8), %eax
 }
 
+
+; ISel the add of -4 with a neg and use an lea for the rest of the
+; arithemtic.
+define i32 @test2(i32 %x_offs) nounwind readnone {
+entry:
+	%t0 = icmp sgt i32 %x_offs, 4
+	br i1 %t0, label %bb.nph, label %bb2
+
+bb.nph:
+	%tmp = add i32 %x_offs, -5
+	%tmp6 = lshr i32 %tmp, 2
+	%tmp7 = mul i32 %tmp6, -4
+	%tmp8 = add i32 %tmp7, %x_offs
+	%tmp9 = add i32 %tmp8, -4
+	ret i32 %tmp9
+
+bb2:
+	ret i32 %x_offs
+; CHECK: test2:
+; CHECK:	leal	-5(%rdi), %eax
+; CHECK:	andl	$-4, %eax
+; CHECK:	negl	%eax
+; CHECK:	leal	-4(%rdi,%rax), %eax
+}
diff --git a/test/CodeGen/X86/legalizedag_vec.ll b/test/CodeGen/X86/legalizedag_vec.ll
index 97654b201ba01..574b46acea60c 100644
--- a/test/CodeGen/X86/legalizedag_vec.ll
+++ b/test/CodeGen/X86/legalizedag_vec.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 -disable-mmx -o %t -f
-; RUN: grep divdi3  %t | count 2
+; RUN: llc < %s -march=x86 -mattr=sse2 -disable-mmx -o %t
+; RUN: grep {call.*divdi3}  %t | count 2
 
 
 ; Test case for r63760 where we generate a legalization assert that an illegal
@@ -12,4 +12,4 @@
 define <2 x i64> @test_long_div(<2 x i64> %num, <2 x i64> %div) {
   %div.r = sdiv <2 x i64> %num, %div
   ret <2 x i64>  %div.r
-}                                     
-\ No newline at end of file
+}                                     
diff --git a/test/CodeGen/X86/lfence.ll b/test/CodeGen/X86/lfence.ll
index 0721d73054408..7a96ca30e7531 100644
--- a/test/CodeGen/X86/lfence.ll
+++ b/test/CodeGen/X86/lfence.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep lfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep lfence
 
 declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
 
diff --git a/test/CodeGen/X86/limited-prec.ll b/test/CodeGen/X86/limited-prec.ll
index 6afaea429b867..7bf4ac28fdf96 100644
--- a/test/CodeGen/X86/limited-prec.ll
+++ b/test/CodeGen/X86/limited-prec.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -limit-float-precision=6 -march=x86 | \
+; RUN: llc < %s -limit-float-precision=6 -march=x86 | \
 ; RUN:    not grep exp | not grep log | not grep pow
-; RUN: llvm-as < %s | llc -limit-float-precision=12 -march=x86 | \
+; RUN: llc < %s -limit-float-precision=12 -march=x86 | \
 ; RUN:    not grep exp | not grep log | not grep pow
-; RUN: llvm-as < %s | llc -limit-float-precision=18 -march=x86 | \
+; RUN: llc < %s -limit-float-precision=18 -march=x86 | \
 ; RUN:    not grep exp | not grep log | not grep pow
 
 define float @f1(float %x) nounwind noinline {
diff --git a/test/CodeGen/X86/live-out-reg-info.ll b/test/CodeGen/X86/live-out-reg-info.ll
index b6fb7dfc72c65..7132777b697cb 100644
--- a/test/CodeGen/X86/live-out-reg-info.ll
+++ b/test/CodeGen/X86/live-out-reg-info.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep testl
+; RUN: llc < %s -march=x86-64 | grep {testb	\[$\]1,}
 
 ; Make sure dagcombine doesn't eliminate the comparison due
 ; to an off-by-one bug with ComputeMaskedBits information.
diff --git a/test/CodeGen/X86/local-liveness.ll b/test/CodeGen/X86/local-liveness.ll
index 18d999b7d47eb..321f208e75caf 100644
--- a/test/CodeGen/X86/local-liveness.ll
+++ b/test/CodeGen/X86/local-liveness.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | grep {subl	%eax, %edx}
+; RUN: llc < %s -march=x86 -regalloc=local | grep {subl	%eax, %edx}
 
 ; Local regalloc shouldn't assume that both the uses of the
 ; sub instruction are kills, because one of them is tied
diff --git a/test/CodeGen/X86/long-setcc.ll b/test/CodeGen/X86/long-setcc.ll
index 8d9ebfb276f1f..e0165fb01b536 100644
--- a/test/CodeGen/X86/long-setcc.ll
+++ b/test/CodeGen/X86/long-setcc.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep shr | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 1
+; RUN: llc < %s -march=x86 | grep cmp | count 1
+; RUN: llc < %s -march=x86 | grep shr | count 1
+; RUN: llc < %s -march=x86 | grep xor | count 1
 
 define i1 @t1(i64 %x) nounwind {
 	%B = icmp slt i64 %x, 0
diff --git a/test/CodeGen/X86/longlong-deadload.ll b/test/CodeGen/X86/longlong-deadload.ll
index a8e2c31d9481f..9a4c8f21237b2 100644
--- a/test/CodeGen/X86/longlong-deadload.ll
+++ b/test/CodeGen/X86/longlong-deadload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep '4{(%...)}
+; RUN: llc < %s -march=x86 | not grep '4{(%...)}
 ; This should not load or store the top part of *P.
 
 define void @test(i64* %P) nounwind  {
diff --git a/test/CodeGen/X86/loop-hoist.ll b/test/CodeGen/X86/loop-hoist.ll
index 73284a488edeb..b52066dac62eb 100644
--- a/test/CodeGen/X86/loop-hoist.ll
+++ b/test/CodeGen/X86/loop-hoist.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -relocation-model=dynamic-no-pic -mtriple=i686-apple-darwin8.7.2 |\
-; RUN:   grep L_Arr.non_lazy_ptr
-; RUN: llvm-as < %s | \
-; RUN:   llc -disable-post-RA-scheduler=true \
-; RUN:       -relocation-model=dynamic-no-pic -mtriple=i686-apple-darwin8.7.2 |\
-; RUN:   %prcontext L_Arr.non_lazy_ptr 1 | grep {4(%esp)}
+; LSR should hoist the load from the "Arr" stub out of the loop.
+
+; RUN: llc < %s -relocation-model=dynamic-no-pic -mtriple=i686-apple-darwin8.7.2 | FileCheck %s
+
+; CHECK: _foo:
+; CHECK:    L_Arr$non_lazy_ptr
+; CHECK: LBB1_1:	## %cond_true
 
 @Arr = external global [0 x i32]		; <[0 x i32]*> [#uses=1]
 
diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll
index 8ea5bdb208e3a..30b5114349480 100644
--- a/test/CodeGen/X86/loop-strength-reduce-2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-2.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic | \
+; RUN: llc < %s -march=x86 -relocation-model=pic | \
 ; RUN:   grep {, 4} | count 1
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 | not grep lea
 ;
 ; Make sure the common loop invariant A is hoisted up to preheader,
 ; since too many registers are needed to subsume it into the addressing modes.
diff --git a/test/CodeGen/X86/loop-strength-reduce-3.ll b/test/CodeGen/X86/loop-strength-reduce-3.ll
index b6bb81471bcdb..70c91340c9488 100644
--- a/test/CodeGen/X86/loop-strength-reduce-3.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \
 ; RUN:   grep {A+} | count 2
 ;
 ; Make sure the common loop invariant A is not hoisted up to preheader,
diff --git a/test/CodeGen/X86/loop-strength-reduce.ll b/test/CodeGen/X86/loop-strength-reduce.ll
index 873710112b680..4cb56ca9ed245 100644
--- a/test/CodeGen/X86/loop-strength-reduce.ll
+++ b/test/CodeGen/X86/loop-strength-reduce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN: llc < %s -march=x86 -relocation-model=static | \
 ; RUN:   grep {A+} | count 2
 ;
 ; Make sure the common loop invariant A is not hoisted up to preheader,
diff --git a/test/CodeGen/X86/loop-strength-reduce2.ll b/test/CodeGen/X86/loop-strength-reduce2.ll
index 507a9e5a2fa73..a1f38a7edc028 100644
--- a/test/CodeGen/X86/loop-strength-reduce2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -relocation-model=pic | grep {\$pb} | grep mov
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | grep {\$pb} | grep mov
 ;
 ; Make sure the PIC label flags2-"L1$pb" is not moved up to the preheader.
 
diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll
index 4e95bdddb5b2b..e340edd650609 100644
--- a/test/CodeGen/X86/loop-strength-reduce3.ll
+++ b/test/CodeGen/X86/loop-strength-reduce3.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | grep 240
-; RUN: llvm-as < %s | llc -march=x86 | grep inc | count 1
+; RUN: llc < %s -march=x86 | grep cmp | grep 240
+; RUN: llc < %s -march=x86 | grep inc | count 1
 
 define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) {
 entry:
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 711f223749ce8..87b606f558a45 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmp | grep 64
-; RUN: llvm-as < %s | llc -march=x86 | not grep inc
+; RUN: llc < %s -march=x86 | grep cmp | grep 64
+; RUN: llc < %s -march=x86 | not grep inc
 
 @state = external global [0 x i32]		; <[0 x i32]*> [#uses=4]
 @S = external global [0 x i32]		; <[0 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll
index 6e037e2aca31d..4ec2a02992519 100644
--- a/test/CodeGen/X86/loop-strength-reduce5.ll
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep inc | count 1
+; RUN: llc < %s -march=x86 | grep inc | count 1
 
 @X = weak global i16 0		; <i16*> [#uses=1]
 @Y = weak global i16 0		; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce6.ll b/test/CodeGen/X86/loop-strength-reduce6.ll
index fa8b57aababb1..81da82ec3f7c0 100644
--- a/test/CodeGen/X86/loop-strength-reduce6.ll
+++ b/test/CodeGen/X86/loop-strength-reduce6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep inc
+; RUN: llc < %s -march=x86-64 | not grep inc
 
 define fastcc i32 @decodeMP3(i32 %isize, i32* %done) {
 entry:
diff --git a/test/CodeGen/X86/loop-strength-reduce7.ll b/test/CodeGen/X86/loop-strength-reduce7.ll
index b6a130a861900..4b565a67fb2d7 100644
--- a/test/CodeGen/X86/loop-strength-reduce7.ll
+++ b/test/CodeGen/X86/loop-strength-reduce7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep imul
+; RUN: llc < %s -march=x86 | not grep imul
 
 target triple = "i386-apple-darwin9.6"
 	%struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] }
diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll
index 1846c7d4467c2..e14cd8a99e35e 100644
--- a/test/CodeGen/X86/loop-strength-reduce8.ll
+++ b/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep leal | not grep 16
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16
 
 	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
 	%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index c998268600cb4..474450acc9b00 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,4 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | %prcontext decq 1 | grep jne
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; CHECK: decq
+; CHECK-NEXT: jne
 
 @Te0 = external global [256 x i32]		; <[256 x i32]*> [#uses=5]
 @Te1 = external global [256 x i32]		; <[256 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/lsr-negative-stride.ll b/test/CodeGen/X86/lsr-negative-stride.ll
index 28d041f0603f1..b08356c8d3097 100644
--- a/test/CodeGen/X86/lsr-negative-stride.ll
+++ b/test/CodeGen/X86/lsr-negative-stride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: not grep neg %t
 ; RUN: not grep sub.*esp %t
 ; RUN: not grep esi %t
diff --git a/test/CodeGen/X86/lsr-sort.ll b/test/CodeGen/X86/lsr-sort.ll
index 00e1d694ef402..40589892bb6f1 100644
--- a/test/CodeGen/X86/lsr-sort.ll
+++ b/test/CodeGen/X86/lsr-sort.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep inc %t | count 1
 ; RUN: not grep incw %t
 
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 0bf347c64271e..bc493bd8f724f 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: not grep and %t
 ; RUN: not grep movz %t
 ; RUN: not grep sar %t
diff --git a/test/CodeGen/X86/masked-iv-unsafe.ll b/test/CodeGen/X86/masked-iv-unsafe.ll
index 639a7a6a3bb0a..f23c020195489 100644
--- a/test/CodeGen/X86/masked-iv-unsafe.ll
+++ b/test/CodeGen/X86/masked-iv-unsafe.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep and %t | count 6
 ; RUN: grep movzb %t | count 6
 ; RUN: grep sar %t | count 12
diff --git a/test/CodeGen/X86/maskmovdqu.ll b/test/CodeGen/X86/maskmovdqu.ll
index 4d1ed1dc226f2..7796f0e9a19e2 100644
--- a/test/CodeGen/X86/maskmovdqu.ll
+++ b/test/CodeGen/X86/maskmovdqu.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86    -mattr=+sse2 | grep -i EDI
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep -i RDI
+; RUN: llc < %s -march=x86    -mattr=+sse2 | grep -i EDI
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep -i RDI
 ; rdar://6573467
 
 define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 0fccc35f3d273..2dc939e666fff 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 5
+; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
+; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 5
 
 	%struct.ParmT = type { [25 x i8], i8, i8* }
 @.str12 = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00"		; <[25 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 97a2dd57c7109..24530cd27e4b9 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep call.*memcpy | count 2
+; RUN: llc < %s -march=x86-64 | grep call.*memcpy | count 2
 
 declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
 
diff --git a/test/CodeGen/X86/memmove-0.ll b/test/CodeGen/X86/memmove-0.ll
index a2b452dbdfc57..d4050689f5946 100644
--- a/test/CodeGen/X86/memmove-0.ll
+++ b/test/CodeGen/X86/memmove-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memcpy}
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memcpy}
 
 declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
 
diff --git a/test/CodeGen/X86/memmove-1.ll b/test/CodeGen/X86/memmove-1.ll
index 3b2debc247dd3..2057be88174db 100644
--- a/test/CodeGen/X86/memmove-1.ll
+++ b/test/CodeGen/X86/memmove-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memmove}
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memmove}
 
 declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
 
diff --git a/test/CodeGen/X86/memmove-2.ll b/test/CodeGen/X86/memmove-2.ll
index 37bbe0b54133f..68a9f4dfb9cb9 100644
--- a/test/CodeGen/X86/memmove-2.ll
+++ b/test/CodeGen/X86/memmove-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | not grep call
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | not grep call
 
 declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
 
diff --git a/test/CodeGen/X86/memmove-3.ll b/test/CodeGen/X86/memmove-3.ll
index 2e692c7f60b79..d8a419c07457a 100644
--- a/test/CodeGen/X86/memmove-3.ll
+++ b/test/CodeGen/X86/memmove-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memmove}
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memmove}
 
 declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
 
diff --git a/test/CodeGen/X86/memmove-4.ll b/test/CodeGen/X86/memmove-4.ll
index f23c7d5cb8548..027db1f48395a 100644
--- a/test/CodeGen/X86/memmove-4.ll
+++ b/test/CodeGen/X86/memmove-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep call
+; RUN: llc < %s | not grep call
 
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 2ad665cda75c4..7deb52f8078ed 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -1,5 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep rep
-; RUN: llvm-as < %s | llc -march=x86 | grep memset
+; RUN: llc < %s | not grep rep
+; RUN: llc < %s | grep memset
+
+target triple = "i386"
 
 declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
 
diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll
index 564174c18880d..cf7464d03bf26 100644
--- a/test/CodeGen/X86/memset.ll
+++ b/test/CodeGen/X86/memset.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 9
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 3
+; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 9
+; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 3
 
 	%struct.x = type { i16, i16 }
 
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
index d76d4d4792463..da8fc51da8e1d 100644
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep stosl
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movq | count 10
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep stosl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq | count 10
 
 define void @bork() nounwind {
 entry:
diff --git a/test/CodeGen/X86/mfence.ll b/test/CodeGen/X86/mfence.ll
index 6abdbcedf2661..a1b22834d1aa4 100644
--- a/test/CodeGen/X86/mfence.ll
+++ b/test/CodeGen/X86/mfence.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep sfence
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep lfence
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mfence
 
 
 declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll
index 1df0e3a3e6b0b..7dcd84d8a1577 100644
--- a/test/CodeGen/X86/mingw-alloca.ll
+++ b/test/CodeGen/X86/mingw-alloca.ll
@@ -1,14 +1,12 @@
-; RUN: llvm-as < %s | llc -o %t -f
-; RUN: grep __alloca %t | count 2
-; RUN: grep 4294967288 %t
-; RUN: grep {pushl	%eax} %t
-; RUN: grep 8028 %t | count 2
+; RUN: llc < %s | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i386-mingw32"
+target triple = "i386-pc-mingw32"
 
-define void @foo1(i32 %N) {
+define void @foo1(i32 %N) nounwind {
 entry:
+; CHECK: _foo1:
+; CHECK: call __alloca
 	%tmp14 = alloca i32, i32 %N		; <i32*> [#uses=1]
 	call void @bar1( i32* %tmp14 )
 	ret void
@@ -16,8 +14,13 @@ entry:
 
 declare void @bar1(i32*)
 
-define void @foo2(i32 inreg  %N) {
+define void @foo2(i32 inreg  %N) nounwind {
 entry:
+; CHECK: _foo2:
+; CHECK: andl $-16, %esp
+; CHECK: pushl %eax
+; CHECK: call __alloca
+; CHECK: movl	8028(%esp), %eax
 	%A2 = alloca [2000 x i32], align 16		; <[2000 x i32]*> [#uses=1]
 	%A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0		; <i32*> [#uses=1]
 	call void @bar2( i32* %A2.sub, i32 %N )
diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll
index 9496cbb8bbb83..426e98e019bcb 100644
--- a/test/CodeGen/X86/mmx-arg-passing.ll
+++ b/test/CodeGen/X86/mmx-arg-passing.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 3
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 3
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
 ;
 ; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2].
 ; On Darwin x86-32, v1i64 values are passed in memory.
diff --git a/test/CodeGen/X86/mmx-arg-passing2.ll b/test/CodeGen/X86/mmx-arg-passing2.ll
index aac614aa7b110..c42af082364c0 100644
--- a/test/CodeGen/X86/mmx-arg-passing2.ll
+++ b/test/CodeGen/X86/mmx-arg-passing2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movq2dq | count 1
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movq2dq | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
 
 @g_v8qi = external global <8 x i8>
 
diff --git a/test/CodeGen/X86/mmx-arith.ll b/test/CodeGen/X86/mmx-arith.ll
index 501786ebc2257..e4dfdbfe1bb1a 100644
--- a/test/CodeGen/X86/mmx-arith.ll
+++ b/test/CodeGen/X86/mmx-arith.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx
+; RUN: llc < %s -march=x86 -mattr=+mmx
 
 ;; A basic sanity check to make sure that MMX arithmetic actually compiles.
 
diff --git a/test/CodeGen/X86/mmx-bitcast-to-i64.ll b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
index c6bb48927b694..1fd8f67a0cccd 100644
--- a/test/CodeGen/X86/mmx-bitcast-to-i64.ll
+++ b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movd | count 4
+; RUN: llc < %s -march=x86-64 | grep movd | count 4
 
 define i64 @foo(<1 x i64>* %p) {
   %t = load <1 x i64>* %p
diff --git a/test/CodeGen/X86/mmx-copy-gprs.ll b/test/CodeGen/X86/mmx-copy-gprs.ll
index 2047ce75e5705..3607043e94fcc 100644
--- a/test/CodeGen/X86/mmx-copy-gprs.ll
+++ b/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86-64           | grep {movq.*(%rsi), %rax}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2 | grep {movl.*4(%eax),}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movsd.(%eax),}
+; RUN: llc < %s -march=x86-64           | grep {movq.*(%rsi), %rax}
+; RUN: llc < %s -march=x86 -mattr=-sse2 | grep {movl.*4(%eax),}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.(%eax),}
 
 ; This test should use GPRs to copy the mmx value, not MMX regs.  Using mmx regs,
 ; increases the places that need to use emms.
diff --git a/test/CodeGen/X86/mmx-emms.ll b/test/CodeGen/X86/mmx-emms.ll
index 60ba84d8728df..5ff2588da6991 100644
--- a/test/CodeGen/X86/mmx-emms.ll
+++ b/test/CodeGen/X86/mmx-emms.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep emms
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep emms
 define void @foo() {
 entry:
 	call void @llvm.x86.mmx.emms( )
diff --git a/test/CodeGen/X86/mmx-insert-element.ll b/test/CodeGen/X86/mmx-insert-element.ll
index 0aa476dba80e6..a063ee1d6cf40 100644
--- a/test/CodeGen/X86/mmx-insert-element.ll
+++ b/test/CodeGen/X86/mmx-insert-element.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | not grep movq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq
+; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movq
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq
 
 define <2 x i32> @qux(i32 %A) nounwind {
 	%tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1		; <<2 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index f1d04fa46cadb..3af09f4998d39 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep pinsrw | count 1
 ; PR2562
 
 external global i16		; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
index 126fc9d13be95..0af7e017b626b 100644
--- a/test/CodeGen/X86/mmx-punpckhdq.ll
+++ b/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep punpckhdq | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep punpckhdq | count 1
 
 define void @bork(<1 x i64>* %x) {
 entry:
diff --git a/test/CodeGen/X86/mmx-s2v.ll b/test/CodeGen/X86/mmx-s2v.ll
index 4ec2403e34176..c98023c0f4177 100644
--- a/test/CodeGen/X86/mmx-s2v.ll
+++ b/test/CodeGen/X86/mmx-s2v.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx
+; RUN: llc < %s -march=x86 -mattr=+mmx
 ; PR2574
 
 define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) {; <label>:0
diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll
index 277cf075cb930..dd0aa2ca31f48 100644
--- a/test/CodeGen/X86/mmx-shift.ll
+++ b/test/CodeGen/X86/mmx-shift.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq | grep 32
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psllq | grep 32
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psrad
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psrlw
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq | grep 32
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psllq | grep 32
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psrad
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psrlw
 
 define i64 @t1(<1 x i64> %mm1) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/mmx-shuffle.ll b/test/CodeGen/X86/mmx-shuffle.ll
index 4b91cb9019393..e3125c7345b88 100644
--- a/test/CodeGen/X86/mmx-shuffle.ll
+++ b/test/CodeGen/X86/mmx-shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah
+; RUN: llc < %s -mcpu=yonah
 ; PR1427
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
index 4dd1e47394fdd..8253c200323c6 100644
--- a/test/CodeGen/X86/mmx-vzmovl-2.ll
+++ b/test/CodeGen/X86/mmx-vzmovl-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep pxor
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep punpckldq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep pxor
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep punpckldq
 
 	%struct.vS1024 = type { [8 x <4 x i32>] }
 	%struct.vS512 = type { [4 x <4 x i32>] }
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
index 95f95794531f3..d21e2404882d2 100644
--- a/test/CodeGen/X86/mmx-vzmovl.ll
+++ b/test/CodeGen/X86/mmx-vzmovl.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep movd
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep movq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movd
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movq
 
 define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
 entry:
diff --git a/test/CodeGen/X86/movfs.ll b/test/CodeGen/X86/movfs.ll
index af102d49569f8..823e98689e7d7 100644
--- a/test/CodeGen/X86/movfs.ll
+++ b/test/CodeGen/X86/movfs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep fs
+; RUN: llc < %s -march=x86 | grep fs
 
 define i32 @foo() nounwind readonly {
 entry:
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index f621849e5b06e..b04048b92c133 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep gs
+; RUN: llc < %s -march=x86 | grep gs
 
 define i32 @foo() nounwind readonly {
 entry:
diff --git a/test/CodeGen/X86/mul-legalize.ll b/test/CodeGen/X86/mul-legalize.ll
index 487614f74ddf0..eca9e6f436c29 100644
--- a/test/CodeGen/X86/mul-legalize.ll
+++ b/test/CodeGen/X86/mul-legalize.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 24576
+; RUN: llc < %s -march=x86 | grep 24576
 ; PR2135
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/CodeGen/X86/mul-remat.ll b/test/CodeGen/X86/mul-remat.ll
index ffc8cc0ba6bc4..3fa005079de7e 100644
--- a/test/CodeGen/X86/mul-remat.ll
+++ b/test/CodeGen/X86/mul-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
+; RUN: llc < %s -march=x86 | grep mov | count 1
 ; PR1874
 	
 define i32 @test(i32 %a, i32 %b) {
diff --git a/test/CodeGen/X86/mul-shift-reassoc.ll b/test/CodeGen/X86/mul-shift-reassoc.ll
index f0ecb5bd08ee2..3777d8b8cfb4f 100644
--- a/test/CodeGen/X86/mul-shift-reassoc.ll
+++ b/test/CodeGen/X86/mul-shift-reassoc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep lea
-; RUN: llvm-as < %s | llc -march=x86 | not grep add
+; RUN: llc < %s -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | not grep add
 
 define i32 @test(i32 %X, i32 %Y) {
 	; Push the shl through the mul to allow an LEA to be formed, instead
diff --git a/test/CodeGen/X86/mul128.ll b/test/CodeGen/X86/mul128.ll
index c0ce6b309315f..6825b99f2425e 100644
--- a/test/CodeGen/X86/mul128.ll
+++ b/test/CodeGen/X86/mul128.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mul | count 3
+; RUN: llc < %s -march=x86-64 | grep mul | count 3
 
 define i128 @foo(i128 %t, i128 %u) {
   %k = mul i128 %t, %u
diff --git a/test/CodeGen/X86/mul64.ll b/test/CodeGen/X86/mul64.ll
index cd0f802a711e4..5a25c5d0e9dea 100644
--- a/test/CodeGen/X86/mul64.ll
+++ b/test/CodeGen/X86/mul64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mul | count 3
+; RUN: llc < %s -march=x86 | grep mul | count 3
 
 define i64 @foo(i64 %t, i64 %u) {
   %k = mul i64 %t, %u
diff --git a/test/CodeGen/X86/multiple-return-values-cross-block.ll b/test/CodeGen/X86/multiple-return-values-cross-block.ll
index f632b8744335e..e9837d0ebbf5c 100644
--- a/test/CodeGen/X86/multiple-return-values-cross-block.ll
+++ b/test/CodeGen/X86/multiple-return-values-cross-block.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 declare {x86_fp80, x86_fp80} @test()
 
diff --git a/test/CodeGen/X86/multiple-return-values.ll b/test/CodeGen/X86/multiple-return-values.ll
index 5f7a83f884585..018d997599a97 100644
--- a/test/CodeGen/X86/multiple-return-values.ll
+++ b/test/CodeGen/X86/multiple-return-values.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 
 define {i64, float} @bar(i64 %a, float %b) {
         %y = add i64 %a, 7
diff --git a/test/CodeGen/X86/nancvt.ll b/test/CodeGen/X86/nancvt.ll
index 96cac0dc329aa..0b56644f125a4 100644
--- a/test/CodeGen/X86/nancvt.ll
+++ b/test/CodeGen/X86/nancvt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -std-compile-opts | llc > %t
+; RUN: opt < %s -std-compile-opts | llc > %t
 ; RUN: grep 2147027116 %t | count 3
 ; RUN: grep 2147228864 %t | count 3
 ; RUN: grep 2146502828 %t | count 3
diff --git a/test/CodeGen/X86/narrow_op-1.ll b/test/CodeGen/X86/narrow_op-1.ll
index 0ee11b495585f..18f110821bd56 100644
--- a/test/CodeGen/X86/narrow_op-1.ll
+++ b/test/CodeGen/X86/narrow_op-1.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orb | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orb | grep 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orl | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep orl | grep 16842752
+; RUN: llc < %s -march=x86-64 | grep orb | count 1
+; RUN: llc < %s -march=x86-64 | grep orb | grep 1
+; RUN: llc < %s -march=x86-64 | grep orl | count 1
+; RUN: llc < %s -march=x86-64 | grep orl | grep 16842752
 
 	%struct.bf = type { i64, i16, i16, i32 }
 @bfi = common global %struct.bf zeroinitializer, align 16
diff --git a/test/CodeGen/X86/narrow_op-2.ll b/test/CodeGen/X86/narrow_op-2.ll
index b441794f42f98..796ef7a29e498 100644
--- a/test/CodeGen/X86/narrow_op-2.ll
+++ b/test/CodeGen/X86/narrow_op-2.ll
@@ -1,12 +1,14 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | grep 254
-; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | grep 253
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 
 	%struct.bf = type { i64, i16, i16, i32 }
 @bfi = external global %struct.bf*
 
 define void @t1() nounwind ssp {
 entry:
+
+; CHECK: andb	$-2, 10(
+; CHECK: andb	$-3, 10(
+
 	%0 = load %struct.bf** @bfi, align 8
 	%1 = getelementptr %struct.bf* %0, i64 0, i32 1
 	%2 = bitcast i16* %1 to i32*
diff --git a/test/CodeGen/X86/neg_fp.ll b/test/CodeGen/X86/neg_fp.ll
index 1a7ee085b5de2..57164f2bcaf9c 100644
--- a/test/CodeGen/X86/neg_fp.ll
+++ b/test/CodeGen/X86/neg_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
 ; RUN: grep xorps %t | count 1
 
 ; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization
@@ -9,4 +9,4 @@ entry:
 	%sub = fsub float %a, %b		; <float> [#uses=1]
 	%neg = fsub float -0.000000e+00, %sub		; <float> [#uses=1]
 	ret float %neg
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index 689639f5f06db..c3f412e09ae86 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-unsafe-fp-math -march=x86 | not grep xor
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | not grep xor
 ; PR3374
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/negative-sin.ll b/test/CodeGen/X86/negative-sin.ll
index 8cc1bec2d1f99..7842eb8456eb4 100644
--- a/test/CodeGen/X86/negative-sin.ll
+++ b/test/CodeGen/X86/negative-sin.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -enable-unsafe-fp-math -march=x86-64 | \
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86-64 | \
 ; RUN:   not egrep {addsd|subsd|xor}
 
 declare double @sin(double %f)
@@ -6,7 +6,7 @@ declare double @sin(double %f)
 define double @foo(double %e)
 {
   %f = fsub double 0.0, %e
-  %g = call double @sin(double %f)
+  %g = call double @sin(double %f) readonly
   %h = fsub double 0.0, %g
   ret double %h
 }
diff --git a/test/CodeGen/X86/negative-subscript.ll b/test/CodeGen/X86/negative-subscript.ll
index f2bd315bd8679..28f7d6b2dbae9 100644
--- a/test/CodeGen/X86/negative-subscript.ll
+++ b/test/CodeGen/X86/negative-subscript.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; rdar://6559995
 
 @a = external global [255 x i8*], align 32
diff --git a/test/CodeGen/X86/negative_zero.ll b/test/CodeGen/X86/negative_zero.ll
index 3c47b8f1fddd3..29474c21f2444 100644
--- a/test/CodeGen/X86/negative_zero.ll
+++ b/test/CodeGen/X86/negative_zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2,-sse3 | grep fchs
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | grep fchs
 
 
 define double @T() {
diff --git a/test/CodeGen/X86/nobt.ll b/test/CodeGen/X86/nobt.ll
index 55294280f5c85..35090e372916e 100644
--- a/test/CodeGen/X86/nobt.ll
+++ b/test/CodeGen/X86/nobt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep btl
+; RUN: llc < %s -march=x86 | not grep btl
 
 ; This tests some cases where BT must not be generated.  See also bt.ll.
 ; Fixes 20040709-[12].c in gcc testsuite.
diff --git a/test/CodeGen/X86/nofence.ll b/test/CodeGen/X86/nofence.ll
index 132ac9437da99..244d2e9780de9 100644
--- a/test/CodeGen/X86/nofence.ll
+++ b/test/CodeGen/X86/nofence.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep fence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep fence
 
 declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
 
diff --git a/test/CodeGen/X86/omit-label.ll b/test/CodeGen/X86/omit-label.ll
index 457b66b35dca4..0ec03ebace896 100644
--- a/test/CodeGen/X86/omit-label.ll
+++ b/test/CodeGen/X86/omit-label.ll
@@ -1,7 +1,11 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep BB1_1:
+; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-linux-gnu | FileCheck %s
 ; PR4126
+; PR4732
 
-; Don't omit this label's definition.
+; Don't omit these labels' definitions.
+
+; CHECK: bux:
+; CHECK: LBB1_1:
 
 define void @bux(i32 %p_53) nounwind optsize {
 entry:
@@ -21,3 +25,33 @@ bb3:		; preds = %bb.i, %entry
 }
 
 declare i32 @baz(...)
+
+; Don't omit this label in the assembly output.
+; CHECK: int321:
+; CHECK: LBB2_1
+; CHECK: LBB2_1
+; CHECK: LBB2_1:
+
+define void @int321(i8 signext %p_103, i32 %uint8p_104) nounwind readnone {
+entry:
+  %tobool = icmp eq i8 %p_103, 0                  ; <i1> [#uses=1]
+  %cmp.i = icmp sgt i8 %p_103, 0                  ; <i1> [#uses=1]
+  %or.cond = and i1 %tobool, %cmp.i               ; <i1> [#uses=1]
+  br i1 %or.cond, label %land.end.i, label %for.cond.preheader
+
+land.end.i:                                       ; preds = %entry
+  %conv3.i = sext i8 %p_103 to i32                ; <i32> [#uses=1]
+  %div.i = sdiv i32 1, %conv3.i                   ; <i32> [#uses=1]
+  %tobool.i = icmp eq i32 %div.i, -2147483647     ; <i1> [#uses=0]
+  br label %for.cond.preheader
+
+for.cond.preheader:                               ; preds = %land.end.i, %entry
+  %cmp = icmp sgt i8 %p_103, 1                    ; <i1> [#uses=1]
+  br i1 %cmp, label %for.end.split, label %for.cond
+
+for.cond:                                         ; preds = %for.cond.preheader, %for.cond
+  br label %for.cond
+
+for.end.split:                                    ; preds = %for.cond.preheader
+  ret void
+}
diff --git a/test/CodeGen/X86/opt-ext-uses.ll b/test/CodeGen/X86/opt-ext-uses.ll
index 322850c5523f3..fa2aef517477b 100644
--- a/test/CodeGen/X86/opt-ext-uses.ll
+++ b/test/CodeGen/X86/opt-ext-uses.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movw | count 1
+; RUN: llc < %s -march=x86 | grep movw | count 1
 
 define i16 @t() signext  {
 entry:
diff --git a/test/CodeGen/X86/optimize-max-0.ll b/test/CodeGen/X86/optimize-max-0.ll
index 90c14565e9a65..162c7a568fdf4 100644
--- a/test/CodeGen/X86/optimize-max-0.ll
+++ b/test/CodeGen/X86/optimize-max-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep cmov
+; RUN: llc < %s -march=x86 | not grep cmov
 
 ; LSR should be able to eliminate the max computations by
 ; making the loops use slt/ult comparisons instead of ne comparisons.
diff --git a/test/CodeGen/X86/optimize-max-1.ll b/test/CodeGen/X86/optimize-max-1.ll
index 084e1818f5ddd..ad6c24dce009b 100644
--- a/test/CodeGen/X86/optimize-max-1.ll
+++ b/test/CodeGen/X86/optimize-max-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep cmov
+; RUN: llc < %s -march=x86-64 | not grep cmov
 
 ; LSR should be able to eliminate both smax and umax expressions
 ; in loop trip counts.
diff --git a/test/CodeGen/X86/optimize-max-2.ll b/test/CodeGen/X86/optimize-max-2.ll
index effc3fc737d9e..8851c5b1a3056 100644
--- a/test/CodeGen/X86/optimize-max-2.ll
+++ b/test/CodeGen/X86/optimize-max-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep cmov %t | count 2
 ; RUN: grep jne %t | count 1
 
diff --git a/test/CodeGen/X86/or-branch.ll b/test/CodeGen/X86/or-branch.ll
index 20886d5793cab..9ebf8901b77c0 100644
--- a/test/CodeGen/X86/or-branch.ll
+++ b/test/CodeGen/X86/or-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86  | not grep set
+; RUN: llc < %s -march=x86  | not grep set
 
 define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
 entry:
diff --git a/test/CodeGen/X86/overlap-shift.ll b/test/CodeGen/X86/overlap-shift.ll
index 7584a70b5a7d3..c1fc041e7d9b9 100644
--- a/test/CodeGen/X86/overlap-shift.ll
+++ b/test/CodeGen/X86/overlap-shift.ll
@@ -6,7 +6,7 @@
 
 ; Check that the shift gets turned into an LEA.
 
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   not grep {mov E.X, E.X}
 
 @G = external global i32                ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/packed_struct.ll b/test/CodeGen/X86/packed_struct.ll
index 2a781e7e546be..da6e8f8745fef 100644
--- a/test/CodeGen/X86/packed_struct.ll
+++ b/test/CodeGen/X86/packed_struct.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep foos+5 %t
 ; RUN: grep foos+1 %t
 ; RUN: grep foos+9 %t
@@ -15,7 +15,7 @@ target triple = "i686-pc-linux-gnu"
 @foos = external global %struct.anon		; <%struct.anon*> [#uses=3]
 @bara = weak global [4 x <{ i32, i8 }>] zeroinitializer		; <[4 x <{ i32, i8 }>]*> [#uses=2]
 
-define i32 @foo() {
+define i32 @foo() nounwind {
 entry:
 	%tmp = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 1)		; <i32> [#uses=1]
 	%tmp3 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 2)		; <i32> [#uses=1]
@@ -25,7 +25,7 @@ entry:
 	ret i32 %tmp7
 }
 
-define i8 @bar() {
+define i8 @bar() nounwind {
 entry:
 	%tmp = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 1)		; <i8> [#uses=1]
 	%tmp4 = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 3, i32 1)		; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/peep-test-0.ll b/test/CodeGen/X86/peep-test-0.ll
index 8dcd23ae735df..e521d8e37854c 100644
--- a/test/CodeGen/X86/peep-test-0.ll
+++ b/test/CodeGen/X86/peep-test-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: not grep cmp %t
 ; RUN: not grep test %t
 
diff --git a/test/CodeGen/X86/peep-test-1.ll b/test/CodeGen/X86/peep-test-1.ll
index 85e3bf251133f..f83f0f6aa6ff8 100644
--- a/test/CodeGen/X86/peep-test-1.ll
+++ b/test/CodeGen/X86/peep-test-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep dec %t | count 1
 ; RUN: not grep test %t
 ; RUN: not grep cmp %t
diff --git a/test/CodeGen/X86/peep-test-2.ll b/test/CodeGen/X86/peep-test-2.ll
index 788f610365cc0..274517297592b 100644
--- a/test/CodeGen/X86/peep-test-2.ll
+++ b/test/CodeGen/X86/peep-test-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep testl
+; RUN: llc < %s -march=x86 | grep testl
 
 ; It's tempting to eliminate the testl instruction here and just use the
 ; EFLAGS value from the incl, however it can't be known whether the add
diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll
new file mode 100644
index 0000000000000..13a69edea57f0
--- /dev/null
+++ b/test/CodeGen/X86/peep-test-3.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; rdar://7226797
+
+; LLVM should omit the testl and use the flags result from the orl.
+
+; CHECK: or:
+define void @or(float* %A, i32 %IA, i32 %N) nounwind {
+entry:
+  %0 = ptrtoint float* %A to i32                  ; <i32> [#uses=1]
+  %1 = and i32 %0, 3                              ; <i32> [#uses=1]
+  %2 = xor i32 %IA, 1                             ; <i32> [#uses=1]
+; CHECK:      orl %ecx, %edx
+; CHECK-NEXT: je
+  %3 = or i32 %2, %1                              ; <i32> [#uses=1]
+  %4 = icmp eq i32 %3, 0                          ; <i1> [#uses=1]
+  br i1 %4, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store float 0.000000e+00, float* %A, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
+; CHECK: xor:
+define void @xor(float* %A, i32 %IA, i32 %N) nounwind {
+entry:
+  %0 = ptrtoint float* %A to i32                  ; <i32> [#uses=1]
+  %1 = and i32 %0, 3                              ; <i32> [#uses=1]
+; CHECK:      xorl $1, %e
+; CHECK-NEXT: je
+  %2 = xor i32 %IA, 1                             ; <i32> [#uses=1]
+  %3 = xor i32 %2, %1                              ; <i32> [#uses=1]
+  %4 = icmp eq i32 %3, 0                          ; <i1> [#uses=1]
+  br i1 %4, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store float 0.000000e+00, float* %A, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
+; CHECK: and:
+define void @and(float* %A, i32 %IA, i32 %N, i8* %p) nounwind {
+entry:
+  store i8 0, i8* %p
+  %0 = ptrtoint float* %A to i32                  ; <i32> [#uses=1]
+  %1 = and i32 %0, 3                              ; <i32> [#uses=1]
+  %2 = xor i32 %IA, 1                             ; <i32> [#uses=1]
+; CHECK:      andl  $3, %
+; CHECK-NEXT: movb  %
+; CHECK-NEXT: je
+  %3 = and i32 %2, %1                              ; <i32> [#uses=1]
+  %t = trunc i32 %3 to i8
+  store i8 %t, i8* %p
+  %4 = icmp eq i32 %3, 0                          ; <i1> [#uses=1]
+  br i1 %4, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store float 0.000000e+00, float* null, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+; Just like @and, but without the trunc+store. This should use a testl
+; instead of an andl.
+; CHECK: test:
+define void @test(float* %A, i32 %IA, i32 %N, i8* %p) nounwind {
+entry:
+  store i8 0, i8* %p
+  %0 = ptrtoint float* %A to i32                  ; <i32> [#uses=1]
+  %1 = and i32 %0, 3                              ; <i32> [#uses=1]
+  %2 = xor i32 %IA, 1                             ; <i32> [#uses=1]
+; CHECK:      testb $3, %
+; CHECK-NEXT: je
+  %3 = and i32 %2, %1                              ; <i32> [#uses=1]
+  %4 = icmp eq i32 %3, 0                          ; <i1> [#uses=1]
+  br i1 %4, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store float 0.000000e+00, float* null, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/peep-vector-extract-concat.ll b/test/CodeGen/X86/peep-vector-extract-concat.ll
index e6c88bbff9d5e..e4ab2b5e05a42 100644
--- a/test/CodeGen/X86/peep-vector-extract-concat.ll
+++ b/test/CodeGen/X86/peep-vector-extract-concat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep {pshufd	\$3, %xmm0, %xmm0}
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep {pshufd	\$3, %xmm0, %xmm0}
 
 define float @foo(<8 x float> %a) nounwind {
   %c = extractelement <8 x float> %a, i32 3
diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll
index 77332d02a9338..5e18044e7e1b8 100644
--- a/test/CodeGen/X86/peep-vector-extract-insert.ll
+++ b/test/CodeGen/X86/peep-vector-extract-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {pxor	%xmm0, %xmm0} | count 2
+; RUN: llc < %s -march=x86-64 | grep {pxor	%xmm0, %xmm0} | count 2
 
 define float @foo(<4 x float> %a) {
   %b = insertelement <4 x float> %a, float 0.0, i32 3
diff --git a/test/CodeGen/X86/personality.ll b/test/CodeGen/X86/personality.ll
new file mode 100644
index 0000000000000..5acf04cc06c10
--- /dev/null
+++ b/test/CodeGen/X86/personality.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s -check-prefix=X32
+; PR1632
+
+define void @_Z1fv() {
+entry:
+	invoke void @_Z1gv( )
+			to label %return unwind label %unwind
+
+unwind:		; preds = %entry
+	br i1 false, label %eh_then, label %cleanup20
+
+eh_then:		; preds = %unwind
+	invoke void @__cxa_end_catch( )
+			to label %return unwind label %unwind10
+
+unwind10:		; preds = %eh_then
+	%eh_select13 = tail call i64 (i8*, i8*, ...)* @llvm.eh.selector.i64( i8* null, i8* bitcast (void ()* @__gxx_personality_v0 to i8*), i32 1 )		; <i32> [#uses=2]
+	%tmp18 = icmp slt i64 %eh_select13, 0		; <i1> [#uses=1]
+	br i1 %tmp18, label %filter, label %cleanup20
+
+filter:		; preds = %unwind10
+	unreachable
+
+cleanup20:		; preds = %unwind10, %unwind
+	%eh_selector.0 = phi i64 [ 0, %unwind ], [ %eh_select13, %unwind10 ]		; <i32> [#uses=0]
+	ret void
+
+return:		; preds = %eh_then, %entry
+	ret void
+}
+
+declare void @_Z1gv()
+
+declare i64 @llvm.eh.selector.i64(i8*, i8*, ...)
+
+declare void @__gxx_personality_v0()
+
+declare void @__cxa_end_catch()
+
+; X64: Leh_frame_common_begin:
+; X64: .long	___gxx_personality_v0@GOTPCREL+4
+
+; X32: Leh_frame_common_begin:
+; X32: .long	L___gxx_personality_v0$non_lazy_ptr-
+; ....
+
+; X32: .section	__IMPORT,__pointers,non_lazy_symbol_pointers
+; X32: L___gxx_personality_v0$non_lazy_ptr:
+; X32:   .indirect_symbol ___gxx_personality_v0
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
index 7ca3ea8e91467..23c509c9936bd 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 5
+; RUN: llc < %s -march=x86 | grep mov | count 5
 ; PR2659
 
 define i32 @binomial(i32 %n, i32 %k) nounwind {
diff --git a/test/CodeGen/X86/phys_subreg_coalesce.ll b/test/CodeGen/X86/phys_subreg_coalesce.ll
index 3bbc55da16ab9..2c855ce8da637 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | not grep movl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | not grep movl
 
 	%struct.dpoint = type { double, double }
 
diff --git a/test/CodeGen/X86/pic-load-remat.ll b/test/CodeGen/X86/pic-load-remat.ll
index cb4e64044deda..77297521cd0d8 100644
--- a/test/CodeGen/X86/pic-load-remat.ll
+++ b/test/CodeGen/X86/pic-load-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb
 
 define void @f() nounwind  {
 entry:
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
new file mode 100644
index 0000000000000..3a547f95f83fa
--- /dev/null
+++ b/test/CodeGen/X86/pic.ll
@@ -0,0 +1,208 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false | FileCheck %s -check-prefix=LINUX
+
+@ptr = external global i32* 
+@dst = external global i32 
+@src = external global i32 
+
+define void @test1() nounwind {
+entry:
+    store i32* @dst, i32** @ptr
+    %tmp.s = load i32* @src
+    store i32 %tmp.s, i32* @dst
+    ret void
+    
+; LINUX:    test1:
+; LINUX:	call	.L1$pb
+; LINUX-NEXT: .L1$pb:
+; LINUX-NEXT:	popl
+; LINUX:	addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref1-.L1$pb),
+; LINUX:	movl	dst@GOT(%eax),
+; LINUX:	movl	ptr@GOT(%eax),
+; LINUX:	movl	src@GOT(%eax),
+; LINUX:	ret
+}
+
+@ptr2 = global i32* null
+@dst2 = global i32 0
+@src2 = global i32 0
+
+define void @test2() nounwind {
+entry:
+    store i32* @dst2, i32** @ptr2
+    %tmp.s = load i32* @src2
+    store i32 %tmp.s, i32* @dst2
+    ret void
+    
+; LINUX: test2:
+; LINUX:	call	.L2$pb
+; LINUX-NEXT: .L2$pb:
+; LINUX-NEXT:	popl
+; LINUX:	addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref2-.L2$pb), %eax
+; LINUX:	movl	dst2@GOT(%eax),
+; LINUX:	movl	ptr2@GOT(%eax),
+; LINUX:	movl	src2@GOT(%eax),
+; LINUX:	ret
+
+}
+
+declare i8* @malloc(i32)
+
+define void @test3() nounwind {
+entry:
+    %ptr = call i8* @malloc(i32 40)
+    ret void
+; LINUX: test3:
+; LINUX: 	pushl	%ebx
+; LINUX-NEXT: 	subl	$8, %esp
+; LINUX-NEXT: 	call	.L3$pb
+; LINUX-NEXT: .L3$pb:
+; LINUX-NEXT: 	popl	%ebx
+; LINUX: 	addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref3-.L3$pb), %ebx
+; LINUX: 	movl	$40, (%esp)
+; LINUX: 	call	malloc@PLT
+; LINUX: 	addl	$8, %esp
+; LINUX: 	popl	%ebx
+; LINUX: 	ret
+}
+
+@pfoo = external global void(...)* 
+
+define void @test4() nounwind {
+entry:
+    %tmp = call void(...)*(...)* @afoo()
+    store void(...)* %tmp, void(...)** @pfoo
+    %tmp1 = load void(...)** @pfoo
+    call void(...)* %tmp1()
+    ret void
+; LINUX: test4:
+; LINUX: 	call	.L4$pb
+; LINUX-NEXT: .L4$pb:
+; LINUX: 	popl
+; LINUX: 	addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref4-.L4$pb),
+; LINUX: 	movl	pfoo@GOT(%esi),
+; LINUX: 	call	afoo@PLT
+; LINUX: 	call	*
+}
+
+declare void(...)* @afoo(...)
+
+define void @test5() nounwind {
+entry:
+    call void(...)* @foo()
+    ret void
+; LINUX: test5:
+; LINUX: call	.L5$pb
+; LINUX: popl	%ebx
+; LINUX: addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref5-.L5$pb), %ebx
+; LINUX: call	foo@PLT
+}
+
+declare void @foo(...)
+
+
+@ptr6 = internal global i32* null
+@dst6 = internal global i32 0
+@src6 = internal global i32 0
+
+define void @test6() nounwind {
+entry:
+    store i32* @dst6, i32** @ptr6
+    %tmp.s = load i32* @src6
+    store i32 %tmp.s, i32* @dst6
+    ret void
+    
+; LINUX: test6:
+; LINUX: 	call	.L6$pb
+; LINUX-NEXT: .L6$pb:
+; LINUX-NEXT: 	popl	%eax
+; LINUX: 	addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref6-.L6$pb), %eax
+; LINUX: 	leal	dst6@GOTOFF(%eax), %ecx
+; LINUX: 	movl	%ecx, ptr6@GOTOFF(%eax)
+; LINUX: 	movl	src6@GOTOFF(%eax), %ecx
+; LINUX: 	movl	%ecx, dst6@GOTOFF(%eax)
+; LINUX: 	ret
+}
+
+
+;; Test constant pool references.
+define double @test7(i32 %a.u) nounwind {
+entry:
+    %tmp = icmp eq i32 %a.u,0
+    %retval = select i1 %tmp, double 4.561230e+02, double 1.234560e+02
+    ret double %retval
+
+; LINUX: .LCPI7_0:
+
+; LINUX: test7:
+; LINUX:    call .L7$pb
+; LINUX: .L7$pb:
+; LINUX:    addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref7-.L7$pb), 
+; LINUX:    fldl	.LCPI7_0@GOTOFF(
+}
+
+
+;; Test jump table references.
+define void @test8(i32 %n.u) nounwind {
+entry:
+    switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
+bb:
+    tail call void(...)* @foo1()
+    ret void
+bb1:
+    tail call void(...)* @foo2()
+    ret void
+bb2:
+    tail call void(...)* @foo6()
+    ret void
+bb3:
+    tail call void(...)* @foo3()
+    ret void
+bb4:
+    tail call void(...)* @foo4()
+    ret void
+bb5:
+    tail call void(...)* @foo5()
+    ret void
+bb6:
+    tail call void(...)* @foo1()
+    ret void
+bb7:
+    tail call void(...)* @foo2()
+    ret void
+bb8:
+    tail call void(...)* @foo6()
+    ret void
+bb9:
+    tail call void(...)* @foo3()
+    ret void
+bb10:
+    tail call void(...)* @foo4()
+    ret void
+bb11:
+    tail call void(...)* @foo5()
+    ret void
+bb12:
+    tail call void(...)* @foo6()
+    ret void
+    
+; LINUX: test8:
+; LINUX:   call	.L8$pb
+; LINUX: .L8$pb:
+; LINUX:   addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref8-.L8$pb),
+; LINUX:   addl	.LJTI8_0@GOTOFF(
+; LINUX:   jmpl	*%ecx
+
+; LINUX: .LJTI8_0:
+; LINUX:   .long	 .LBB8_2@GOTOFF
+; LINUX:   .long	 .LBB8_2@GOTOFF
+; LINUX:   .long	 .LBB8_7@GOTOFF
+; LINUX:   .long	 .LBB8_3@GOTOFF
+; LINUX:   .long	 .LBB8_7@GOTOFF
+}
+
+declare void @foo1(...)
+declare void @foo2(...)
+declare void @foo6(...)
+declare void @foo3(...)
+declare void @foo4(...)
+declare void @foo5(...)
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index 04245d149a8c7..b3750c1e8e676 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
-; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | not grep lea
-; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | grep add | count 2
-; RUN: llvm-as < %s | llc                       -mtriple=x86_64-apple-darwin | not grep 'lJTI'
+; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
+; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | not grep lea
+; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | grep add | count 2
+; RUN: llc < %s                       -mtriple=x86_64-apple-darwin | not grep 'lJTI'
 ; rdar://6971437
 
 declare void @_Z3bari(i32)
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index e00d1e50e49b2..e2746a8c0638e 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -stack-alignment=16 > %t
+; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 > %t
 ; RUN: grep pmul %t | count 12
 ; RUN: grep mov %t | count 12
 
diff --git a/test/CodeGen/X86/postalloc-coalescing.ll b/test/CodeGen/X86/postalloc-coalescing.ll
index 9c44a5a7075d1..a171436543c6c 100644
--- a/test/CodeGen/X86/postalloc-coalescing.ll
+++ b/test/CodeGen/X86/postalloc-coalescing.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 3
+; RUN: llc < %s -march=x86 | grep mov | count 3
 
 define fastcc i32 @_Z18yy_get_next_bufferv() {
 entry:
diff --git a/test/CodeGen/X86/pr1462.ll b/test/CodeGen/X86/pr1462.ll
index 7f9037a137dfd..62549a50356a3 100644
--- a/test/CodeGen/X86/pr1462.ll
+++ b/test/CodeGen/X86/pr1462.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1462
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-
diff --git a/test/CodeGen/X86/pr1489.ll b/test/CodeGen/X86/pr1489.ll
index 10fa96a3b81d9..c9e24bfb13fa8 100644
--- a/test/CodeGen/X86/pr1489.ll
+++ b/test/CodeGen/X86/pr1489.ll
@@ -1,12 +1,12 @@
-; RUN: llvm-as < %s | llc -disable-fp-elim -O0 -mcpu=i486 | grep 1082126238 | count 3
-; RUN: llvm-as < %s | llc -disable-fp-elim -O0 -mcpu=i486 | grep 3058016715 | count 1
+; RUN: llc < %s -disable-fp-elim -O0 -mcpu=i486 | grep 1082126238 | count 3
+; RUN: llc < %s -disable-fp-elim -O0 -mcpu=i486 | grep -- -1236950581 | count 1
 ;; magic constants are 3.999f and half of 3.999
 ; ModuleID = '1489.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
 @.str = internal constant [13 x i8] c"%d %d %d %d\0A\00"		; <[13 x i8]*> [#uses=1]
 
-define i32 @quux() {
+define i32 @quux() nounwind {
 entry:
 	%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 )		; <i32> [#uses=1]
 	%tmp2 = icmp slt i32 %tmp1, 1		; <i1> [#uses=1]
@@ -16,7 +16,7 @@ entry:
 
 declare i32 @lrintf(float)
 
-define i32 @foo() {
+define i32 @foo() nounwind {
 entry:
 	%tmp1 = tail call i32 @lrint( double 3.999000e+00 )		; <i32> [#uses=1]
 	%tmp2 = icmp slt i32 %tmp1, 1		; <i1> [#uses=1]
@@ -26,7 +26,7 @@ entry:
 
 declare i32 @lrint(double)
 
-define i32 @bar() {
+define i32 @bar() nounwind {
 entry:
 	%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 )		; <i32> [#uses=1]
 	%tmp2 = icmp slt i32 %tmp1, 1		; <i1> [#uses=1]
@@ -34,7 +34,7 @@ entry:
 	ret i32 %tmp23
 }
 
-define i32 @baz() {
+define i32 @baz() nounwind {
 entry:
 	%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 )		; <i32> [#uses=1]
 	%tmp2 = icmp slt i32 %tmp1, 1		; <i1> [#uses=1]
@@ -42,7 +42,7 @@ entry:
 	ret i32 %tmp23
 }
 
-define i32 @main() {
+define i32 @main() nounwind {
 entry:
 	%tmp = tail call i32 @baz( )		; <i32> [#uses=1]
 	%tmp1 = tail call i32 @bar( )		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/pr1505.ll b/test/CodeGen/X86/pr1505.ll
index e9e3d9060958d..883a806f38dec 100644
--- a/test/CodeGen/X86/pr1505.ll
+++ b/test/CodeGen/X86/pr1505.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=i486 | not grep fldl
+; RUN: llc < %s -mcpu=i486 | not grep fldl
 ; PR1505
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index c70e32760216f..12736cda4cd28 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstpl | count 4
-; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstps | count 3
+; RUN: llc < %s -mcpu=i486 | grep fstpl | count 4
+; RUN: llc < %s -mcpu=i486 | grep fstps | count 3
 ; PR1505
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/pr2177.ll b/test/CodeGen/X86/pr2177.ll
index b03c99095725a..e941bf7fdabe2 100644
--- a/test/CodeGen/X86/pr2177.ll
+++ b/test/CodeGen/X86/pr2177.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2177
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2182.ll b/test/CodeGen/X86/pr2182.ll
index f65725db8bdc5..f97663c6c1ffe 100644
--- a/test/CodeGen/X86/pr2182.ll
+++ b/test/CodeGen/X86/pr2182.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {addl	\$3, (%eax)} | count 4
+; RUN: llc < %s | grep {addl	\$3, (%eax)} | count 4
 ; PR2182
 
 target datalayout =
diff --git a/test/CodeGen/X86/pr2326.ll b/test/CodeGen/X86/pr2326.ll
index 6cf750c6d4b06..f82dcb5d678ff 100644
--- a/test/CodeGen/X86/pr2326.ll
+++ b/test/CodeGen/X86/pr2326.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep sete
+; RUN: llc < %s -march=x86 | grep sete
 ; PR2326
 
 define i32 @func_59(i32 %p_60) nounwind  {
diff --git a/test/CodeGen/X86/pr2623.ll b/test/CodeGen/X86/pr2623.ll
index 51c86b75dd2d0..5d0eb5da2155b 100644
--- a/test/CodeGen/X86/pr2623.ll
+++ b/test/CodeGen/X86/pr2623.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2623
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index 96976b8e466aa..afd71143c4588 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {xorps.\*sp} | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {xorps.\*sp} | count 1
 ; PR2656
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
index 00e6e7bd8303e..0760e4c7fd5b4 100644
--- a/test/CodeGen/X86/pr2659.ll
+++ b/test/CodeGen/X86/pr2659.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 5
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 5
 ; PR2659
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2849.ll b/test/CodeGen/X86/pr2849.ll
index 673598fe72493..0fec4813e1096 100644
--- a/test/CodeGen/X86/pr2849.ll
+++ b/test/CodeGen/X86/pr2849.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2849
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr2924.ll b/test/CodeGen/X86/pr2924.ll
index 2cab563116595..b9e8dc1740d96 100644
--- a/test/CodeGen/X86/pr2924.ll
+++ b/test/CodeGen/X86/pr2924.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR2924
 
 target datalayout =
diff --git a/test/CodeGen/X86/pr2982.ll b/test/CodeGen/X86/pr2982.ll
index f5dc1f4b9a410..3f9a5953153b6 100644
--- a/test/CodeGen/X86/pr2982.ll
+++ b/test/CodeGen/X86/pr2982.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR2982
 
 target datalayout =
diff --git a/test/CodeGen/X86/pr3154.ll b/test/CodeGen/X86/pr3154.ll
index 73f51018817ac..18df97c723027 100644
--- a/test/CodeGen/X86/pr3154.ll
+++ b/test/CodeGen/X86/pr3154.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mattr=+sse2
-; RUN: llvm-as < %s | llc -mtriple=i386-pc-linux-gnu -mattr=+sse2 -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2 -relocation-model=pic -disable-fp-elim
 ; PR3154
 
 define void @ff_flac_compute_autocorr_sse2(i32* %data, i32 %len, i32 %lag, double* %autoc) nounwind {
diff --git a/test/CodeGen/X86/pr3216.ll b/test/CodeGen/X86/pr3216.ll
index fdc814ef33761..38c9f324ccac4 100644
--- a/test/CodeGen/X86/pr3216.ll
+++ b/test/CodeGen/X86/pr3216.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {sar.	\$5}
+; RUN: llc < %s -march=x86 | grep {sar.	\$5}
 
 @foo = global i8 127
 
diff --git a/test/CodeGen/X86/pr3241.ll b/test/CodeGen/X86/pr3241.ll
index 665a763f34f1e..2f7917b77c392 100644
--- a/test/CodeGen/X86/pr3241.ll
+++ b/test/CodeGen/X86/pr3241.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR3241
 
 @g_620 = external global i32
diff --git a/test/CodeGen/X86/pr3243.ll b/test/CodeGen/X86/pr3243.ll
index 7be887b38e485..483b5bf3a2a69 100644
--- a/test/CodeGen/X86/pr3243.ll
+++ b/test/CodeGen/X86/pr3243.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR3243
 
 declare signext i16 @safe_mul_func_int16_t_s_s(i16 signext, i32) nounwind readnone optsize
diff --git a/test/CodeGen/X86/pr3244.ll b/test/CodeGen/X86/pr3244.ll
index 0765f86405c55..2598c2f976b2d 100644
--- a/test/CodeGen/X86/pr3244.ll
+++ b/test/CodeGen/X86/pr3244.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR3244
 
 @g_62 = external global i16             ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/pr3250.ll b/test/CodeGen/X86/pr3250.ll
index dce154f1855c9..cccbf54bcc6b4 100644
--- a/test/CodeGen/X86/pr3250.ll
+++ b/test/CodeGen/X86/pr3250.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR3250
 
 declare i32 @safe_sub_func_short_u_u(i16 signext, i16 signext) nounwind
diff --git a/test/CodeGen/X86/pr3317.ll b/test/CodeGen/X86/pr3317.ll
index aa5ee7ce7c8dc..9d6626b324d54 100644
--- a/test/CodeGen/X86/pr3317.ll
+++ b/test/CodeGen/X86/pr3317.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 ; PR3317
 
         %ArraySInt16 = type { %JavaObject, i8*, [0 x i16] }
diff --git a/test/CodeGen/X86/pr3366.ll b/test/CodeGen/X86/pr3366.ll
index a6f3e92676ae5..f813e2e58801a 100644
--- a/test/CodeGen/X86/pr3366.ll
+++ b/test/CodeGen/X86/pr3366.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movzbl
+; RUN: llc < %s -march=x86 | grep movzbl
 ; PR3366
 
 define void @_ada_c34002a() nounwind {
diff --git a/test/CodeGen/X86/pr3457.ll b/test/CodeGen/X86/pr3457.ll
index d4a98103ecc58..f7af927d61364 100644
--- a/test/CodeGen/X86/pr3457.ll
+++ b/test/CodeGen/X86/pr3457.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | not grep fstpt
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep fstpt
 ; PR3457
 ; rdar://6548010
 
diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll
index f67ff75d46ae8..1372a1522bd42 100644
--- a/test/CodeGen/X86/pr3495-2.ll
+++ b/test/CodeGen/X86/pr3495-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited}
+; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited}
 
 target triple = "i386-apple-darwin9.6"
 	%struct.constraintVCGType = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
index ca6204c101e9a..4b62bf40da4bf 100644
--- a/test/CodeGen/X86/pr3495.ll
+++ b/test/CodeGen/X86/pr3495.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of reloads omited} | grep 2
-; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep {Number of available reloads turned into copies}
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 39
+; RUN: llc < %s -march=x86 -stats |& grep {Number of reloads omited} | grep 1
+; RUN: llc < %s -march=x86 -stats |& grep {Number of available reloads turned into copies} | grep 1
+; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 40
 ; PR3495
 ; The loop reversal kicks in once here, resulting in one fewer instruction.
 
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index f743700fd2516..7cdeaa099271b 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep machine-sink
+; RUN: llc < %s -march=x86 -stats |& not grep machine-sink
 ; PR3522
 
 target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/pre-split1.ll b/test/CodeGen/X86/pre-split1.ll
index 4f9a5820e043d..e89b507414eb6 100644
--- a/test/CodeGen/X86/pre-split1.ll
+++ b/test/CodeGen/X86/pre-split1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
 ; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
 ; XFAIL: *
 
diff --git a/test/CodeGen/X86/pre-split10.ll b/test/CodeGen/X86/pre-split10.ll
index 60297e9a5dc67..db039bd97acdf 100644
--- a/test/CodeGen/X86/pre-split10.ll
+++ b/test/CodeGen/X86/pre-split10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
 
 define i32 @main(i32 %argc, i8** %argv) nounwind {
 entry:
diff --git a/test/CodeGen/X86/pre-split11.ll b/test/CodeGen/X86/pre-split11.ll
new file mode 100644
index 0000000000000..0a9f4e33f34c1
--- /dev/null
+++ b/test/CodeGen/X86/pre-split11.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split | FileCheck %s
+
+@.str = private constant [28 x i8] c"\0A\0ADOUBLE            D = %f\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
+@.str1 = private constant [37 x i8] c"double to long    l1 = %ld\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
+@.str2 = private constant [35 x i8] c"double to uint   ui1 = %u\09\09(0x%x)\0A\00", align 8 ; <[35 x i8]*> [#uses=1]
+@.str3 = private constant [37 x i8] c"double to ulong  ul1 = %lu\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+; CHECK: movsd %xmm0, (%rsp)
+entry:
+  %0 = icmp sgt i32 %argc, 4                      ; <i1> [#uses=1]
+  br i1 %0, label %bb, label %bb2
+
+bb:                                               ; preds = %entry
+  %1 = getelementptr inbounds i8** %argv, i64 4   ; <i8**> [#uses=1]
+  %2 = load i8** %1, align 8                      ; <i8*> [#uses=1]
+  %3 = tail call double @atof(i8* %2) nounwind    ; <double> [#uses=1]
+  br label %bb2
+
+bb2:                                              ; preds = %bb, %entry
+  %storemerge = phi double [ %3, %bb ], [ 2.000000e+00, %entry ] ; <double> [#uses=4]
+  %4 = fptoui double %storemerge to i32           ; <i32> [#uses=2]
+  %5 = fptoui double %storemerge to i64           ; <i64> [#uses=2]
+  %6 = fptosi double %storemerge to i64           ; <i64> [#uses=2]
+  %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i64 0, i64 0), double %storemerge) nounwind ; <i32> [#uses=0]
+  %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str1, i64 0, i64 0), i64 %6, i64 %6) nounwind ; <i32> [#uses=0]
+  %9 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str2, i64 0, i64 0), i32 %4, i32 %4) nounwind ; <i32> [#uses=0]
+  %10 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str3, i64 0, i64 0), i64 %5, i64 %5) nounwind ; <i32> [#uses=0]
+  ret i32 0
+}
+
+declare double @atof(i8* nocapture) nounwind readonly
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/X86/pre-split2.ll b/test/CodeGen/X86/pre-split2.ll
index 2009ad8b66d89..ba902f95513d2 100644
--- a/test/CodeGen/X86/pre-split2.ll
+++ b/test/CodeGen/X86/pre-split2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
 ; RUN:   grep {pre-alloc-split} | count 2
 
 define i32 @t(i32 %arg) {
diff --git a/test/CodeGen/X86/pre-split3.ll b/test/CodeGen/X86/pre-split3.ll
index f34f1447edda2..2e314207c3e34 100644
--- a/test/CodeGen/X86/pre-split3.ll
+++ b/test/CodeGen/X86/pre-split3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
 ; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
 
 define i32 @t(i32 %arg) {
diff --git a/test/CodeGen/X86/pre-split4.ll b/test/CodeGen/X86/pre-split4.ll
index a570f7304f372..10cef276c62f6 100644
--- a/test/CodeGen/X86/pre-split4.ll
+++ b/test/CodeGen/X86/pre-split4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
 ; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 2
 
 define i32 @main(i32 %argc, i8** %argv) nounwind {
diff --git a/test/CodeGen/X86/pre-split5.ll b/test/CodeGen/X86/pre-split5.ll
index b83003f30feab..8def460809f21 100644
--- a/test/CodeGen/X86/pre-split5.ll
+++ b/test/CodeGen/X86/pre-split5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
 
 target triple = "i386-apple-darwin9.5"
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/X86/pre-split6.ll b/test/CodeGen/X86/pre-split6.ll
index e771b8067c21b..d38e63088d1c7 100644
--- a/test/CodeGen/X86/pre-split6.ll
+++ b/test/CodeGen/X86/pre-split6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split | grep {divsd	8} | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split | grep {divsd	8} | count 1
 
 @current_surfaces.b = external global i1		; <i1*> [#uses=1]
 
diff --git a/test/CodeGen/X86/pre-split7.ll b/test/CodeGen/X86/pre-split7.ll
index cd9d205a7138f..0b81c0bc09fe6 100644
--- a/test/CodeGen/X86/pre-split7.ll
+++ b/test/CodeGen/X86/pre-split7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
 
 @object_distance = external global double, align 8		; <double*> [#uses=1]
 @axis_slope_angle = external global double, align 8		; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll
index 22598195ed122..ea4b9496b3c37 100644
--- a/test/CodeGen/X86/pre-split8.ll
+++ b/test/CodeGen/X86/pre-split8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
 ; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
 
 @current_surfaces.b = external global i1		; <i1*> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll
index 1be960f53a549..c27d925d43e4b 100644
--- a/test/CodeGen/X86/pre-split9.ll
+++ b/test/CodeGen/X86/pre-split9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
 ; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
 
 @current_surfaces.b = external global i1		; <i1*> [#uses=1]
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index d6517f7ef5b13..fac5915aae887 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse > %t
+; RUN: llc < %s -march=x86 -mattr=+sse > %t
 ; RUN: grep prefetchnta %t
 ; RUN: grep prefetcht0 %t
 ; RUN: grep prefetcht1 %t
diff --git a/test/CodeGen/X86/private-2.ll b/test/CodeGen/X86/private-2.ll
index 7478128567730..8aa744ead8caa 100644
--- a/test/CodeGen/X86/private-2.ll
+++ b/test/CodeGen/X86/private-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 | grep L__ZZ20
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | grep L__ZZ20
 ; Quote should be outside of private prefix.
 ; rdar://6855766x
 
diff --git a/test/CodeGen/X86/private.ll b/test/CodeGen/X86/private.ll
index caf1035c3433f..22b6f35a70ef1 100644
--- a/test/CodeGen/X86/private.ll
+++ b/test/CodeGen/X86/private.ll
@@ -1,9 +1,9 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep .Lfoo:
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep call.*\.Lfoo
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep .Lbaz:
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux | grep movl.*\.Lbaz
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lfoo:
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep call.*\.Lfoo
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lbaz:
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep movl.*\.Lbaz
 
 declare void @foo()
 
diff --git a/test/CodeGen/X86/ptrtoint-constexpr.ll b/test/CodeGen/X86/ptrtoint-constexpr.ll
new file mode 100644
index 0000000000000..72a428ea32083
--- /dev/null
+++ b/test/CodeGen/X86/ptrtoint-constexpr.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=i386-linux | FileCheck %s
+	%union.x = type { i64 }
+
+; CHECK:	.globl r
+; CHECK: r:
+; CHECK: .quad	((r) & 4294967295)
+
+@r = global %union.x { i64 ptrtoint (%union.x* @r to i64) }, align 4
diff --git a/test/CodeGen/X86/rdtsc.ll b/test/CodeGen/X86/rdtsc.ll
index f5d947fcbabb5..f21a44c36073b 100644
--- a/test/CodeGen/X86/rdtsc.ll
+++ b/test/CodeGen/X86/rdtsc.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep rdtsc
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rdtsc
+; RUN: llc < %s -march=x86 | grep rdtsc
+; RUN: llc < %s -march=x86-64 | grep rdtsc
 declare i64 @llvm.readcyclecounter()
 
 define i64 @foo() {
diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll
index 60e16b05ca757..1ffb4e3c78f67 100644
--- a/test/CodeGen/X86/red-zone.ll
+++ b/test/CodeGen/X86/red-zone.ll
@@ -1,13 +1,25 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
-; RUN: not grep subq %t
-; RUN: not grep addq %t
-; RUN: grep {\\-4(%%rsp)} %t | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 -disable-red-zone > %t
-; RUN: grep subq %t | count 1
-; RUN: grep addq %t | count 1
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 
+; First without noredzone.
+; CHECK: f0:
+; CHECK: -4(%rsp)
+; CHECK: -4(%rsp)
+; CHECK: ret
 define x86_fp80 @f0(float %f) nounwind readnone {
 entry:
 	%0 = fpext float %f to x86_fp80		; <x86_fp80> [#uses=1]
 	ret x86_fp80 %0
 }
+
+; Then with noredzone.
+; CHECK: f1:
+; CHECK: subq $4, %rsp
+; CHECK: (%rsp)
+; CHECK: (%rsp)
+; CHECK: addq $4, %rsp
+; CHECK: ret
+define x86_fp80 @f1(float %f) nounwind readnone noredzone {
+entry:
+	%0 = fpext float %f to x86_fp80		; <x86_fp80> [#uses=1]
+	ret x86_fp80 %0
+}
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
index dea7d7eb0ea43..9557d17150ec4 100644
--- a/test/CodeGen/X86/red-zone2.ll
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep subq %t | count 1
 ; RUN: grep addq %t | count 1
 
diff --git a/test/CodeGen/X86/regpressure.ll b/test/CodeGen/X86/regpressure.ll
index 6d8cfbb781f9f..e0b5f7a870bb2 100644
--- a/test/CodeGen/X86/regpressure.ll
+++ b/test/CodeGen/X86/regpressure.ll
@@ -1,7 +1,7 @@
 ;; Both functions in this testcase should codegen to the same function, and
 ;; neither of them should require spilling anything to the stack.
 
-; RUN: llvm-as < %s | llc -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats |& \
 ; RUN:   not grep {Number of register spills}
 
 ;; This can be compiled to use three registers if the loads are not
diff --git a/test/CodeGen/X86/rem-2.ll b/test/CodeGen/X86/rem-2.ll
index 3e17fc0b43098..1b2af4b87a32e 100644
--- a/test/CodeGen/X86/rem-2.ll
+++ b/test/CodeGen/X86/rem-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep cltd
+; RUN: llc < %s -march=x86 | not grep cltd
 
 define i32 @test(i32 %X) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/rem.ll b/test/CodeGen/X86/rem.ll
index bba1f9b96bb4e..394070ecdf235 100644
--- a/test/CodeGen/X86/rem.ll
+++ b/test/CodeGen/X86/rem.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep div
+; RUN: llc < %s -march=x86 | not grep div
 
 define i32 @test1(i32 %X) {
         %tmp1 = srem i32 %X, 255                ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/remat-constant.ll b/test/CodeGen/X86/remat-constant.ll
index 8dfed5ed52e25..3e813209d4103 100644
--- a/test/CodeGen/X86/remat-constant.ll
+++ b/test/CodeGen/X86/remat-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static | grep xmm | count 2
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static | grep xmm | count 2
 
 declare void @bar() nounwind
 
diff --git a/test/CodeGen/X86/remat-mov-1.ll b/test/CodeGen/X86/remat-mov-1.ll
index 98b7bb45e9e71..d71b7a5b910a3 100644
--- a/test/CodeGen/X86/remat-mov-1.ll
+++ b/test/CodeGen/X86/remat-mov-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 4294967295 | grep mov | count 2
+; RUN: llc < %s -march=x86 | grep -- -1 | grep mov | count 2
 
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 	%struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* }
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
new file mode 100644
index 0000000000000..790ae83c2b2bc
--- /dev/null
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
+; RUN: not grep xor %t
+; RUN: not grep movap %t
+; RUN: grep {\\.zero} %t
+
+; Remat should be able to fold the zero constant into the div instructions
+; as a constant-pool load.
+
+define void @foo(double* nocapture %x, double* nocapture %y) nounwind {
+entry:
+  %tmp1 = load double* %x                         ; <double> [#uses=1]
+  %arrayidx4 = getelementptr inbounds double* %x, i64 1 ; <double*> [#uses=1]
+  %tmp5 = load double* %arrayidx4                 ; <double> [#uses=1]
+  %arrayidx8 = getelementptr inbounds double* %x, i64 2 ; <double*> [#uses=1]
+  %tmp9 = load double* %arrayidx8                 ; <double> [#uses=1]
+  %arrayidx12 = getelementptr inbounds double* %x, i64 3 ; <double*> [#uses=1]
+  %tmp13 = load double* %arrayidx12               ; <double> [#uses=1]
+  %arrayidx16 = getelementptr inbounds double* %x, i64 4 ; <double*> [#uses=1]
+  %tmp17 = load double* %arrayidx16               ; <double> [#uses=1]
+  %arrayidx20 = getelementptr inbounds double* %x, i64 5 ; <double*> [#uses=1]
+  %tmp21 = load double* %arrayidx20               ; <double> [#uses=1]
+  %arrayidx24 = getelementptr inbounds double* %x, i64 6 ; <double*> [#uses=1]
+  %tmp25 = load double* %arrayidx24               ; <double> [#uses=1]
+  %arrayidx28 = getelementptr inbounds double* %x, i64 7 ; <double*> [#uses=1]
+  %tmp29 = load double* %arrayidx28               ; <double> [#uses=1]
+  %arrayidx32 = getelementptr inbounds double* %x, i64 8 ; <double*> [#uses=1]
+  %tmp33 = load double* %arrayidx32               ; <double> [#uses=1]
+  %arrayidx36 = getelementptr inbounds double* %x, i64 9 ; <double*> [#uses=1]
+  %tmp37 = load double* %arrayidx36               ; <double> [#uses=1]
+  %arrayidx40 = getelementptr inbounds double* %x, i64 10 ; <double*> [#uses=1]
+  %tmp41 = load double* %arrayidx40               ; <double> [#uses=1]
+  %arrayidx44 = getelementptr inbounds double* %x, i64 11 ; <double*> [#uses=1]
+  %tmp45 = load double* %arrayidx44               ; <double> [#uses=1]
+  %arrayidx48 = getelementptr inbounds double* %x, i64 12 ; <double*> [#uses=1]
+  %tmp49 = load double* %arrayidx48               ; <double> [#uses=1]
+  %arrayidx52 = getelementptr inbounds double* %x, i64 13 ; <double*> [#uses=1]
+  %tmp53 = load double* %arrayidx52               ; <double> [#uses=1]
+  %arrayidx56 = getelementptr inbounds double* %x, i64 14 ; <double*> [#uses=1]
+  %tmp57 = load double* %arrayidx56               ; <double> [#uses=1]
+  %arrayidx60 = getelementptr inbounds double* %x, i64 15 ; <double*> [#uses=1]
+  %tmp61 = load double* %arrayidx60               ; <double> [#uses=1]
+  %arrayidx64 = getelementptr inbounds double* %x, i64 16 ; <double*> [#uses=1]
+  %tmp65 = load double* %arrayidx64               ; <double> [#uses=1]
+  %div = fdiv double %tmp1, 0.000000e+00          ; <double> [#uses=1]
+  store double %div, double* %y
+  %div70 = fdiv double %tmp5, 2.000000e-01        ; <double> [#uses=1]
+  %arrayidx72 = getelementptr inbounds double* %y, i64 1 ; <double*> [#uses=1]
+  store double %div70, double* %arrayidx72
+  %div74 = fdiv double %tmp9, 2.000000e-01        ; <double> [#uses=1]
+  %arrayidx76 = getelementptr inbounds double* %y, i64 2 ; <double*> [#uses=1]
+  store double %div74, double* %arrayidx76
+  %div78 = fdiv double %tmp13, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx80 = getelementptr inbounds double* %y, i64 3 ; <double*> [#uses=1]
+  store double %div78, double* %arrayidx80
+  %div82 = fdiv double %tmp17, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx84 = getelementptr inbounds double* %y, i64 4 ; <double*> [#uses=1]
+  store double %div82, double* %arrayidx84
+  %div86 = fdiv double %tmp21, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx88 = getelementptr inbounds double* %y, i64 5 ; <double*> [#uses=1]
+  store double %div86, double* %arrayidx88
+  %div90 = fdiv double %tmp25, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx92 = getelementptr inbounds double* %y, i64 6 ; <double*> [#uses=1]
+  store double %div90, double* %arrayidx92
+  %div94 = fdiv double %tmp29, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx96 = getelementptr inbounds double* %y, i64 7 ; <double*> [#uses=1]
+  store double %div94, double* %arrayidx96
+  %div98 = fdiv double %tmp33, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx100 = getelementptr inbounds double* %y, i64 8 ; <double*> [#uses=1]
+  store double %div98, double* %arrayidx100
+  %div102 = fdiv double %tmp37, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx104 = getelementptr inbounds double* %y, i64 9 ; <double*> [#uses=1]
+  store double %div102, double* %arrayidx104
+  %div106 = fdiv double %tmp41, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx108 = getelementptr inbounds double* %y, i64 10 ; <double*> [#uses=1]
+  store double %div106, double* %arrayidx108
+  %div110 = fdiv double %tmp45, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx112 = getelementptr inbounds double* %y, i64 11 ; <double*> [#uses=1]
+  store double %div110, double* %arrayidx112
+  %div114 = fdiv double %tmp49, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx116 = getelementptr inbounds double* %y, i64 12 ; <double*> [#uses=1]
+  store double %div114, double* %arrayidx116
+  %div118 = fdiv double %tmp53, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx120 = getelementptr inbounds double* %y, i64 13 ; <double*> [#uses=1]
+  store double %div118, double* %arrayidx120
+  %div122 = fdiv double %tmp57, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx124 = getelementptr inbounds double* %y, i64 14 ; <double*> [#uses=1]
+  store double %div122, double* %arrayidx124
+  %div126 = fdiv double %tmp61, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx128 = getelementptr inbounds double* %y, i64 15 ; <double*> [#uses=1]
+  store double %div126, double* %arrayidx128
+  %div130 = fdiv double %tmp65, 0.000000e+00      ; <double> [#uses=1]
+  %arrayidx132 = getelementptr inbounds double* %y, i64 16 ; <double*> [#uses=1]
+  store double %div130, double* %arrayidx132
+  ret void
+}
diff --git a/test/CodeGen/X86/ret-addr.ll b/test/CodeGen/X86/ret-addr.ll
index 06a10c6a30f07..b7b57ab3b842f 100644
--- a/test/CodeGen/X86/ret-addr.ll
+++ b/test/CodeGen/X86/ret-addr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -disable-fp-elim -march=x86 | not grep xor
-; RUN: llvm-as < %s | llc -disable-fp-elim -march=x86-64 | not grep xor
+; RUN: llc < %s -disable-fp-elim -march=x86 | not grep xor
+; RUN: llc < %s -disable-fp-elim -march=x86-64 | not grep xor
 
 define i8* @h() nounwind readnone optsize {
 entry:
diff --git a/test/CodeGen/X86/ret-i64-0.ll b/test/CodeGen/X86/ret-i64-0.ll
index c59e4cf9439eb..bca0f056b90de 100644
--- a/test/CodeGen/X86/ret-i64-0.ll
+++ b/test/CodeGen/X86/ret-i64-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 2
+; RUN: llc < %s -march=x86 | grep xor | count 2
 
 define i64 @foo() nounwind {
   ret i64 0
diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll
index 178ff4e8f7e04..04b57dd8d6c0c 100644
--- a/test/CodeGen/X86/ret-mmx.ll
+++ b/test/CodeGen/X86/ret-mmx.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx,+sse2
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2
 ; rdar://6602459
 
 @g_v1di = external global <1 x i64>
diff --git a/test/CodeGen/X86/rip-rel-address.ll b/test/CodeGen/X86/rip-rel-address.ll
index 2c0926a654430..24ff07b4b2199 100644
--- a/test/CodeGen/X86/rip-rel-address.ll
+++ b/test/CodeGen/X86/rip-rel-address.ll
@@ -1,7 +1,14 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -relocation-model=static | grep {a(%rip)}
+; RUN: llc < %s -march=x86-64 -relocation-model=pic -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=PIC64
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=static | FileCheck %s -check-prefix=STATIC64
+
+; Use %rip-relative addressing even in static mode on x86-64, because
+; it has a smaller encoding.
 
 @a = internal global double 3.4
 define double @foo() nounwind {
   %a = load double* @a
   ret double %a
+  
+; PIC64:    movsd	_a(%rip), %xmm0
+; STATIC64: movsd	a(%rip), %xmm0
 }
diff --git a/test/CodeGen/X86/rodata-relocs.ll b/test/CodeGen/X86/rodata-relocs.ll
index b800e098ce25b..276f8bb48d06e 100644
--- a/test/CodeGen/X86/rodata-relocs.ll
+++ b/test/CodeGen/X86/rodata-relocs.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -relocation-model=static | grep rodata | count 3
-; RUN: llvm-as < %s | llc -relocation-model=static | grep -F "rodata.cst" | count 2
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep rodata | count 2
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel.ro" | count 2
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel.ro.local" | count 1
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel" | count 4
-; RUN: llvm-as < %s | llc -relocation-model=pic | grep -F ".data.rel.local" | count 1
+; RUN: llc < %s -relocation-model=static | grep rodata | count 3
+; RUN: llc < %s -relocation-model=static | grep -F "rodata.cst" | count 2
+; RUN: llc < %s -relocation-model=pic | grep rodata | count 2
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro" | count 2
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro.local" | count 1
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel" | count 4
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.local" | count 1
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/rot16.ll b/test/CodeGen/X86/rot16.ll
index c196ce2cc139f..42ece47b03004 100644
--- a/test/CodeGen/X86/rot16.ll
+++ b/test/CodeGen/X86/rot16.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep rol %t | count 3
 ; RUN: grep ror %t | count 1
 ; RUN: grep shld %t | count 2
diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll
index 7cebcb86ce120..655ed272837ac 100644
--- a/test/CodeGen/X86/rot32.ll
+++ b/test/CodeGen/X86/rot32.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep rol %t | count 3
 ; RUN: grep ror %t | count 1
 ; RUN: grep shld %t | count 2
diff --git a/test/CodeGen/X86/rot64.ll b/test/CodeGen/X86/rot64.ll
index 2408359a141dd..4e082bb860b45 100644
--- a/test/CodeGen/X86/rot64.ll
+++ b/test/CodeGen/X86/rot64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep rol %t | count 3
 ; RUN: grep ror %t | count 1
 ; RUN: grep shld %t | count 2
diff --git a/test/CodeGen/X86/rotate.ll b/test/CodeGen/X86/rotate.ll
index c567c0d33cf26..1e20273194d58 100644
--- a/test/CodeGen/X86/rotate.ll
+++ b/test/CodeGen/X86/rotate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {ro\[rl\]} | count 12
 
 define i32 @rotl32(i32 %A, i8 %Amt) {
diff --git a/test/CodeGen/X86/rotate2.ll b/test/CodeGen/X86/rotate2.ll
index 40e954cbdd015..2eea3999e7b8c 100644
--- a/test/CodeGen/X86/rotate2.ll
+++ b/test/CodeGen/X86/rotate2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rol | count 2
+; RUN: llc < %s -march=x86-64 | grep rol | count 2
 
 define i64 @test1(i64 %x) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/scalar-extract.ll b/test/CodeGen/X86/scalar-extract.ll
index 172c424a782f1..2845838409333 100644
--- a/test/CodeGen/X86/scalar-extract.ll
+++ b/test/CodeGen/X86/scalar-extract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+mmx -o %t
 ; RUN: not grep movq  %t
 
 ; Check that widening doesn't introduce a mmx register in this case when
diff --git a/test/CodeGen/X86/scalar-min-max-fill-operand.ll b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
index 6a6283a10dab7..fe40758d8ecd9 100644
--- a/test/CodeGen/X86/scalar-min-max-fill-operand.ll
+++ b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
@@ -1,20 +1,20 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep min | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep max | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 2
+; RUN: llc < %s -march=x86-64 | grep min | count 1
+; RUN: llc < %s -march=x86-64 | grep max | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
 
 declare float @bar()
 
-define float @foo(float %a)
+define float @foo(float %a) nounwind
 {
   %s = call float @bar()
   %t = fcmp olt float %s, %a
   %u = select i1 %t, float %s, float %a
   ret float %u
 }
-define float @hem(float %a)
+define float @hem(float %a) nounwind
 {
   %s = call float @bar()
-  %t = fcmp uge float %s, %a
+  %t = fcmp ogt float %s, %a
   %u = select i1 %t, float %s, float %a
   ret float %u
 }
diff --git a/test/CodeGen/X86/scalar_sse_minmax.ll b/test/CodeGen/X86/scalar_sse_minmax.ll
index 8c030b88440d4..bc4ab5d836c78 100644
--- a/test/CodeGen/X86/scalar_sse_minmax.ll
+++ b/test/CodeGen/X86/scalar_sse_minmax.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,+sse2 | \
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \
 ; RUN:   grep mins | count 3
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,+sse2 | \
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \
 ; RUN:   grep maxs | count 2
 
 declare i1 @llvm.isunordered.f64(double, double)
diff --git a/test/CodeGen/X86/scalarize-bitcast.ll b/test/CodeGen/X86/scalarize-bitcast.ll
index a07f9396040e0..f6b29ecfbb60d 100644
--- a/test/CodeGen/X86/scalarize-bitcast.ll
+++ b/test/CodeGen/X86/scalarize-bitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 ; PR3886
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/scev-interchange.ll b/test/CodeGen/X86/scev-interchange.ll
index b253dd975ff0e..81c919f8dfffa 100644
--- a/test/CodeGen/X86/scev-interchange.ll
+++ b/test/CodeGen/X86/scev-interchange.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-	%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
 	%"struct.DataOutBase::GmvFlags" = type { i32 }
 	%"struct.FE_DGPNonparametric<3>" = type { [1156 x i8], i32, %"struct.PolynomialSpace<1>" }
-	%"struct.FE_Q<3>" = type { %"struct.FE_DGPNonparametric<3>", %"struct.std::vector<int,std::allocator<int> >" }
 	%"struct.FiniteElementData<1>" = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
 	%struct.Line = type { [2 x i32] }
 	%"struct.PolynomialSpace<1>" = type { %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >", i32, %"struct.std::vector<int,std::allocator<int> >", %"struct.std::vector<int,std::allocator<int> >" }
@@ -12,9 +10,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 	%struct.Subscriptor = type { i32 (...)**, i32, %"struct.std::type_info"* }
 	%"struct.TableBase<2,double>" = type { %struct.Subscriptor, double*, i32, %"struct.TableIndices<2>" }
 	%"struct.TableIndices<2>" = type { %struct.Line }
-	%struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* }
-	%struct.pthread_attr_t = type { i64, [48 x i8] }
-	%struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s }
 	%"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" }
 	%"struct.std::_Bit_iterator_base" = type { i64*, i32 }
 	%"struct.std::_Bvector_base<std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" }
@@ -34,21 +29,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 	%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
 	%"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" = type { %"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" }
 
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific		; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific		; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create		; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel		; <i32 (i64)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)* @pthread_mutex_init		; <i32 (%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create		; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete		; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%"struct.DataOutBase::GmvFlags"*)* @pthread_mutexattr_init		; <i32 (%"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%"struct.DataOutBase::GmvFlags"*, i32)* @pthread_mutexattr_settype		; <i32 (%"struct.DataOutBase::GmvFlags"*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%"struct.DataOutBase::GmvFlags"*)* @pthread_mutexattr_destroy		; <i32 (%"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
-
 declare void @_Unwind_Resume(i8*)
 
 declare i8* @_Znwm(i64)
@@ -71,7 +51,7 @@ declare fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vecto
 
 declare fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias nocapture sret, i32)
 
-define fastcc void @_ZN4FE_QILi3EEC1Ej(%"struct.FE_Q<3>"* %this, i32 %degree) {
+define fastcc void @_ZN4FE_QILi3EEC1Ej(i32 %degree) {
 entry:
 	invoke fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* undef, i64 1, i8* undef)
 			to label %invcont.i unwind label %lpad.i
@@ -356,31 +336,3 @@ lpad204.i:		; preds = %invcont86.i
 }
 
 declare fastcc void @_ZN11Polynomials19LagrangeEquidistant23generate_complete_basisEj(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* noalias nocapture sret, i32)
-
-declare i32 @pthread_once(i32*, void ()*)
-
-declare i8* @pthread_getspecific(i32)
-
-declare i32 @pthread_setspecific(i32, i8*)
-
-declare i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
-
-declare i32 @pthread_cancel(i64)
-
-declare i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
-
-declare i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
-
-declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
-
-declare i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)
-
-declare i32 @pthread_key_create(i32*, void (i8*)*)
-
-declare i32 @pthread_key_delete(i32)
-
-declare i32 @pthread_mutexattr_init(%"struct.DataOutBase::GmvFlags"*)
-
-declare i32 @pthread_mutexattr_settype(%"struct.DataOutBase::GmvFlags"*, i32)
-
-declare i32 @pthread_mutexattr_destroy(%"struct.DataOutBase::GmvFlags"*)
diff --git a/test/CodeGen/X86/select-zero-one.ll b/test/CodeGen/X86/select-zero-one.ll
index 70785e9978fbc..c38a02080523a 100644
--- a/test/CodeGen/X86/select-zero-one.ll
+++ b/test/CodeGen/X86/select-zero-one.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep cmov
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xor
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movzbl | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep cmov
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movzbl | count 1
 
 @r1 = weak global i32 0
 
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index e5d6101253727..95ed9e97cdfda 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium 
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah 
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah  | not grep set
+; RUN: llc < %s -march=x86 -mcpu=pentium 
+; RUN: llc < %s -march=x86 -mcpu=yonah 
+; RUN: llc < %s -march=x86 -mcpu=yonah  | not grep set
 
 define i1 @boolSel(i1 %A, i1 %B, i1 %C) nounwind {
 	%X = select i1 %A, i1 %B, i1 %C		; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/setoeq.ll b/test/CodeGen/X86/setoeq.ll
index 25a2b7e0b4936..4a9c1bacc5f2b 100644
--- a/test/CodeGen/X86/setoeq.ll
+++ b/test/CodeGen/X86/setoeq.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86  | grep set | count 2
-; RUN: llvm-as < %s | llc -march=x86  | grep and
+; RUN: llc < %s -march=x86  | grep set | count 2
+; RUN: llc < %s -march=x86  | grep and
 
 define zeroext i8 @t(double %x) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/setuge.ll b/test/CodeGen/X86/setuge.ll
index 3f1d882754ee7..4ca2f1871c0f5 100644
--- a/test/CodeGen/X86/setuge.ll
+++ b/test/CodeGen/X86/setuge.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86  | not grep set
+; RUN: llc < %s -march=x86  | not grep set
 
 declare i1 @llvm.isunordered.f32(float, float)
 
diff --git a/test/CodeGen/X86/sext-load.ll b/test/CodeGen/X86/sext-load.ll
index a6d1080bd84a9..c9b39d3a489e2 100644
--- a/test/CodeGen/X86/sext-load.ll
+++ b/test/CodeGen/X86/sext-load.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movsbl
+; RUN: llc < %s -march=x86 | grep movsbl
 
 define i32 @foo(i32 %X) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/sext-ret-val.ll b/test/CodeGen/X86/sext-ret-val.ll
index 946e6c78892ef..da1a1871e7e8c 100644
--- a/test/CodeGen/X86/sext-ret-val.ll
+++ b/test/CodeGen/X86/sext-ret-val.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movzbl | count 1
+; RUN: llc < %s -march=x86 | grep movzbl | count 1
 ; rdar://6699246
 
 define signext i8 @t1(i8* %A) nounwind readnone ssp {
diff --git a/test/CodeGen/X86/sext-select.ll b/test/CodeGen/X86/sext-select.ll
index 839ebc2b6c178..4aca0407b36f3 100644
--- a/test/CodeGen/X86/sext-select.ll
+++ b/test/CodeGen/X86/sext-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movsw
+; RUN: llc < %s -march=x86 | grep movsw
 ; PR2139
 
 declare void @abort()
diff --git a/test/CodeGen/X86/sext-trunc.ll b/test/CodeGen/X86/sext-trunc.ll
index 97b4666827023..2eaf42577c701 100644
--- a/test/CodeGen/X86/sext-trunc.ll
+++ b/test/CodeGen/X86/sext-trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: llc < %s -march=x86 > %t
 ; RUN: grep movsbl %t
 ; RUN: not grep movz %t
 ; RUN: not grep and %t
diff --git a/test/CodeGen/X86/sfence.ll b/test/CodeGen/X86/sfence.ll
index fc75ccbcb6298..478287919ec49 100644
--- a/test/CodeGen/X86/sfence.ll
+++ b/test/CodeGen/X86/sfence.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep sfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep sfence
 
 declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
 
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index b6d78a485783d..fd278c2239f07 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86    | grep and | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep and 
+; RUN: llc < %s -march=x86    | grep and | count 1
+; RUN: llc < %s -march=x86-64 | not grep and 
 
 define i32 @t1(i32 %t, i32 %val) nounwind {
        %shamt = and i32 %t, 31
diff --git a/test/CodeGen/X86/shift-coalesce.ll b/test/CodeGen/X86/shift-coalesce.ll
index 4662628b672e2..d38f9a88fcd6c 100644
--- a/test/CodeGen/X86/shift-coalesce.ll
+++ b/test/CodeGen/X86/shift-coalesce.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {shld.*CL}
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   not grep {mov CL, BL}
 
 ; PR687
diff --git a/test/CodeGen/X86/shift-codegen.ll b/test/CodeGen/X86/shift-codegen.ll
index deb4ed1f309b8..4cba1834bf6c2 100644
--- a/test/CodeGen/X86/shift-codegen.ll
+++ b/test/CodeGen/X86/shift-codegen.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 | \
+; RUN: llc < %s -relocation-model=static -march=x86 | \
 ; RUN:   grep {shll	\$3} | count 2
 
 ; This should produce two shll instructions, not any lea's.
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
index 543bb22378754..e443ac19a80f6 100644
--- a/test/CodeGen/X86/shift-combine.ll
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep shrl
+; RUN: llc < %s | not grep shrl
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/shift-double.ll b/test/CodeGen/X86/shift-double.ll
index 24017fe2178aa..5adee7c76941a 100644
--- a/test/CodeGen/X86/shift-double.ll
+++ b/test/CodeGen/X86/shift-double.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {sh\[lr\]d} | count 5
 
 define i64 @test1(i64 %X, i8 %C) {
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index d26823220ff85..872817fd4953d 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN: llc < %s -march=x86 | \
 ; RUN:   grep {s\[ah\]\[rl\]l} | count 1
 
 define i32* @test1(i32* %P, i32 %X) {
diff --git a/test/CodeGen/X86/shift-i128.ll b/test/CodeGen/X86/shift-i128.ll
index fc22a3c69139d..c4d15ae9053ee 100644
--- a/test/CodeGen/X86/shift-i128.ll
+++ b/test/CodeGen/X86/shift-i128.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
 
 define void @t(i128 %x, i128 %a, i128* nocapture %r) nounwind {
 entry:
diff --git a/test/CodeGen/X86/shift-i256.ll b/test/CodeGen/X86/shift-i256.ll
index 4a29b8626c6e1..d5f65a6ed18cd 100644
--- a/test/CodeGen/X86/shift-i256.ll
+++ b/test/CodeGen/X86/shift-i256.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
 
 define void @t(i256 %x, i256 %a, i256* nocapture %r) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/shift-one.ll b/test/CodeGen/X86/shift-one.ll
index dd49b7e04cf1d..0f80f90c773e0 100644
--- a/test/CodeGen/X86/shift-one.ll
+++ b/test/CodeGen/X86/shift-one.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep leal
+; RUN: llc < %s -march=x86 | not grep leal
 
 @x = external global i32                ; <i32*> [#uses=1]
 
diff --git a/test/CodeGen/X86/shift-parts.ll b/test/CodeGen/X86/shift-parts.ll
new file mode 100644
index 0000000000000..ce4f538f4de43
--- /dev/null
+++ b/test/CodeGen/X86/shift-parts.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 | grep shrdq
+; PR4736
+
+%0 = type { i32, i8, [35 x i8] }
+
+@g_144 = external global %0, align 8              ; <%0*> [#uses=1]
+
+define i32 @int87(i32 %uint64p_8) nounwind {
+entry:
+  %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; <i320> [#uses=1]
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %call3.in.in.in.v = select i1 undef, i320 192, i320 128 ; <i320> [#uses=1]
+  %call3.in.in.in = lshr i320 %srcval4, %call3.in.in.in.v ; <i320> [#uses=1]
+  %call3.in = trunc i320 %call3.in.in.in to i32   ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %call3.in, 0              ; <i1> [#uses=1]
+  br i1 %tobool, label %for.cond, label %if.then
+
+if.then:                                          ; preds = %for.cond
+  ret i32 1
+}
diff --git a/test/CodeGen/X86/shl_elim.ll b/test/CodeGen/X86/shl_elim.ll
index d3616f4ac5de4..445889166bd5c 100644
--- a/test/CodeGen/X86/shl_elim.ll
+++ b/test/CodeGen/X86/shl_elim.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl	8(.esp), %eax}
-; RUN: llvm-as < %s | llc -march=x86 | grep {shrl	.eax}
-; RUN: llvm-as < %s | llc -march=x86 | grep {movswl	.ax, .eax}
+; RUN: llc < %s -march=x86 | grep {movl	8(.esp), %eax}
+; RUN: llc < %s -march=x86 | grep {shrl	.eax}
+; RUN: llc < %s -march=x86 | grep {movswl	.ax, .eax}
 
 define i32 @test1(i64 %a) {
         %tmp29 = lshr i64 %a, 24                ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/shrink-fp-const1.ll b/test/CodeGen/X86/shrink-fp-const1.ll
index 3406aeeeb5c5d..49b9fa3c4129a 100644
--- a/test/CodeGen/X86/shrink-fp-const1.ll
+++ b/test/CodeGen/X86/shrink-fp-const1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | not grep cvtss2sd
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | not grep cvtss2sd
 ; PR1264
 
 define double @foo(double %x) {
diff --git a/test/CodeGen/X86/shrink-fp-const2.ll b/test/CodeGen/X86/shrink-fp-const2.ll
index 7e48b1bba8f1e..3d5203be09a09 100644
--- a/test/CodeGen/X86/shrink-fp-const2.ll
+++ b/test/CodeGen/X86/shrink-fp-const2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep flds
+; RUN: llc < %s -march=x86 | grep flds
 ; This should be a flds, not fldt.
 define x86_fp80 @test2() nounwind  {
 entry:
diff --git a/test/CodeGen/X86/sincos.ll b/test/CodeGen/X86/sincos.ll
index 27215956b64d8..13f932982f14c 100644
--- a/test/CodeGen/X86/sincos.ll
+++ b/test/CodeGen/X86/sincos.ll
@@ -1,50 +1,48 @@
 ; Make sure this testcase codegens to the sin and cos instructions, not calls
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math  | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math  | \
 ; RUN:   grep sin\$ | count 3
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math  | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math  | \
 ; RUN:   grep cos\$ | count 3
 
-declare float  @sinf(float)
+declare float  @sinf(float) readonly
 
-declare double @sin(double)
+declare double @sin(double) readonly
 
-declare x86_fp80 @sinl(x86_fp80)
+declare x86_fp80 @sinl(x86_fp80) readonly
 
 define float @test1(float %X) {
-        %Y = call float @sinf(float %X)
+        %Y = call float @sinf(float %X) readonly
         ret float %Y
 }
 
 define double @test2(double %X) {
-        %Y = call double @sin(double %X)
+        %Y = call double @sin(double %X) readonly
         ret double %Y
 }
 
 define x86_fp80 @test3(x86_fp80 %X) {
-        %Y = call x86_fp80 @sinl(x86_fp80 %X)
+        %Y = call x86_fp80 @sinl(x86_fp80 %X) readonly
         ret x86_fp80 %Y
 }
 
-declare float @cosf(float)
+declare float @cosf(float) readonly
 
-declare double @cos(double)
+declare double @cos(double) readonly
 
-declare x86_fp80 @cosl(x86_fp80)
+declare x86_fp80 @cosl(x86_fp80) readonly
 
 define float @test4(float %X) {
-        %Y = call float @cosf(float %X)
+        %Y = call float @cosf(float %X) readonly
         ret float %Y
 }
 
 define double @test5(double %X) {
-        %Y = call double @cos(double %X)
+        %Y = call double @cos(double %X) readonly
         ret double %Y
 }
 
 define x86_fp80 @test6(x86_fp80 %X) {
-        %Y = call x86_fp80 @cosl(x86_fp80 %X)
+        %Y = call x86_fp80 @cosl(x86_fp80 %X) readonly
         ret x86_fp80 %Y
 }
 
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
new file mode 100644
index 0000000000000..0f4e63f9c674f
--- /dev/null
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+
+; Currently, floating-point selects are lowered to CFG triangles.
+; This means that one side of the select is always unconditionally
+; evaluated, however with MachineSink we can sink the other side so
+; that it's conditionally evaluated.
+
+; CHECK: foo:
+; CHECK-NEXT: divsd
+; CHECK:      testb $1, %dil
+; CHECK-NEXT: jne
+; CHECK:      divsd
+
+define double @foo(double %x, double %y, i1 %c) nounwind {
+  %a = fdiv double %x, 3.2
+  %b = fdiv double %y, 3.3
+  %z = select i1 %c, double %a, double %b
+  ret double %z
+}
+
+; Hoist floating-point constant-pool loads out of loops.
+
+; CHECK: bar:
+; CHECK: movsd
+; CHECK: align
+define void @bar(double* nocapture %p, i64 %n) nounwind {
+entry:
+  %0 = icmp sgt i64 %n, 0
+  br i1 %0, label %bb, label %return
+
+bb:
+  %i.03 = phi i64 [ 0, %entry ], [ %3, %bb ]
+  %scevgep = getelementptr double* %p, i64 %i.03
+  %1 = load double* %scevgep, align 8
+  %2 = fdiv double 3.200000e+00, %1
+  store double %2, double* %scevgep, align 8
+  %3 = add nsw i64 %i.03, 1
+  %exitcond = icmp eq i64 %3, %n
+  br i1 %exitcond, label %return, label %bb
+
+return:
+  ret void
+}
diff --git a/test/CodeGen/X86/small-byval-memcpy.ll b/test/CodeGen/X86/small-byval-memcpy.ll
index 8b87f7449cde4..9ec9182e5e3c2 100644
--- a/test/CodeGen/X86/small-byval-memcpy.ll
+++ b/test/CodeGen/X86/small-byval-memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | not grep movs
+; RUN: llc < %s | not grep movs
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/X86/smul-with-overflow-2.ll b/test/CodeGen/X86/smul-with-overflow-2.ll
index c3dbfd796f20b..7c23adba406cf 100644
--- a/test/CodeGen/X86/smul-with-overflow-2.ll
+++ b/test/CodeGen/X86/smul-with-overflow-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mul | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep add | count 3
+; RUN: llc < %s -march=x86 | grep mul | count 1
+; RUN: llc < %s -march=x86 | grep add | count 3
 
 define i32 @t1(i32 %a, i32 %b) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/smul-with-overflow-3.ll b/test/CodeGen/X86/smul-with-overflow-3.ll
index aa5e67a02998a..49c31f56ae835 100644
--- a/test/CodeGen/X86/smul-with-overflow-3.ll
+++ b/test/CodeGen/X86/smul-with-overflow-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jno} | count 1
+; RUN: llc < %s -march=x86 | grep {jno} | count 1
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/smul-with-overflow.ll b/test/CodeGen/X86/smul-with-overflow.ll
index 6aefc03a39205..6d125e415e04a 100644
--- a/test/CodeGen/X86/smul-with-overflow.ll
+++ b/test/CodeGen/X86/smul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1
+; RUN: llc < %s -march=x86 | grep {jo} | count 1
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/soft-fp.ll b/test/CodeGen/X86/soft-fp.ll
index 0c697def1ec2a..a52135dc90878 100644
--- a/test/CodeGen/X86/soft-fp.ll
+++ b/test/CodeGen/X86/soft-fp.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86    -mattr=+sse2 -soft-float | not grep xmm
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 -soft-float | not grep xmm
+; RUN: llc < %s -march=x86    -mattr=+sse2 -soft-float | not grep xmm
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 -soft-float | not grep xmm
 
 	%struct.__va_list_tag = type { i32, i32, i8*, i8* }
 
diff --git a/test/CodeGen/X86/split-eh-lpad-edges.ll b/test/CodeGen/X86/split-eh-lpad-edges.ll
index 281ee7782da1e..fd40a7f703789 100644
--- a/test/CodeGen/X86/split-eh-lpad-edges.ll
+++ b/test/CodeGen/X86/split-eh-lpad-edges.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep jmp
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep jmp
 ; rdar://6647639
 
 	%struct.FetchPlanHeader = type { i8*, i8*, i32, i8*, i8*, i8*, i8*, i8*, %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)*, %struct.__attributeDescriptionFlags }
diff --git a/test/CodeGen/X86/split-select.ll b/test/CodeGen/X86/split-select.ll
index 0b7804da4e714..07d4d52f97a37 100644
--- a/test/CodeGen/X86/split-select.ll
+++ b/test/CodeGen/X86/split-select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep test | count 1
+; RUN: llc < %s -march=x86-64 | grep test | count 1
 
 define void @foo(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) {
   %x = select i1 %c, <2 x i16> %a, <2 x i16> %b
diff --git a/test/CodeGen/X86/split-vector-rem.ll b/test/CodeGen/X86/split-vector-rem.ll
index 8c88769be78f9..681c6b0beaa09 100644
--- a/test/CodeGen/X86/split-vector-rem.ll
+++ b/test/CodeGen/X86/split-vector-rem.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 16
-; RUN: llvm-as < %s | llc -march=x86-64 | grep fmodf | count 8
+; RUN: llc < %s -march=x86-64 | grep div | count 16
+; RUN: llc < %s -march=x86-64 | grep fmodf | count 8
 
 define <8 x i32> @foo(<8 x i32> %t, <8 x i32> %u) {
 	%m = srem <8 x i32> %t, %u
diff --git a/test/CodeGen/X86/sret.ll b/test/CodeGen/X86/sret.ll
index 30e5af41123dd..b9455300bdbb1 100644
--- a/test/CodeGen/X86/sret.ll
+++ b/test/CodeGen/X86/sret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep ret | grep 4
+; RUN: llc < %s -march=x86 | grep ret | grep 4
 
 	%struct.foo = type { [4 x i32] }
 
diff --git a/test/CodeGen/X86/sse-align-0.ll b/test/CodeGen/X86/sse-align-0.ll
index 5a888b2e784b7..b12a87d614d28 100644
--- a/test/CodeGen/X86/sse-align-0.ll
+++ b/test/CodeGen/X86/sse-align-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep mov
+; RUN: llc < %s -march=x86-64 | not grep mov
 
 define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
   %t = load <4 x float>* %p
diff --git a/test/CodeGen/X86/sse-align-1.ll b/test/CodeGen/X86/sse-align-1.ll
index 0edc6e094580d..c7a5cd5591207 100644
--- a/test/CodeGen/X86/sse-align-1.ll
+++ b/test/CodeGen/X86/sse-align-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movap | count 2
+; RUN: llc < %s -march=x86-64 | grep movap | count 2
 
 define <4 x float> @foo(<4 x float>* %p) nounwind {
   %t = load <4 x float>* %p
diff --git a/test/CodeGen/X86/sse-align-10.ll b/test/CodeGen/X86/sse-align-10.ll
index 1a23eb2ae3d17..0f91697125567 100644
--- a/test/CodeGen/X86/sse-align-10.ll
+++ b/test/CodeGen/X86/sse-align-10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movups | count 1
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
 
 define <2 x i64> @bar(<2 x i64>* %p) nounwind {
   %t = load <2 x i64>* %p, align 8
diff --git a/test/CodeGen/X86/sse-align-11.ll b/test/CodeGen/X86/sse-align-11.ll
index a10b102c6b952..aa1b4370bccfc 100644
--- a/test/CodeGen/X86/sse-align-11.ll
+++ b/test/CodeGen/X86/sse-align-11.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -mtriple=linux | grep movups
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=linux | grep movups
 
 define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
 entry:
diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll
index 297f1c458db91..4f025b916fd9c 100644
--- a/test/CodeGen/X86/sse-align-12.ll
+++ b/test/CodeGen/X86/sse-align-12.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep unpck %t | count 2
 ; RUN: grep shuf %t | count 2
 ; RUN: grep ps %t | count 4
diff --git a/test/CodeGen/X86/sse-align-2.ll b/test/CodeGen/X86/sse-align-2.ll
index ba693a2001515..102c3fb06cd7e 100644
--- a/test/CodeGen/X86/sse-align-2.ll
+++ b/test/CodeGen/X86/sse-align-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | count 2
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
 
 define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
   %t = load <4 x float>* %p, align 4
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll
index 5bbcd59e0e9f9..c42f7f0bad997 100644
--- a/test/CodeGen/X86/sse-align-3.ll
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movap | count 2
+; RUN: llc < %s -march=x86-64 | grep movap | count 2
 
 define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
   store <4 x float> %x, <4 x float>* %p
diff --git a/test/CodeGen/X86/sse-align-4.ll b/test/CodeGen/X86/sse-align-4.ll
index f7e5fe3d684be..4c59934917f31 100644
--- a/test/CodeGen/X86/sse-align-4.ll
+++ b/test/CodeGen/X86/sse-align-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | count 2
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
 
 define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
   store <4 x float> %x, <4 x float>* %p, align 4
diff --git a/test/CodeGen/X86/sse-align-5.ll b/test/CodeGen/X86/sse-align-5.ll
index 19e0eaf8fff8a..21cd2311b9169 100644
--- a/test/CodeGen/X86/sse-align-5.ll
+++ b/test/CodeGen/X86/sse-align-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movaps | count 1
+; RUN: llc < %s -march=x86-64 | grep movaps | count 1
 
 define <2 x i64> @bar(<2 x i64>* %p) nounwind {
   %t = load <2 x i64>* %p
diff --git a/test/CodeGen/X86/sse-align-6.ll b/test/CodeGen/X86/sse-align-6.ll
index dace291730f7f..0bbf4228a40bb 100644
--- a/test/CodeGen/X86/sse-align-6.ll
+++ b/test/CodeGen/X86/sse-align-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movups | count 1
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
 
 define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
   %t = load <2 x i64>* %p, align 8
diff --git a/test/CodeGen/X86/sse-align-7.ll b/test/CodeGen/X86/sse-align-7.ll
index 7fb65b5f9e85e..5784481c5ae9a 100644
--- a/test/CodeGen/X86/sse-align-7.ll
+++ b/test/CodeGen/X86/sse-align-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movaps | count 1
+; RUN: llc < %s -march=x86-64 | grep movaps | count 1
 
 define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
   store <2 x i64> %x, <2 x i64>* %p
diff --git a/test/CodeGen/X86/sse-align-8.ll b/test/CodeGen/X86/sse-align-8.ll
index 17a3d2987fff2..cfeff8161c5c0 100644
--- a/test/CodeGen/X86/sse-align-8.ll
+++ b/test/CodeGen/X86/sse-align-8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movups | count 1
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
 
 define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
   store <2 x i64> %x, <2 x i64>* %p, align 8
diff --git a/test/CodeGen/X86/sse-align-9.ll b/test/CodeGen/X86/sse-align-9.ll
index 24b437ab3534e..cb26b9535a818 100644
--- a/test/CodeGen/X86/sse-align-9.ll
+++ b/test/CodeGen/X86/sse-align-9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | count 2
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
 
 define <4 x float> @foo(<4 x float>* %p) nounwind {
   %t = load <4 x float>* %p, align 4
diff --git a/test/CodeGen/X86/sse-fcopysign.ll b/test/CodeGen/X86/sse-fcopysign.ll
index d8c32831a1e91..0e0e4a9a86cfc 100644
--- a/test/CodeGen/X86/sse-fcopysign.ll
+++ b/test/CodeGen/X86/sse-fcopysign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep test
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep test
 
 define float @tst1(float %a, float %b) {
 	%tmp = tail call float @copysignf( float %b, float %a )
diff --git a/test/CodeGen/X86/sse-load-ret.ll b/test/CodeGen/X86/sse-load-ret.ll
index cbf3eb0e5f0d8..1ebcb1a6fa646 100644
--- a/test/CodeGen/X86/sse-load-ret.ll
+++ b/test/CodeGen/X86/sse-load-ret.ll
@@ -1,7 +1,5 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=x86 -mcpu=yonah | not grep movss
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=x86 -mcpu=yonah | not grep xmm
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep movss
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm
 
 define double @test1(double* %P) {
         %X = load double* %P            ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
new file mode 100644
index 0000000000000..17ffb5e464aa0
--- /dev/null
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -0,0 +1,392 @@
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+
+; Some of these patterns can be matched as SSE min or max. Some of
+; then can be matched provided that the operands are swapped.
+; Some of them can't be matched at all and require a comparison
+; and a conditional branch.
+
+; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse}
+; x_ : use 0.0 instead of %y
+; _inverse : swap the arms of the select.
+
+; CHECK:      ogt:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt(double %x, double %y) nounwind {
+  %c = fcmp ogt double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      olt:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt(double %x, double %y) nounwind {
+  %c = fcmp olt double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ogt_inverse:
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt_inverse(double %x, double %y) nounwind {
+  %c = fcmp ogt double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      olt_inverse:
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt_inverse(double %x, double %y) nounwind {
+  %c = fcmp olt double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      oge:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge(double %x, double %y) nounwind {
+  %c = fcmp oge double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ole:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole(double %x, double %y) nounwind {
+  %c = fcmp ole double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      oge_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge_inverse(double %x, double %y) nounwind {
+  %c = fcmp oge double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      ole_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole_inverse(double %x, double %y) nounwind {
+  %c = fcmp ole double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      x_ogt:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt(double %x) nounwind {
+  %c = fcmp ogt double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_olt:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt(double %x) nounwind {
+  %c = fcmp olt double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ogt_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt_inverse(double %x) nounwind {
+  %c = fcmp ogt double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_olt_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt_inverse(double %x) nounwind {
+  %c = fcmp olt double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_oge:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge(double %x) nounwind {
+  %c = fcmp oge double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ole:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole(double %x) nounwind {
+  %c = fcmp ole double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_oge_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge_inverse(double %x) nounwind {
+  %c = fcmp oge double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_ole_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole_inverse(double %x) nounwind {
+  %c = fcmp ole double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      ugt:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt(double %x, double %y) nounwind {
+  %c = fcmp ugt double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ult:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult(double %x, double %y) nounwind {
+  %c = fcmp ult double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ugt_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt_inverse(double %x, double %y) nounwind {
+  %c = fcmp ugt double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      ult_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult_inverse(double %x, double %y) nounwind {
+  %c = fcmp ult double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      uge:
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge(double %x, double %y) nounwind {
+  %c = fcmp uge double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ule:
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule(double %x, double %y) nounwind {
+  %c = fcmp ule double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      uge_inverse:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge_inverse(double %x, double %y) nounwind {
+  %c = fcmp uge double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      ule_inverse:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule_inverse(double %x, double %y) nounwind {
+  %c = fcmp ule double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      x_ugt:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt(double %x) nounwind {
+  %c = fcmp ugt double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ult:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult(double %x) nounwind {
+  %c = fcmp ult double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ugt_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt_inverse(double %x) nounwind {
+  %c = fcmp ugt double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_ult_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult_inverse(double %x) nounwind {
+  %c = fcmp ult double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_uge:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge(double %x) nounwind {
+  %c = fcmp uge double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ule:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule(double %x) nounwind {
+  %c = fcmp ule double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_uge_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge_inverse(double %x) nounwind {
+  %c = fcmp uge double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_ule_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule_inverse(double %x) nounwind {
+  %c = fcmp ule double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; Test a few more misc. cases.
+
+; CHECK: clampTo3k_a:
+; CHECK: minsd
+define double @clampTo3k_a(double %x) nounwind readnone {
+entry:
+  %0 = fcmp ogt double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_b:
+; CHECK: minsd
+define double @clampTo3k_b(double %x) nounwind readnone {
+entry:
+  %0 = fcmp uge double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_c:
+; CHECK: maxsd
+define double @clampTo3k_c(double %x) nounwind readnone {
+entry:
+  %0 = fcmp olt double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_d:
+; CHECK: maxsd
+define double @clampTo3k_d(double %x) nounwind readnone {
+entry:
+  %0 = fcmp ule double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_e:
+; CHECK: maxsd
+define double @clampTo3k_e(double %x) nounwind readnone {
+entry:
+  %0 = fcmp olt double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_f:
+; CHECK: maxsd
+define double @clampTo3k_f(double %x) nounwind readnone {
+entry:
+  %0 = fcmp ule double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_g:
+; CHECK: minsd
+define double @clampTo3k_g(double %x) nounwind readnone {
+entry:
+  %0 = fcmp ogt double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_h:
+; CHECK: minsd
+define double @clampTo3k_h(double %x) nounwind readnone {
+entry:
+  %0 = fcmp uge double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
diff --git a/test/CodeGen/X86/sse-varargs.ll b/test/CodeGen/X86/sse-varargs.ll
index 806126da2faf4..da38f0e148f6d 100644
--- a/test/CodeGen/X86/sse-varargs.ll
+++ b/test/CodeGen/X86/sse-varargs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep xmm | grep esp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xmm | grep esp
 
 define i32 @t() nounwind  {
 entry:
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
new file mode 100644
index 0000000000000..9f926f2bee7b3
--- /dev/null
+++ b/test/CodeGen/X86/sse2.ll
@@ -0,0 +1,34 @@
+; Tests for SSE2 and below, without SSE3+.
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 | FileCheck %s
+
+define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
+	%tmp3 = load <2 x double>* %A, align 16
+	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
+	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
+	store <2 x double> %tmp9, <2 x double>* %r, align 16
+	ret void
+        
+; CHECK: t1:
+; CHECK: 	movl	8(%esp), %eax
+; CHECK-NEXT: 	movapd	(%eax), %xmm0
+; CHECK-NEXT: 	movlpd	12(%esp), %xmm0
+; CHECK-NEXT: 	movl	4(%esp), %eax
+; CHECK-NEXT: 	movapd	%xmm0, (%eax)
+; CHECK-NEXT: 	ret
+}
+
+define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
+	%tmp3 = load <2 x double>* %A, align 16
+	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
+	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
+	store <2 x double> %tmp9, <2 x double>* %r, align 16
+	ret void
+        
+; CHECK: t2:
+; CHECK: 	movl	8(%esp), %eax
+; CHECK-NEXT: 	movapd	(%eax), %xmm0
+; CHECK-NEXT: 	movhpd	12(%esp), %xmm0
+; CHECK-NEXT: 	movl	4(%esp), %eax
+; CHECK-NEXT: 	movapd	%xmm0, (%eax)
+; CHECK-NEXT: 	ret
+}
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
new file mode 100644
index 0000000000000..703635c0f53a5
--- /dev/null
+++ b/test/CodeGen/X86/sse3.ll
@@ -0,0 +1,273 @@
+; These are tests for SSE3 codegen.  Yonah has SSE3 and earlier but not SSSE3+.
+
+; RUN: llc < %s -march=x86-64 -mcpu=yonah -mtriple=i686-apple-darwin9\
+; RUN:              | FileCheck %s --check-prefix=X64
+
+; Test for v8xi16 lowering where we extract the first element of the vector and
+; placed it in the second element of the result.
+
+define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
+entry:
+	%tmp3 = load <8 x i16>* %old
+	%tmp6 = shufflevector <8 x i16> %tmp3,
+                <8 x i16> < i16 0, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
+                <8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef  >
+	store <8 x i16> %tmp6, <8 x i16>* %dest
+	ret void
+        
+; X64: t0:
+; X64: 	movddup	(%rsi), %xmm0
+; X64:  pshuflw	$0, %xmm0, %xmm0
+; X64:	xorl	%eax, %eax
+; X64:	pinsrw	$0, %eax, %xmm0
+; X64:	movaps	%xmm0, (%rdi)
+; X64:	ret
+}
+
+define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+	ret <8 x i16> %tmp3
+        
+; X64: t1:
+; X64: 	movl	(%rsi), %eax
+; X64: 	movaps	(%rdi), %xmm0
+; X64: 	pinsrw	$0, %eax, %xmm0
+; X64: 	ret
+}
+
+define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
+	ret <8 x i16> %tmp
+; X64: t2:
+; X64:	pextrw	$1, %xmm1, %eax
+; X64:	pinsrw	$0, %eax, %xmm0
+; X64:	pinsrw	$3, %eax, %xmm0
+; X64:	ret
+}
+
+define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
+	ret <8 x i16> %tmp
+; X64: t3:
+; X64: 	pextrw	$5, %xmm0, %eax
+; X64: 	pshuflw	$44, %xmm0, %xmm0
+; X64: 	pshufhw	$27, %xmm0, %xmm0
+; X64: 	pinsrw	$3, %eax, %xmm0
+; X64: 	ret
+}
+
+define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
+	ret <8 x i16> %tmp
+; X64: t4:
+; X64: 	pextrw	$7, %xmm0, %eax
+; X64: 	pshufhw	$100, %xmm0, %xmm1
+; X64: 	pinsrw	$1, %eax, %xmm1
+; X64: 	pextrw	$1, %xmm0, %eax
+; X64: 	movaps	%xmm1, %xmm0
+; X64: 	pinsrw	$4, %eax, %xmm0
+; X64: 	ret
+}
+
+define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
+	ret <8 x i16> %tmp
+; X64: 	t5:
+; X64: 		movlhps	%xmm1, %xmm0
+; X64: 		pshufd	$114, %xmm0, %xmm0
+; X64: 		ret
+}
+
+define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+	ret <8 x i16> %tmp
+; X64: 	t6:
+; X64: 		movss	%xmm1, %xmm0
+; X64: 		ret
+}
+
+define <8 x i16> @t7(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
+	ret <8 x i16> %tmp
+; X64: 	t7:
+; X64: 		pshuflw	$-80, %xmm0, %xmm0
+; X64: 		pshufhw	$-56, %xmm0, %xmm0
+; X64: 		ret
+}
+
+define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
+	%tmp = load <2 x i64>* %A
+	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>
+	%tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0
+	%tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1
+	%tmp2 = extractelement <8 x i16> %tmp.upgrd.1, i32 2
+	%tmp3 = extractelement <8 x i16> %tmp.upgrd.1, i32 3
+	%tmp4 = extractelement <8 x i16> %tmp.upgrd.1, i32 4
+	%tmp5 = extractelement <8 x i16> %tmp.upgrd.1, i32 5
+	%tmp6 = extractelement <8 x i16> %tmp.upgrd.1, i32 6
+	%tmp7 = extractelement <8 x i16> %tmp.upgrd.1, i32 7
+	%tmp8 = insertelement <8 x i16> undef, i16 %tmp2, i32 0
+	%tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 1
+	%tmp10 = insertelement <8 x i16> %tmp9, i16 %tmp0, i32 2
+	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 3
+	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp6, i32 4
+	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 5
+	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp4, i32 6
+	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 7
+	%tmp15.upgrd.2 = bitcast <8 x i16> %tmp15 to <2 x i64>
+	store <2 x i64> %tmp15.upgrd.2, <2 x i64>* %res
+	ret void
+; X64: 	t8:
+; X64: 		pshuflw	$-58, (%rsi), %xmm0
+; X64: 		pshufhw	$-58, %xmm0, %xmm0
+; X64: 		movaps	%xmm0, (%rdi)
+; X64: 		ret
+}
+
+define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
+	%tmp = load <4 x float>* %r
+	%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
+	%tmp.upgrd.4 = load double* %tmp.upgrd.3
+	%tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0
+	%tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1	
+	%tmp6 = bitcast <2 x double> %tmp5 to <4 x float>	
+	%tmp.upgrd.6 = extractelement <4 x float> %tmp, i32 0	
+	%tmp7 = extractelement <4 x float> %tmp, i32 1		
+	%tmp8 = extractelement <4 x float> %tmp6, i32 0		
+	%tmp9 = extractelement <4 x float> %tmp6, i32 1		
+	%tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.6, i32 0	
+	%tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1
+	%tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2
+	%tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3
+	store <4 x float> %tmp13, <4 x float>* %r
+	ret void
+; X64: 	t9:
+; X64: 		movsd	(%rsi), %xmm0
+; X64: 		movhps	%xmm0, (%rdi)
+; X64: 		ret
+}
+
+
+
+; FIXME: This testcase produces icky code. It can be made much better!
+; PR2585
+
+@g1 = external constant <4 x i32>
+@g2 = external constant <4 x i16>
+
+define internal void @t10() nounwind {
+        load <4 x i32>* @g1, align 16 
+        bitcast <4 x i32> %1 to <8 x i16>
+        shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
+        bitcast <8 x i16> %3 to <2 x i64>  
+        extractelement <2 x i64> %4, i32 0 
+        bitcast i64 %5 to <4 x i16>        
+        store <4 x i16> %6, <4 x i16>* @g2, align 8
+        ret void
+; X64: 	t10:
+; X64: 		movq	_g1@GOTPCREL(%rip), %rax
+; X64: 		movaps	(%rax), %xmm0
+; X64: 		pextrw	$4, %xmm0, %eax
+; X64: 		movaps	%xmm0, %xmm1
+; X64: 		movlhps	%xmm1, %xmm1
+; X64: 		pshuflw	$8, %xmm1, %xmm1
+; X64: 		pinsrw	$2, %eax, %xmm1
+; X64: 		pextrw	$6, %xmm0, %eax
+; X64: 		pinsrw	$3, %eax, %xmm1
+; X64: 		movq	_g2@GOTPCREL(%rip), %rax
+; X64: 		movq	%xmm1, (%rax)
+; X64: 		ret
+}
+
+
+; Pack various elements via shuffles.
+define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+	ret <8 x i16> %tmp7
+
+; X64: t11:
+; X64:	movd	%xmm1, %eax
+; X64:	movlhps	%xmm0, %xmm0
+; X64:	pshuflw	$1, %xmm0, %xmm0
+; X64:	pinsrw	$1, %eax, %xmm0
+; X64:	ret
+}
+
+
+define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
+	ret <8 x i16> %tmp9
+
+; X64: t12:
+; X64: 	pextrw	$3, %xmm1, %eax
+; X64: 	movlhps	%xmm0, %xmm0
+; X64: 	pshufhw	$3, %xmm0, %xmm0
+; X64: 	pinsrw	$5, %eax, %xmm0
+; X64: 	ret
+}
+
+
+define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
+	ret <8 x i16> %tmp9
+; X64: t13:
+; X64: 	punpcklqdq	%xmm0, %xmm1
+; X64: 	pextrw	$3, %xmm1, %eax
+; X64: 	pshufd	$52, %xmm1, %xmm0
+; X64: 	pinsrw	$4, %eax, %xmm0
+; X64: 	ret
+}
+
+
+define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef >
+	ret <8 x i16> %tmp9
+; X64: t14:
+; X64: 	punpcklqdq	%xmm0, %xmm1
+; X64: 	pshufhw	$8, %xmm1, %xmm0
+; X64: 	ret
+}
+
+
+
+define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+        %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+        ret <8 x i16> %tmp8
+; X64: 	t15:
+; X64: 		pextrw	$7, %xmm0, %eax
+; X64: 		punpcklqdq	%xmm1, %xmm0
+; X64: 		pshuflw	$-128, %xmm0, %xmm0
+; X64: 		pinsrw	$2, %eax, %xmm0
+; X64: 		ret
+}
+
+
+; Test yonah where we convert a shuffle to pextrw and pinrsw
+define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone {
+entry:
+        %tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0,  i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+        %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+        ret <16 x i8> %tmp9
+; X64: 	t16:
+; X64: 		movaps	LCPI17_0(%rip), %xmm1
+; X64: 		movd	%xmm1, %eax
+; X64: 		pinsrw	$0, %eax, %xmm1
+; X64: 		pextrw	$8, %xmm0, %eax
+; X64: 		pinsrw	$1, %eax, %xmm1
+; X64: 		pextrw	$1, %xmm1, %ecx
+; X64: 		movd	%xmm1, %edx
+; X64: 		pinsrw	$0, %edx, %xmm1
+; X64: 		movzbl	%cl, %ecx
+; X64: 		andw	$-256, %ax
+; X64: 		orw	%cx, %ax
+; X64: 		movaps	%xmm1, %xmm0
+; X64: 		pinsrw	$1, %eax, %xmm0
+; X64: 		ret
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
new file mode 100644
index 0000000000000..a734c05b8686f
--- /dev/null
+++ b/test/CodeGen/X86/sse41.ll
@@ -0,0 +1,226 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X64
+
+@g16 = external global i16
+
+define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
+        %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
+        ret <4 x i32> %tmp1
+; X32: pinsrd_1:
+; X32:    pinsrd $1, 4(%esp), %xmm0
+
+; X64: pinsrd_1:
+; X64:    pinsrd $1, %edi, %xmm0
+}
+
+define <16 x i8> @pinsrb_1(i8 %s, <16 x i8> %tmp) nounwind {
+        %tmp1 = insertelement <16 x i8> %tmp, i8 %s, i32 1
+        ret <16 x i8> %tmp1
+; X32: pinsrb_1:
+; X32:    pinsrb $1, 4(%esp), %xmm0
+
+; X64: pinsrb_1:
+; X64:    pinsrb $1, %edi, %xmm0
+}
+
+
+define <2 x i64> @pmovsxbd_1(i32* %p) nounwind {
+entry:
+	%0 = load i32* %p, align 4
+	%1 = insertelement <4 x i32> undef, i32 %0, i32 0
+	%2 = insertelement <4 x i32> %1, i32 0, i32 1
+	%3 = insertelement <4 x i32> %2, i32 0, i32 2
+	%4 = insertelement <4 x i32> %3, i32 0, i32 3
+	%5 = bitcast <4 x i32> %4 to <16 x i8>
+	%6 = tail call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %5) nounwind readnone
+	%7 = bitcast <4 x i32> %6 to <2 x i64>
+	ret <2 x i64> %7
+        
+; X32: _pmovsxbd_1:
+; X32:   movl      4(%esp), %eax
+; X32:   pmovsxbd   (%eax), %xmm0
+
+; X64: _pmovsxbd_1:
+; X64:   pmovsxbd   (%rdi), %xmm0
+}
+
+define <2 x i64> @pmovsxwd_1(i64* %p) nounwind readonly {
+entry:
+	%0 = load i64* %p		; <i64> [#uses=1]
+	%tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0		; <<2 x i64>> [#uses=1]
+	%1 = bitcast <2 x i64> %tmp2 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone		; <<4 x i32>> [#uses=1]
+	%3 = bitcast <4 x i32> %2 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %3
+        
+; X32: _pmovsxwd_1:
+; X32:   movl 4(%esp), %eax
+; X32:   pmovsxwd (%eax), %xmm0
+
+; X64: _pmovsxwd_1:
+; X64:   pmovsxwd (%rdi), %xmm0
+}
+
+
+
+
+define <2 x i64> @pmovzxbq_1() nounwind {
+entry:
+	%0 = load i16* @g16, align 2		; <i16> [#uses=1]
+	%1 = insertelement <8 x i16> undef, i16 %0, i32 0		; <<8 x i16>> [#uses=1]
+	%2 = bitcast <8 x i16> %1 to <16 x i8>		; <<16 x i8>> [#uses=1]
+	%3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %3
+
+; X32: _pmovzxbq_1:
+; X32:   movl	L_g16$non_lazy_ptr, %eax
+; X32:   pmovzxbq	(%eax), %xmm0
+
+; X64: _pmovzxbq_1:
+; X64:   movq	_g16@GOTPCREL(%rip), %rax
+; X64:   pmovzxbq	(%rax), %xmm0
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+
+
+define i32 @extractps_1(<4 x float> %v) nounwind {
+  %s = extractelement <4 x float> %v, i32 3
+  %i = bitcast float %s to i32
+  ret i32 %i
+
+; X32: _extractps_1:  
+; X32:	  extractps	$3, %xmm0, %eax
+
+; X64: _extractps_1:  
+; X64:	  extractps	$3, %xmm0, %eax
+}
+define i32 @extractps_2(<4 x float> %v) nounwind {
+  %t = bitcast <4 x float> %v to <4 x i32>
+  %s = extractelement <4 x i32> %t, i32 3
+  ret i32 %s
+
+; X32: _extractps_2:
+; X32:	  extractps	$3, %xmm0, %eax
+
+; X64: _extractps_2:
+; X64:	  extractps	$3, %xmm0, %eax
+}
+
+
+; The non-store form of extractps puts its result into a GPR.
+; This makes it suitable for an extract from a <4 x float> that
+; is bitcasted to i32, but unsuitable for much of anything else.
+
+define float @ext_1(<4 x float> %v) nounwind {
+  %s = extractelement <4 x float> %v, i32 3
+  %t = fadd float %s, 1.0
+  ret float %t
+
+; X32: _ext_1:
+; X32:	  pshufd	$3, %xmm0, %xmm0
+; X32:	  addss	LCPI8_0, %xmm0
+
+; X64: _ext_1:
+; X64:	  pshufd	$3, %xmm0, %xmm0
+; X64:	  addss	LCPI8_0(%rip), %xmm0
+}
+define float @ext_2(<4 x float> %v) nounwind {
+  %s = extractelement <4 x float> %v, i32 3
+  ret float %s
+
+; X32: _ext_2:
+; X32:	  pshufd	$3, %xmm0, %xmm0
+
+; X64: _ext_2:
+; X64:	  pshufd	$3, %xmm0, %xmm0
+}
+define i32 @ext_3(<4 x i32> %v) nounwind {
+  %i = extractelement <4 x i32> %v, i32 3
+  ret i32 %i
+
+; X32: _ext_3:
+; X32:	  pextrd	$3, %xmm0, %eax
+
+; X64: _ext_3:
+; X64:	  pextrd	$3, %xmm0, %eax
+}
+
+define <4 x float> @insertps_1(<4 x float> %t1, <4 x float> %t2) nounwind {
+        %tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwind readnone
+        ret <4 x float> %tmp1
+; X32: _insertps_1:
+; X32:    insertps  $1, %xmm1, %xmm0
+
+; X64: _insertps_1:
+; X64:    insertps  $1, %xmm1, %xmm0
+}
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+define <4 x float> @insertps_2(<4 x float> %t1, float %t2) nounwind {
+        %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
+        ret <4 x float> %tmp1
+; X32: _insertps_2:
+; X32:    insertps  $0, 4(%esp), %xmm0
+
+; X64: _insertps_2:
+; X64:    insertps  $0, %xmm1, %xmm0        
+}
+
+define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
+        %tmp2 = extractelement <4 x float> %t2, i32 0
+        %tmp1 = insertelement <4 x float> %t1, float %tmp2, i32 0
+        ret <4 x float> %tmp1
+; X32: _insertps_3:
+; X32:    insertps  $0, %xmm1, %xmm0        
+
+; X64: _insertps_3:
+; X64:    insertps  $0, %xmm1, %xmm0        
+}
+
+define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+        ret i32 %tmp1
+; X32: _ptestz_1:
+; X32:    ptest 	%xmm1, %xmm0
+; X32:    sete	%al
+
+; X64: _ptestz_1:
+; X64:    ptest 	%xmm1, %xmm0
+; X64:    sete	%al
+}
+
+define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+        ret i32 %tmp1
+; X32: _ptestz_2:
+; X32:    ptest 	%xmm1, %xmm0
+; X32:    setb	%al
+
+; X64: _ptestz_2:
+; X64:    ptest 	%xmm1, %xmm0
+; X64:    setb	%al
+}
+
+define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+        ret i32 %tmp1
+; X32: _ptestz_3:
+; X32:    ptest 	%xmm1, %xmm0
+; X32:    seta	%al
+
+; X64: _ptestz_3:
+; X64:    ptest 	%xmm1, %xmm0
+; X64:    seta	%al
+}
+
+
+declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+
diff --git a/test/CodeGen/X86/sse42.ll b/test/CodeGen/X86/sse42.ll
new file mode 100644
index 0000000000000..c9c4d012102a3
--- /dev/null
+++ b/test/CodeGen/X86/sse42.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64
+
+declare i32 @llvm.x86.sse42.crc32.8(i32, i8) nounwind
+declare i32 @llvm.x86.sse42.crc32.16(i32, i16) nounwind
+declare i32 @llvm.x86.sse42.crc32.32(i32, i32) nounwind
+
+define i32 @crc32_8(i32 %a, i8 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+; X32: _crc32_8:
+; X32:     crc32   8(%esp), %eax
+
+; X64: _crc32_8:
+; X64:     crc32   %sil, %eax
+}
+
+
+define i32 @crc32_16(i32 %a, i16 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.16(i32 %a, i16 %b)
+  ret i32 %tmp
+; X32: _crc32_16:
+; X32:     crc32   8(%esp), %eax
+
+; X64: _crc32_16:
+; X64:     crc32   %si, %eax
+}
+
+
+define i32 @crc32_32(i32 %a, i32 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32(i32 %a, i32 %b)
+  ret i32 %tmp
+; X32: _crc32_32:
+; X32:     crc32   8(%esp), %eax
+
+; X64: _crc32_32:
+; X64:     crc32   %esi, %eax
+}
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll
index 547763e4a7935..dc3d6fe6797dd 100644
--- a/test/CodeGen/X86/sse_reload_fold.ll
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
+; RUN: llc < %s -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
 ; RUN:   grep fail | count 1
 
 declare float @test_f(float %f)
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index dda6f0d928c95..cb65e9b50fe2f 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=static -mcpu=yonah | grep {andpd.*4(%esp), %xmm}
+; RUN: llc < %s -relocation-model=static -mcpu=yonah | grep {andpd.*4(%esp), %xmm}
 
 ; The double argument is at 4(esp) which is 16-byte aligned, allowing us to
 ; fold the load into the andpd.
diff --git a/test/CodeGen/X86/stack-color-with-reg-2.ll b/test/CodeGen/X86/stack-color-with-reg-2.ll
index bc4182f65dcb0..c1f2672293512 100644
--- a/test/CodeGen/X86/stack-color-with-reg-2.ll
+++ b/test/CodeGen/X86/stack-color-with-reg-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs | grep {movl\[\[:space:\]\]%eax, %ebx}
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs | grep {movl\[\[:space:\]\]%eax, %ebx}
 
 	%"struct..0$_67" = type { i32, %"struct.llvm::MachineOperand"**, %"struct.llvm::MachineOperand"* }
 	%"struct..1$_69" = type { i32 }
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
index 72a985a6c29bb..672f77eef02c9 100644
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN:   grep stackcoloring %t | grep "loads eliminated" 
-; RUN:   grep stackcoloring %t | grep "stores eliminated"
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
+; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 5
+; RUN:   grep asm-printer %t   | grep 179
 
 	type { [62 x %struct.Bitvec*] }		; type %0
 	type { i8* }		; type %1
diff --git a/test/CodeGen/X86/stdarg.ll b/test/CodeGen/X86/stdarg.ll
new file mode 100644
index 0000000000000..9778fa1389486
--- /dev/null
+++ b/test/CodeGen/X86/stdarg.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 | grep {testb	\[%\]al, \[%\]al}
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define void @foo(i32 %x, ...) nounwind {
+entry:
+  %ap = alloca [1 x %struct.__va_list_tag], align 8; <[1 x %struct.__va_list_tag]*> [#uses=2]
+  %ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*; <i8*> [#uses=2]
+  call void @llvm.va_start(i8* %ap12)
+  %ap3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1]
+  call void @bar(%struct.__va_list_tag* %ap3) nounwind
+  call void @llvm.va_end(i8* %ap12)
+  ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @bar(%struct.__va_list_tag*)
+
+declare void @llvm.va_end(i8*) nounwind
diff --git a/test/CodeGen/X86/store-empty-member.ll b/test/CodeGen/X86/store-empty-member.ll
new file mode 100644
index 0000000000000..37f86c60fae5f
--- /dev/null
+++ b/test/CodeGen/X86/store-empty-member.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; Don't crash on an empty struct member.
+
+; CHECK: movl  $2, 4(%esp)
+; CHECK: movl  $1, (%esp)
+
+%testType = type {i32, [0 x i32], i32}
+
+define void @foo() nounwind {
+  %1 = alloca %testType
+  volatile store %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
+  ret void
+}
diff --git a/test/CodeGen/X86/store-fp-constant.ll b/test/CodeGen/X86/store-fp-constant.ll
index 70cb046600bc3..206886bb608fd 100644
--- a/test/CodeGen/X86/store-fp-constant.ll
+++ b/test/CodeGen/X86/store-fp-constant.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep rodata
-; RUN: llvm-as < %s | llc -march=x86 | not grep literal
+; RUN: llc < %s -march=x86 | not grep rodata
+; RUN: llc < %s -march=x86 | not grep literal
 ;
 ; Check that no FP constants in this testcase ends up in the 
 ; constant pool.
diff --git a/test/CodeGen/X86/store-global-address.ll b/test/CodeGen/X86/store-global-address.ll
index 0695eee9a8883..c8d4cbceea3da 100644
--- a/test/CodeGen/X86/store-global-address.ll
+++ b/test/CodeGen/X86/store-global-address.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep movl | count 1
+; RUN: llc < %s -march=x86 | grep movl | count 1
 
 @dst = global i32 0             ; <i32*> [#uses=1]
 @ptr = global i32* null         ; <i32**> [#uses=1]
diff --git a/test/CodeGen/X86/store_op_load_fold.ll b/test/CodeGen/X86/store_op_load_fold.ll
index acef174638786..66d0e47c6d489 100644
--- a/test/CodeGen/X86/store_op_load_fold.ll
+++ b/test/CodeGen/X86/store_op_load_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep mov
+; RUN: llc < %s -march=x86 | not grep mov
 ;
 ; Test the add and load are folded into the store instruction.
 
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 09aaba155d99b..0ccfe470db5f9 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {and	DWORD PTR} | count 2
 
 target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/storetrunc-fp.ll b/test/CodeGen/X86/storetrunc-fp.ll
index 945cf48f9bde5..03ad093ba8607 100644
--- a/test/CodeGen/X86/storetrunc-fp.ll
+++ b/test/CodeGen/X86/storetrunc-fp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep flds
+; RUN: llc < %s -march=x86 | not grep flds
 
 define void @foo(x86_fp80 %a, x86_fp80 %b, float* %fp) {
 	%c = fadd x86_fp80 %a, %b
diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll
index cc26487cf264b..7aae9eb1ab967 100644
--- a/test/CodeGen/X86/stride-nine-with-base-reg.ll
+++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | not grep lea
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86 -relocation-model=static | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
 
 ; P should be sunk into the loop and folded into the address mode. There
 ; shouldn't be any lea instructions inside the loop.
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 277a4430acaad..a99a9c95a4cc9 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep lea
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
 
 @B = external global [1000 x float], align 32
 @A = external global [1000 x float], align 32
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index 98f02524d7a18..19f4079abb5f7 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1
-; RUN: llvm-as < %s | llc -march=x86 | grep {jb} | count 1
+; RUN: llc < %s -march=x86 | grep {jo} | count 1
+; RUN: llc < %s -march=x86 | grep {jb} | count 1
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/subreg-to-reg-0.ll b/test/CodeGen/X86/subreg-to-reg-0.ll
index 6b60f6526595a..d718c85a1d1f4 100644
--- a/test/CodeGen/X86/subreg-to-reg-0.ll
+++ b/test/CodeGen/X86/subreg-to-reg-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
 
 ; Do eliminate the zero-extension instruction and rely on
 ; x86-64's implicit zero-extension!
diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll
index aa26f06aba96a..a297728aee897 100644
--- a/test/CodeGen/X86/subreg-to-reg-1.ll
+++ b/test/CodeGen/X86/subreg-to-reg-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {leal	.*), %e.\*} | count 1
+; RUN: llc < %s -march=x86-64 | grep {leal	.*), %e.\*} | count 1
 
 ; Don't eliminate or coalesce away the explicit zero-extension!
 ; This is currently using an leal because of a 3-addressification detail,
diff --git a/test/CodeGen/X86/subreg-to-reg-2.ll b/test/CodeGen/X86/subreg-to-reg-2.ll
index d0b40cd5d4712..49d2e88d2c8d6 100644
--- a/test/CodeGen/X86/subreg-to-reg-2.ll
+++ b/test/CodeGen/X86/subreg-to-reg-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl
 ; rdar://6707985
 
 	%XXOO = type { %"struct.XXC::XXCC", i8*, %"struct.XXC::XXOO::$_71" }
diff --git a/test/CodeGen/X86/subreg-to-reg-3.ll b/test/CodeGen/X86/subreg-to-reg-3.ll
index 6634538c2afd0..931ae758ac5cc 100644
--- a/test/CodeGen/X86/subreg-to-reg-3.ll
+++ b/test/CodeGen/X86/subreg-to-reg-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep imull
+; RUN: llc < %s -march=x86-64 | grep imull
 
 ; Don't eliminate or coalesce away the explicit zero-extension!
 
diff --git a/test/CodeGen/X86/subreg-to-reg-4.ll b/test/CodeGen/X86/subreg-to-reg-4.ll
index bb6af3988c95c..0ea5541c89dca 100644
--- a/test/CodeGen/X86/subreg-to-reg-4.ll
+++ b/test/CodeGen/X86/subreg-to-reg-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: not grep leaq %t
 ; RUN: not grep incq %t
 ; RUN: not grep decq %t
diff --git a/test/CodeGen/X86/subreg-to-reg-5.ll b/test/CodeGen/X86/subreg-to-reg-5.ll
index 81b262ace84d1..ba4c307d1090a 100644
--- a/test/CodeGen/X86/subreg-to-reg-5.ll
+++ b/test/CodeGen/X86/subreg-to-reg-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: grep addl %t
 ; RUN: not egrep {movl|movq} %t
 
diff --git a/test/CodeGen/X86/subreg-to-reg-6.ll b/test/CodeGen/X86/subreg-to-reg-6.ll
index f18eef7d1970e..76430cd783e36 100644
--- a/test/CodeGen/X86/subreg-to-reg-6.ll
+++ b/test/CodeGen/X86/subreg-to-reg-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 
 define i64 @foo() nounwind {
 entry:
diff --git a/test/CodeGen/X86/switch-zextload.ll b/test/CodeGen/X86/switch-zextload.ll
index f3c701ff5f929..55425bc7da5cb 100644
--- a/test/CodeGen/X86/switch-zextload.ll
+++ b/test/CodeGen/X86/switch-zextload.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
+; RUN: llc < %s -march=x86 | grep mov | count 1
 
 ; Do zextload, instead of a load and a separate zext.
 
diff --git a/test/CodeGen/X86/swizzle.ll b/test/CodeGen/X86/swizzle.ll
index d00bb9a0fadbd..23e0c2453d646 100644
--- a/test/CodeGen/X86/swizzle.ll
+++ b/test/CodeGen/X86/swizzle.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movlps
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep movups
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movlps
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movups
 ; rdar://6523650
 
 	%struct.vector4_t = type { <4 x float> }
diff --git a/test/CodeGen/X86/tailcall-i1.ll b/test/CodeGen/X86/tailcall-i1.ll
index 0ec6a77807220..8ef1f11383be9 100644
--- a/test/CodeGen/X86/tailcall-i1.ll
+++ b/test/CodeGen/X86/tailcall-i1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
 define fastcc i1 @i1test(i32, i32, i32, i32) {
   entry:
   %4 = tail call fastcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
diff --git a/test/CodeGen/X86/tailcall-stackalign.ll b/test/CodeGen/X86/tailcall-stackalign.ll
index ff960b8ce1ffa..110472c8b9f3b 100644
--- a/test/CodeGen/X86/tailcall-stackalign.ll
+++ b/test/CodeGen/X86/tailcall-stackalign.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc  -mtriple=i686-unknown-linux  -tailcallopt | grep -A 1 call | grep -A 1 tailcaller | grep subl | grep 12
+; RUN: llc < %s  -mtriple=i686-unknown-linux  -tailcallopt | grep -A 1 call | grep -A 1 tailcaller | grep subl | grep 12
 ; Linux has 8 byte alignment so the params cause stack size 20 when tailcallopt
 ; is enabled, ensure that a normal fastcc call has matching stack size
 
diff --git a/test/CodeGen/X86/tailcall-structret.ll b/test/CodeGen/X86/tailcall-structret.ll
index e94d7d8befaa9..d8be4b2e2dfd8 100644
--- a/test/CodeGen/X86/tailcall-structret.ll
+++ b/test/CodeGen/X86/tailcall-structret.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
 define fastcc { { i8*, i8* }*, i8*} @init({ { i8*, i8* }*, i8*}, i32) {
 entry:
       %2 = tail call fastcc { { i8*, i8* }*, i8* } @init({ { i8*, i8*}*, i8*} %0, i32 %1)
diff --git a/test/CodeGen/X86/tailcall-void.ll b/test/CodeGen/X86/tailcall-void.ll
index 27b2a2856adaa..4e578d1b6410d 100644
--- a/test/CodeGen/X86/tailcall-void.ll
+++ b/test/CodeGen/X86/tailcall-void.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
 define fastcc void @i1test(i32, i32, i32, i32) {
   entry:
    tail call fastcc void @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
diff --git a/test/CodeGen/X86/tailcall1.ll b/test/CodeGen/X86/tailcall1.ll
index deedb86e95b8b..a4f87c021a951 100644
--- a/test/CodeGen/X86/tailcall1.ll
+++ b/test/CodeGen/X86/tailcall1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
 define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 entry:
 	ret i32 %a3
diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll
index 916be566a14a3..7002560c82a05 100644
--- a/test/CodeGen/X86/tailcallbyval.ll
+++ b/test/CodeGen/X86/tailcallbyval.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep TAILCALL
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep {movl\[\[:space:\]\]*4(%esp), %eax} | count 1
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep {movl\[\[:space:\]\]*4(%esp), %eax} | count 1
 %struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32,
                   i32, i32, i32, i32, i32, i32, i32, i32,
                   i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 7b65863f00b0c..7c685b85807e0 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -1,15 +1,15 @@
-; RUN: llvm-as < %s | llc -march=x86-64  -tailcallopt  | grep TAILCALL
+; RUN: llc < %s -march=x86-64  -tailcallopt  | grep TAILCALL
 ; Expect 2 rep;movs because of tail call byval lowering.
-; RUN: llvm-as < %s | llc -march=x86-64  -tailcallopt  | grep rep | wc -l | grep 2
+; RUN: llc < %s -march=x86-64  -tailcallopt  | grep rep | wc -l | grep 2
 ; A sequence of copyto/copyfrom virtual registers is used to deal with byval
 ; lowering appearing after moving arguments to registers. The following two
 ; checks verify that the register allocator changes those sequences to direct
 ; moves to argument register where it can (for registers that are not used in 
 ; byval lowering - not rsi, not rdi, not rcx).
 ; Expect argument 4 to be moved directly to register edx.
-; RUN: llvm-as < %s | llc -march=x86-64  -tailcallopt  | grep movl | grep {7} | grep edx
+; RUN: llc < %s -march=x86-64  -tailcallopt  | grep movl | grep {7} | grep edx
 ; Expect argument 6 to be moved directly to register r8.
-; RUN: llvm-as < %s | llc -march=x86-64  -tailcallopt  | grep movl | grep {17} | grep r8
+; RUN: llc < %s -march=x86-64  -tailcallopt  | grep movl | grep {17} | grep r8
 
 %struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
                    i64, i64, i64, i64, i64, i64, i64, i64,
diff --git a/test/CodeGen/X86/tailcallfp.ll b/test/CodeGen/X86/tailcallfp.ll
index f6149351038b3..c0b609ac956ec 100644
--- a/test/CodeGen/X86/tailcallfp.ll
+++ b/test/CodeGen/X86/tailcallfp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel -tailcallopt | not grep call
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -tailcallopt | not grep call
 define fastcc i32 @bar(i32 %X, i32(double, i32) *%FP) {
      %Y = tail call fastcc i32 %FP(double 0.0, i32 %X)
      ret i32 %Y
diff --git a/test/CodeGen/X86/tailcallfp2.ll b/test/CodeGen/X86/tailcallfp2.ll
index 151701ed439df..be4f96cfb5e64 100644
--- a/test/CodeGen/X86/tailcallfp2.ll
+++ b/test/CodeGen/X86/tailcallfp2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | grep {jmp} | grep {\\*%eax}
+; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%eax}
 
 declare i32 @putchar(i32)
 
diff --git a/test/CodeGen/X86/tailcallpic1.ll b/test/CodeGen/X86/tailcallpic1.ll
index 54074eb0ba2a6..60e3be5c50fda 100644
--- a/test/CodeGen/X86/tailcallpic1.ll
+++ b/test/CodeGen/X86/tailcallpic1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc  -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL
+; RUN: llc < %s  -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL
 
 define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 entry:
diff --git a/test/CodeGen/X86/tailcallpic2.ll b/test/CodeGen/X86/tailcallpic2.ll
index 60818e4f62c62..eaa76312396c6 100644
--- a/test/CodeGen/X86/tailcallpic2.ll
+++ b/test/CodeGen/X86/tailcallpic2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc  -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL
+; RUN: llc < %s  -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL
 
 define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 entry:
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index c81327e5143a7..73c59bb639a6f 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,14 +1,17 @@
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 | grep TAILCALL
+; RUN: llc < %s -tailcallopt -march=x86-64 | FileCheck %s
+
 ; Check that lowered arguments on the stack do not overwrite each other.
-; Move param %in1 to temp register (%eax).
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl	40(%rsp), %eax}
-; Add %in1 %p1 to another temporary register (%r9d).
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl	%edi, %r10d}
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {addl	32(%rsp), %r10d}
+; Add %in1 %p1 to a different temporary register (%eax).
+; CHECK: movl  %edi, %eax
+; CHECK: addl  32(%rsp), %eax
+; Move param %in1 to temp register (%r10d).
+; CHECK: movl  40(%rsp), %r10d
 ; Move result of addition to stack.
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl	%r10d, 40(%rsp)}
+; CHECK: movl  %eax, 40(%rsp)
 ; Move param %in2 to stack.
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl	%eax, 32(%rsp)}
+; CHECK: movl  %r10d, 32(%rsp)
+; Eventually, do a TAILCALL
+; CHECK: TAILCALL
 
 declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b)
 
diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll
index a24a9a0940a0e..772ff6c3e7664 100644
--- a/test/CodeGen/X86/test-nofold.ll
+++ b/test/CodeGen/X86/test-nofold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep {testl.*%e.x.*%e.x}
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep {testl.*%e.x.*%e.x}
 ; rdar://5752025
 
 ; We don't want to fold the and into the test, because the and clobbers its
diff --git a/test/CodeGen/X86/test-shrink-bug.ll b/test/CodeGen/X86/test-shrink-bug.ll
new file mode 100644
index 0000000000000..64631ea5fc9b9
--- /dev/null
+++ b/test/CodeGen/X86/test-shrink-bug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | FileCheck %s
+
+; Codegen shouldn't reduce the comparison down to testb $-1, %al
+; because that changes the result of the signed test.
+; PR5132
+; CHECK: testw  $255, %ax
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+@g_14 = global i8 -6, align 1                     ; <i8*> [#uses=1]
+
+declare i32 @func_16(i8 signext %p_19, i32 %p_20) nounwind
+
+define i32 @func_35(i64 %p_38) nounwind ssp {
+entry:
+  %tmp = load i8* @g_14                           ; <i8> [#uses=2]
+  %conv = zext i8 %tmp to i32                     ; <i32> [#uses=1]
+  %cmp = icmp sle i32 1, %conv                    ; <i1> [#uses=1]
+  %conv2 = zext i1 %cmp to i32                    ; <i32> [#uses=1]
+  %call = call i32 @func_16(i8 signext %tmp, i32 %conv2) ssp ; <i32> [#uses=1]
+  ret i32 1
+}
diff --git a/test/CodeGen/X86/test-shrink.ll b/test/CodeGen/X86/test-shrink.ll
new file mode 100644
index 0000000000000..1d636930641f8
--- /dev/null
+++ b/test/CodeGen/X86/test-shrink.ll
@@ -0,0 +1,158 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
+
+; CHECK-64: g64xh:
+; CHECK-64:   testb $8, %ah
+; CHECK-64:   ret
+; CHECK-32: g64xh:
+; CHECK-32:   testb $8, %ah
+; CHECK-32:   ret
+define void @g64xh(i64 inreg %x) nounwind {
+  %t = and i64 %x, 2048
+  %s = icmp eq i64 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g64xl:
+; CHECK-64:   testb $8, %dil
+; CHECK-64:   ret
+; CHECK-32: g64xl:
+; CHECK-32:   testb $8, %al
+; CHECK-32:   ret
+define void @g64xl(i64 inreg %x) nounwind {
+  %t = and i64 %x, 8
+  %s = icmp eq i64 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g32xh:
+; CHECK-64:   testb $8, %ah
+; CHECK-64:   ret
+; CHECK-32: g32xh:
+; CHECK-32:   testb $8, %ah
+; CHECK-32:   ret
+define void @g32xh(i32 inreg %x) nounwind {
+  %t = and i32 %x, 2048
+  %s = icmp eq i32 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g32xl:
+; CHECK-64:   testb $8, %dil
+; CHECK-64:   ret
+; CHECK-32: g32xl:
+; CHECK-32:   testb $8, %al
+; CHECK-32:   ret
+define void @g32xl(i32 inreg %x) nounwind {
+  %t = and i32 %x, 8
+  %s = icmp eq i32 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g16xh:
+; CHECK-64:   testb $8, %ah
+; CHECK-64:   ret
+; CHECK-32: g16xh:
+; CHECK-32:   testb $8, %ah
+; CHECK-32:   ret
+define void @g16xh(i16 inreg %x) nounwind {
+  %t = and i16 %x, 2048
+  %s = icmp eq i16 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g16xl:
+; CHECK-64:   testb $8, %dil
+; CHECK-64:   ret
+; CHECK-32: g16xl:
+; CHECK-32:   testb $8, %al
+; CHECK-32:   ret
+define void @g16xl(i16 inreg %x) nounwind {
+  %t = and i16 %x, 8
+  %s = icmp eq i16 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g64x16:
+; CHECK-64:   testw $-32640, %di
+; CHECK-64:   ret
+; CHECK-32: g64x16:
+; CHECK-32:   testw $-32640, %ax
+; CHECK-32:   ret
+define void @g64x16(i64 inreg %x) nounwind {
+  %t = and i64 %x, 32896
+  %s = icmp eq i64 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g32x16:
+; CHECK-64:   testw $-32640, %di
+; CHECK-64:   ret
+; CHECK-32: g32x16:
+; CHECK-32:   testw $-32640, %ax
+; CHECK-32:   ret
+define void @g32x16(i32 inreg %x) nounwind {
+  %t = and i32 %x, 32896
+  %s = icmp eq i32 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g64x32:
+; CHECK-64:   testl $268468352, %edi
+; CHECK-64:   ret
+; CHECK-32: g64x32:
+; CHECK-32:   testl $268468352, %eax
+; CHECK-32:   ret
+define void @g64x32(i64 inreg %x) nounwind {
+  %t = and i64 %x, 268468352
+  %s = icmp eq i64 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/X86/testl-commute.ll b/test/CodeGen/X86/testl-commute.ll
index dbbef0a894f23..3d5f672f98fce 100644
--- a/test/CodeGen/X86/testl-commute.ll
+++ b/test/CodeGen/X86/testl-commute.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {testl.*\(%r.i\), %} | count 3
+; RUN: llc < %s | grep {testl.*\(%r.i\), %} | count 3
 ; rdar://5671654
 ; The loads should fold into the testl instructions, no matter how
 ; the inputs are commuted.
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
new file mode 100644
index 0000000000000..4cad8376d8d9d
--- /dev/null
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s
+
+@i = thread_local global i32 15
+
+define i32 @f1() {
+entry:
+	%tmp1 = load i32* @i
+	ret i32 %tmp1
+}
+
+; X32: f1:
+; X32:   leal i@TLSGD(,%ebx), %eax
+; X32:   call ___tls_get_addr@PLT
+
+; X64: f1:
+; X64:   leaq i@TLSGD(%rip), %rdi
+; X64:   call __tls_get_addr@PLT
+
+
+@i2 = external thread_local global i32
+
+define i32* @f2() {
+entry:
+	ret i32* @i
+}
+
+; X32: f2:
+; X32:   leal i@TLSGD(,%ebx), %eax
+; X32:   call ___tls_get_addr@PLT
+
+; X64: f2:
+; X64:   leaq i@TLSGD(%rip), %rdi
+; X64:   call __tls_get_addr@PLT
+
+
+
+define i32 @f3() {
+entry:
+	%tmp1 = load i32* @i		; <i32> [#uses=1]
+	ret i32 %tmp1
+}
+
+; X32: f3:
+; X32:   leal	i@TLSGD(,%ebx), %eax
+; X32:   call ___tls_get_addr@PLT
+
+; X64: f3:
+; X64:   leaq i@TLSGD(%rip), %rdi
+; X64:   call __tls_get_addr@PLT
+
+
+define i32* @f4() nounwind {
+entry:
+	ret i32* @i
+}
+
+; X32: f4:
+; X32:   leal	i@TLSGD(,%ebx), %eax
+; X32:   call ___tls_get_addr@PLT
+
+; X64: f4:
+; X64:   leaq i@TLSGD(%rip), %rdi
+; X64:   call __tls_get_addr@PLT
+
+
+
diff --git a/test/CodeGen/X86/tls1.ll b/test/CodeGen/X86/tls1.ll
index 85ff360a5508c..0cae5c4f2888f 100644
--- a/test/CodeGen/X86/tls1.ll
+++ b/test/CodeGen/X86/tls1.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
 
 @i = thread_local global i32 15
diff --git a/test/CodeGen/X86/tls10.ll b/test/CodeGen/X86/tls10.ll
index 2f5f02b9ac96b..fb61596d09ca9 100644
--- a/test/CodeGen/X86/tls10.ll
+++ b/test/CodeGen/X86/tls10.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movl	%gs:0, %eax} %t
 ; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movq	%fs:0, %rax} %t2
 ; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
 
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
index b6aed9aaa04d1..a2c1a1f75deb0 100644
--- a/test/CodeGen/X86/tls11.ll
+++ b/test/CodeGen/X86/tls11.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movw	%gs:i@NTPOFF, %ax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movw	%fs:i@TPOFF, %ax} %t2
 
 @i = thread_local global i16 15
diff --git a/test/CodeGen/X86/tls12.ll b/test/CodeGen/X86/tls12.ll
index b5288391f03f0..c29f6adacd202 100644
--- a/test/CodeGen/X86/tls12.ll
+++ b/test/CodeGen/X86/tls12.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movb	%gs:i@NTPOFF, %al} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movb	%fs:i@TPOFF, %al} %t2
 
 @i = thread_local global i8 15
diff --git a/test/CodeGen/X86/tls13.ll b/test/CodeGen/X86/tls13.ll
index ec23a41113eef..08778ec2ce8b1 100644
--- a/test/CodeGen/X86/tls13.ll
+++ b/test/CodeGen/X86/tls13.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movswl	%gs:i@NTPOFF, %eax} %t
 ; RUN: grep {movzwl	%gs:j@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movswl	%fs:i@TPOFF, %edi} %t2
 ; RUN: grep {movzwl	%fs:j@TPOFF, %edi} %t2
 
diff --git a/test/CodeGen/X86/tls14.ll b/test/CodeGen/X86/tls14.ll
index 941601eb4f9b6..88426dd43d50e 100644
--- a/test/CodeGen/X86/tls14.ll
+++ b/test/CodeGen/X86/tls14.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movsbl	%gs:i@NTPOFF, %eax} %t
 ; RUN: grep {movzbl	%gs:j@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movsbl	%fs:i@TPOFF, %edi} %t2
 ; RUN: grep {movzbl	%fs:j@TPOFF, %edi} %t2
 
diff --git a/test/CodeGen/X86/tls15.ll b/test/CodeGen/X86/tls15.ll
index 62f3677629be3..7abf070d3fd20 100644
--- a/test/CodeGen/X86/tls15.ll
+++ b/test/CodeGen/X86/tls15.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movl	%gs:0, %eax} %t | count 1
 ; RUN: grep {leal	i@NTPOFF(%eax), %ecx} %t
 ; RUN: grep {leal	j@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movq	%fs:0, %rax} %t2 | count 1
 ; RUN: grep {leaq	i@TPOFF(%rax), %rcx} %t2
 ; RUN: grep {leaq	j@TPOFF(%rax), %rax} %t2
diff --git a/test/CodeGen/X86/tls2.ll b/test/CodeGen/X86/tls2.ll
index baa51bbb6ead9..5a94296afefc1 100644
--- a/test/CodeGen/X86/tls2.ll
+++ b/test/CodeGen/X86/tls2.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movl	%gs:0, %eax} %t
 ; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movq	%fs:0, %rax} %t2
 ; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
 
diff --git a/test/CodeGen/X86/tls3.ll b/test/CodeGen/X86/tls3.ll
index 061849901fcf5..7327cc41777e9 100644
--- a/test/CodeGen/X86/tls3.ll
+++ b/test/CodeGen/X86/tls3.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movl	i@INDNTPOFF, %eax} %t
 ; RUN: grep {movl	%gs:(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movq	i@GOTTPOFF(%rip), %rax} %t2
 ; RUN: grep {movl	%fs:(%rax), %eax} %t2
 
diff --git a/test/CodeGen/X86/tls4.ll b/test/CodeGen/X86/tls4.ll
index 33f221b8ad3ab..d2e40e389bd57 100644
--- a/test/CodeGen/X86/tls4.ll
+++ b/test/CodeGen/X86/tls4.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movl	%gs:0, %eax} %t
 ; RUN: grep {addl	i@INDNTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movq	%fs:0, %rax} %t2
 ; RUN: grep {addq	i@GOTTPOFF(%rip), %rax} %t2
 
diff --git a/test/CodeGen/X86/tls5.ll b/test/CodeGen/X86/tls5.ll
index ff7b9e0a5ffe7..4d2cc02b50286 100644
--- a/test/CodeGen/X86/tls5.ll
+++ b/test/CodeGen/X86/tls5.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
 
 @i = internal thread_local global i32 15
diff --git a/test/CodeGen/X86/tls6.ll b/test/CodeGen/X86/tls6.ll
index ab53929206a18..505106ee14ed4 100644
--- a/test/CodeGen/X86/tls6.ll
+++ b/test/CodeGen/X86/tls6.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movl	%gs:0, %eax} %t
 ; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movq	%fs:0, %rax} %t2
 ; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
 
diff --git a/test/CodeGen/X86/tls7.ll b/test/CodeGen/X86/tls7.ll
index 6a7739bc1a31d..e9116e772090f 100644
--- a/test/CodeGen/X86/tls7.ll
+++ b/test/CodeGen/X86/tls7.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
 
 @i = hidden thread_local global i32 15
diff --git a/test/CodeGen/X86/tls8.ll b/test/CodeGen/X86/tls8.ll
index fd9d472bb66c3..375af94920f5b 100644
--- a/test/CodeGen/X86/tls8.ll
+++ b/test/CodeGen/X86/tls8.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movl	%gs:0, %eax} %t
 ; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movq	%fs:0, %rax} %t2
 ; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
 
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
index bc0a6f0bbe618..214146fe998c7 100644
--- a/test/CodeGen/X86/tls9.ll
+++ b/test/CodeGen/X86/tls9.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
 ; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
 ; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
 
 @i = external hidden thread_local global i32
diff --git a/test/CodeGen/X86/trap.ll b/test/CodeGen/X86/trap.ll
index 9a013ffbe5652..03ae6bfc869ec 100644
--- a/test/CodeGen/X86/trap.ll
+++ b/test/CodeGen/X86/trap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep ud2
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep ud2
 define i32 @test() noreturn nounwind  {
 entry:
 	tail call void @llvm.trap( )
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
index 25a1191d8f14f..374d404a968cf 100644
--- a/test/CodeGen/X86/trunc-to-bool.ll
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -1,7 +1,7 @@
 ; An integer truncation to i1 should be done with an and instruction to make
 ; sure only the LSBit survives. Test that this is the case both for a returned
 ; value and as the operand of a branch.
-; RUN: llvm-as < %s | llc -march=x86 | grep {\\(and\\)\\|\\(test.*\\\$1\\)} | \
+; RUN: llc < %s -march=x86 | grep {\\(and\\)\\|\\(test.*\\\$1\\)} | \
 ; RUN:   count 5
 
 define i1 @test1(i32 %X) zeroext {
diff --git a/test/CodeGen/X86/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll
index 3fe4cd1b781a9..6f16a2548aa67 100644
--- a/test/CodeGen/X86/twoaddr-coalesce-2.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \
 ; RUN:   grep {twoaddrinstr} | grep {Number of instructions aggressively commuted}
 ; rdar://6480363
 
diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll
index 5293b77879601..d0e13f61f2d05 100644
--- a/test/CodeGen/X86/twoaddr-coalesce.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 5
+; RUN: llc < %s -march=x86 | grep mov | count 5
 ; rdar://6523745
 
 @"\01LC" = internal constant [4 x i8] c"%d\0A\00"		; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/twoaddr-delete.ll b/test/CodeGen/X86/twoaddr-delete.ll
index bbf4e62363a14..77e3c75c6dd0b 100644
--- a/test/CodeGen/X86/twoaddr-delete.ll
+++ b/test/CodeGen/X86/twoaddr-delete.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {twoaddrinstr} | grep {Number of dead instructions deleted}
+; RUN: llc < %s -march=x86 -stats |& grep {twoaddrinstr} | grep {Number of dead instructions deleted}
 
 	%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
 	%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
diff --git a/test/CodeGen/X86/twoaddr-pass-sink.ll b/test/CodeGen/X86/twoaddr-pass-sink.ll
index 765588059f75b..077fee0773926 100644
--- a/test/CodeGen/X86/twoaddr-pass-sink.ll
+++ b/test/CodeGen/X86/twoaddr-pass-sink.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk}
 
 define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/twoaddr-remat.ll b/test/CodeGen/X86/twoaddr-remat.ll
index b74b70cedb76b..4940c78371d9d 100644
--- a/test/CodeGen/X86/twoaddr-remat.ll
+++ b/test/CodeGen/X86/twoaddr-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 59796 | count 3
+; RUN: llc < %s -march=x86 | grep 59796 | count 3
 
 	%Args = type %Value*
 	%Exec = type opaque*
diff --git a/test/CodeGen/X86/uint_to_fp-2.ll b/test/CodeGen/X86/uint_to_fp-2.ll
index d6304370e293c..da5105d8a4eac 100644
--- a/test/CodeGen/X86/uint_to_fp-2.ll
+++ b/test/CodeGen/X86/uint_to_fp-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 1
 ; rdar://6504833
 
 define float @f(i32 %x) nounwind readnone {
diff --git a/test/CodeGen/X86/uint_to_fp.ll b/test/CodeGen/X86/uint_to_fp.ll
index 148437f3ddbf1..41ee1947edc49 100644
--- a/test/CodeGen/X86/uint_to_fp.ll
+++ b/test/CodeGen/X86/uint_to_fp.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep {sub.*esp}
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep cvtsi2ss
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {sub.*esp}
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep cvtsi2ss
 ; rdar://6034396
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/umul-with-carry.ll b/test/CodeGen/X86/umul-with-carry.ll
index 547e179bb219b..7416051693be0 100644
--- a/test/CodeGen/X86/umul-with-carry.ll
+++ b/test/CodeGen/X86/umul-with-carry.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {jc} | count 1
+; RUN: llc < %s -march=x86 | grep {jc} | count 1
 ; XFAIL: *
 
 ; FIXME: umul-with-overflow not supported yet.
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
index 9e69154f10f9d..d522bd80acfd9 100644
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "\\\\\\\<mul"
+; RUN: llc < %s -march=x86 | grep "\\\\\\\<mul"
 
 declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
 define i1 @a(i32 %x) zeroext nounwind {
diff --git a/test/CodeGen/X86/urem-i8-constant.ll b/test/CodeGen/X86/urem-i8-constant.ll
index bc93684877b57..e3cb69ca591f6 100644
--- a/test/CodeGen/X86/urem-i8-constant.ll
+++ b/test/CodeGen/X86/urem-i8-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep 111
+; RUN: llc < %s -march=x86 | grep 111
 
 define i8 @foo(i8 %tmp325) {
 	%t546 = urem i8 %tmp325, 37
diff --git a/test/CodeGen/X86/v4f32-immediate.ll b/test/CodeGen/X86/v4f32-immediate.ll
index bd6045c068e2f..b5ebaa74bd071 100644
--- a/test/CodeGen/X86/v4f32-immediate.ll
+++ b/test/CodeGen/X86/v4f32-immediate.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse | grep movaps
+; RUN: llc < %s -march=x86 -mattr=+sse | grep movaps
 
 define <4 x float> @foo() {
   ret <4 x float> <float 0x4009C9D0A0000000, float 0x4002666660000000, float 0x3FF3333340000000, float 0x3FB99999A0000000>
diff --git a/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
index b0cdf496d5fc7..4817db22c355a 100644
--- a/test/CodeGen/X86/variable-sized-darwin-bzero.ll
+++ b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
 
 declare void @llvm.memset.i64(i8*, i8, i64, i32)
 
diff --git a/test/CodeGen/X86/variadic-node-pic.ll b/test/CodeGen/X86/variadic-node-pic.ll
index 4d76445b2f95a..1182a306abd08 100644
--- a/test/CodeGen/X86/variadic-node-pic.ll
+++ b/test/CodeGen/X86/variadic-node-pic.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic -code-model=large
+; RUN: llc < %s -relocation-model=pic -code-model=large
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/vec_add.ll b/test/CodeGen/X86/vec_add.ll
index 72415a3ab28bd..7c77d11a7b54e 100644
--- a/test/CodeGen/X86/vec_add.ll
+++ b/test/CodeGen/X86/vec_add.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
 define <2 x i64> @test(<2 x i64> %a, <2 x i64> %b) {
 entry:
diff --git a/test/CodeGen/X86/vec_align.ll b/test/CodeGen/X86/vec_align.ll
index d88104d79e829..e27311561b2cd 100644
--- a/test/CodeGen/X86/vec_align.ll
+++ b/test/CodeGen/X86/vec_align.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=yonah -relocation-model=static | grep movaps | count 2
+; RUN: llc < %s -mcpu=yonah -relocation-model=static | grep movaps | count 2
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll
index ebdac7d3de9b1..b3efc7b16b7d9 100644
--- a/test/CodeGen/X86/vec_call.ll
+++ b/test/CodeGen/X86/vec_call.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
 ; RUN:   grep {subl.*60}
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
 ; RUN:   grep {movaps.*32}
 
 
diff --git a/test/CodeGen/X86/vec_clear.ll b/test/CodeGen/X86/vec_clear.ll
index 514de953efec7..166d4363ec8d5 100644
--- a/test/CodeGen/X86/vec_clear.ll
+++ b/test/CodeGen/X86/vec_clear.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t
 ; RUN: not grep and %t
 ; RUN: not grep psrldq %t
 ; RUN: grep xorps %t
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
new file mode 100644
index 0000000000000..c8c7257cbb9c3
--- /dev/null
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+
+
+define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK: pcmpgtd
+; CHECK: ret
+
+	%C = icmp sgt <4 x i32> %A, %B
+        %D = sext <4 x i1> %C to <4 x i32>
+	ret <4 x i32> %D
+}
+
+define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test2:
+; CHECK: pcmp
+; CHECK: pcmp
+; CHECK: xorps
+; CHECK: ret
+	%C = icmp sge <4 x i32> %A, %B
+        %D = sext <4 x i1> %C to <4 x i32>
+	ret <4 x i32> %D
+}
+
+define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test3:
+; CHECK: pcmpgtd
+; CHECK: movaps
+; CHECK: ret
+	%C = icmp slt <4 x i32> %A, %B
+        %D = sext <4 x i1> %C to <4 x i32>
+	ret <4 x i32> %D
+}
+
+define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test4:
+; CHECK: movaps
+; CHECK: pcmpgtd
+; CHECK: ret
+	%C = icmp ugt <4 x i32> %A, %B
+        %D = sext <4 x i1> %C to <4 x i32>
+	ret <4 x i32> %D
+}
diff --git a/test/CodeGen/X86/vec_ctbits.ll b/test/CodeGen/X86/vec_ctbits.ll
index f057c9a39a637..f0158d643c17c 100644
--- a/test/CodeGen/X86/vec_ctbits.ll
+++ b/test/CodeGen/X86/vec_ctbits.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 
 declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index d6726be1db6a3..dab5dd144f064 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
 ; RUN: grep extractps   %t | count 1
 ; RUN: grep pextrd      %t | count 1
 ; RUN: not grep pshufd  %t
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
index ee7567cf7609a..b0137304e8a9c 100644
--- a/test/CodeGen/X86/vec_extract.ll
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
 ; RUN: grep movss    %t | count 3
 ; RUN: grep movhlps  %t | count 1
 ; RUN: grep pshufd   %t | count 1
diff --git a/test/CodeGen/X86/vec_fneg.ll b/test/CodeGen/X86/vec_fneg.ll
index a801472622f2a..d49c70e563911 100644
--- a/test/CodeGen/X86/vec_fneg.ll
+++ b/test/CodeGen/X86/vec_fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
 define <4 x float> @t1(<4 x float> %Q) {
         %tmp15 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
diff --git a/test/CodeGen/X86/vec_i64.ll b/test/CodeGen/X86/vec_i64.ll
index 3939af57c8ced..462e16e130230 100644
--- a/test/CodeGen/X86/vec_i64.ll
+++ b/test/CodeGen/X86/vec_i64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
 ; RUN: grep movq %t | count 2
 
 ; Used movq to load i64 into a v2i64 when the top i64 is 0.
diff --git a/test/CodeGen/X86/vec_ins_extract-1.ll b/test/CodeGen/X86/vec_ins_extract-1.ll
index c7eb221635d62..29511934af019 100644
--- a/test/CodeGen/X86/vec_ins_extract-1.ll
+++ b/test/CodeGen/X86/vec_ins_extract-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep {(%esp,%eax,4)} | count 4
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep {(%esp,%eax,4)} | count 4
 
 ; Inserts and extracts with variable indices must be lowered
 ; to memory accesses.
diff --git a/test/CodeGen/X86/vec_ins_extract.ll b/test/CodeGen/X86/vec_ins_extract.ll
index 7882839575855..bf43deb1d19a6 100644
--- a/test/CodeGen/X86/vec_ins_extract.ll
+++ b/test/CodeGen/X86/vec_ins_extract.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalarrepl -instcombine | \
+; RUN: opt < %s -scalarrepl -instcombine | \
 ; RUN:   llc -march=x86 -mcpu=yonah | not grep sub.*esp
 
 ; This checks that various insert/extract idiom work without going to the
diff --git a/test/CodeGen/X86/vec_insert-2.ll b/test/CodeGen/X86/vec_insert-2.ll
index 8d0bcc4fbf340..b08044bb869bf 100644
--- a/test/CodeGen/X86/vec_insert-2.ll
+++ b/test/CodeGen/X86/vec_insert-2.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep {\$36,} | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep shufps | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep pinsrw | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movhpd | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep unpcklpd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep {\$36,} | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep shufps | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movhpd | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep unpcklpd | count 1
 
 define <4 x float> @t1(float %s, <4 x float> %tmp) nounwind {
         %tmp1 = insertelement <4 x float> %tmp, float %s, i32 3
diff --git a/test/CodeGen/X86/vec_insert-3.ll b/test/CodeGen/X86/vec_insert-3.ll
index e43eca4b875f5..a18cd86489cc7 100644
--- a/test/CodeGen/X86/vec_insert-3.ll
+++ b/test/CodeGen/X86/vec_insert-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep punpcklqdq | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep punpcklqdq | count 1
 
 define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind {
         %tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 1a9768a98e6c0..291fc0454c9c7 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
 ; RUN: grep psllq %t | grep 32
 ; RUN: grep pslldq %t | grep 12
 ; RUN: grep psrldq %t | grep 8
diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll
index 5ef270f90820d..54aa43f0c35dc 100644
--- a/test/CodeGen/X86/vec_insert-6.ll
+++ b/test/CodeGen/X86/vec_insert-6.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pslldq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6
 
 define <4 x float> @t3(<4 x float>* %P) nounwind  {
 	%tmp1 = load <4 x float>* %P
diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll
index 8cfc63aa6bf1b..9ede10f63d3e8 100644
--- a/test/CodeGen/X86/vec_insert-7.ll
+++ b/test/CodeGen/X86/vec_insert-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -o - | grep punpckldq
+; RUN: llc < %s -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -o - | grep punpckldq
 
 define <2 x i32> @mmx_movzl(<2 x i32> %x) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_insert-8.ll b/test/CodeGen/X86/vec_insert-8.ll
index 0f6924c66f9e3..650951cc9e5ee 100644
--- a/test/CodeGen/X86/vec_insert-8.ll
+++ b/test/CodeGen/X86/vec_insert-8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
 
 ; tests variable insert and extract of a 4 x i32
 
diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll
index 3a9464ceff125..a7274a9000445 100644
--- a/test/CodeGen/X86/vec_insert.ll
+++ b/test/CodeGen/X86/vec_insert.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw
 
 define void @test(<4 x float>* %F, i32 %I) {
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_insert_4.ll b/test/CodeGen/X86/vec_insert_4.ll
index a0aa0c0bfea00..2c31e56b4af6d 100644
--- a/test/CodeGen/X86/vec_insert_4.ll
+++ b/test/CodeGen/X86/vec_insert_4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep 1084227584 | count 1
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep 1084227584 | count 1
 
 ; ModuleID = '<stdin>'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/vec_loadsingles.ll b/test/CodeGen/X86/vec_loadsingles.ll
index 67122763ec9b9..8812c4f820c62 100644
--- a/test/CodeGen/X86/vec_loadsingles.ll
+++ b/test/CodeGen/X86/vec_loadsingles.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
 
 define <4 x float> @a(<4 x float> %a, float* nocapture %p) nounwind readonly {
 entry:
diff --git a/test/CodeGen/X86/vec_logical.ll b/test/CodeGen/X86/vec_logical.ll
index f8957629f8a1d..1dc0b163aeb38 100644
--- a/test/CodeGen/X86/vec_logical.ll
+++ b/test/CodeGen/X86/vec_logical.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
 ; RUN: grep xorps %t | count 2
 ; RUN: grep andnps %t
 ; RUN: grep movaps %t | count 2
diff --git a/test/CodeGen/X86/vec_return.ll b/test/CodeGen/X86/vec_return.ll
index 106966fd52128..66762b4a0604e 100644
--- a/test/CodeGen/X86/vec_return.ll
+++ b/test/CodeGen/X86/vec_return.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
 ; RUN: grep xorps %t | count 1
 ; RUN: grep movaps %t | count 1
 ; RUN: not grep shuf %t
diff --git a/test/CodeGen/X86/vec_select.ll b/test/CodeGen/X86/vec_select.ll
index ecb825b00bd8d..033e9f7027f94 100644
--- a/test/CodeGen/X86/vec_select.ll
+++ b/test/CodeGen/X86/vec_select.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse
+; RUN: llc < %s -march=x86 -mattr=+sse
 
 define void @test(i32 %C, <4 x float>* %A, <4 x float>* %B) {
         %tmp = load <4 x float>* %A             ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_set-2.ll b/test/CodeGen/X86/vec_set-2.ll
index ae9530db8df5f..a8f1187084d6a 100644
--- a/test/CodeGen/X86/vec_set-2.ll
+++ b/test/CodeGen/X86/vec_set-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1
 
 define <4 x float> @test1(float %a) nounwind {
 	%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0		; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_set-3.ll b/test/CodeGen/X86/vec_set-3.ll
index 546ca0bcf3072..ada17e0092a80 100644
--- a/test/CodeGen/X86/vec_set-3.ll
+++ b/test/CodeGen/X86/vec_set-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
 ; RUN: grep pshufd %t | count 2
 
 define <4 x float> @test(float %a) nounwind {
diff --git a/test/CodeGen/X86/vec_set-4.ll b/test/CodeGen/X86/vec_set-4.ll
index da7ef80a3a5e5..332c8b70760fe 100644
--- a/test/CodeGen/X86/vec_set-4.ll
+++ b/test/CodeGen/X86/vec_set-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pinsrw | count 2
 
 define <2 x i64> @test(i16 %a) nounwind {
 entry:
diff --git a/test/CodeGen/X86/vec_set-5.ll b/test/CodeGen/X86/vec_set-5.ll
index d3329701119b4..f811a7404a273 100644
--- a/test/CodeGen/X86/vec_set-5.ll
+++ b/test/CodeGen/X86/vec_set-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
 ; RUN: grep movlhps   %t | count 1
 ; RUN: grep movq      %t | count 2
 
diff --git a/test/CodeGen/X86/vec_set-6.ll b/test/CodeGen/X86/vec_set-6.ll
index c7b6747a86f72..0713d956ee448 100644
--- a/test/CodeGen/X86/vec_set-6.ll
+++ b/test/CodeGen/X86/vec_set-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
 ; RUN: grep movss    %t | count 1
 ; RUN: grep movq     %t | count 1
 ; RUN: grep shufps   %t | count 1
diff --git a/test/CodeGen/X86/vec_set-7.ll b/test/CodeGen/X86/vec_set-7.ll
index 6f98c510efca3..d993178a9892b 100644
--- a/test/CodeGen/X86/vec_set-7.ll
+++ b/test/CodeGen/X86/vec_set-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 1
 
 define <2 x i64> @test(<2 x i64>* %p) nounwind {
 	%tmp = bitcast <2 x i64>* %p to double*		
diff --git a/test/CodeGen/X86/vec_set-8.ll b/test/CodeGen/X86/vec_set-8.ll
index cca436bf64332..9697f1186d451 100644
--- a/test/CodeGen/X86/vec_set-8.ll
+++ b/test/CodeGen/X86/vec_set-8.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep movsd
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movd.*%rdi,.*%xmm0}
+; RUN: llc < %s -march=x86-64 | not grep movsd
+; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi,.*%xmm0}
 
 define <2 x i64> @test(i64 %i) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll
index 5c1b8f5dacb85..3656e5f6ca47b 100644
--- a/test/CodeGen/X86/vec_set-9.ll
+++ b/test/CodeGen/X86/vec_set-9.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movd | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movlhps.*%xmm0, %xmm0}
+; RUN: llc < %s -march=x86-64 | grep movd | count 1
+; RUN: llc < %s -march=x86-64 | grep {movlhps.*%xmm0, %xmm0}
 
 define <2 x i64> @test3(i64 %A) nounwind {
 entry:
diff --git a/test/CodeGen/X86/vec_set-A.ll b/test/CodeGen/X86/vec_set-A.ll
index f33263f1aef5c..f05eecf8c3aed 100644
--- a/test/CodeGen/X86/vec_set-A.ll
+++ b/test/CodeGen/X86/vec_set-A.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movl.*\$1, %}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movl.*\$1, %}
 define <2 x i64> @test1() nounwind {
 entry:
 	ret <2 x i64> < i64 1, i64 0 >
diff --git a/test/CodeGen/X86/vec_set-B.ll b/test/CodeGen/X86/vec_set-B.ll
index d318964686c9b..f5b3e8baa33a9 100644
--- a/test/CodeGen/X86/vec_set-B.ll
+++ b/test/CodeGen/X86/vec_set-B.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep movaps
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep esp | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep esp | count 2
 
 ; These should both generate something like this:
 ;_test3:
diff --git a/test/CodeGen/X86/vec_set-C.ll b/test/CodeGen/X86/vec_set-C.ll
index fc86853e10fd2..7636ac3b37415 100644
--- a/test/CodeGen/X86/vec_set-C.ll
+++ b/test/CodeGen/X86/vec_set-C.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd
 
 define <2 x i64> @t1(i64 %x) nounwind  {
 	%tmp8 = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
diff --git a/test/CodeGen/X86/vec_set-D.ll b/test/CodeGen/X86/vec_set-D.ll
index 71bdd849953ba..3d6369e1c76ae 100644
--- a/test/CodeGen/X86/vec_set-D.ll
+++ b/test/CodeGen/X86/vec_set-D.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
 
 define <4 x i32> @t(i32 %x, i32 %y) nounwind  {
 	%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
diff --git a/test/CodeGen/X86/vec_set-E.ll b/test/CodeGen/X86/vec_set-E.ll
index ee63234cc978b..d78be669fc7f3 100644
--- a/test/CodeGen/X86/vec_set-E.ll
+++ b/test/CodeGen/X86/vec_set-E.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
 
 define <4 x float> @t(float %X) nounwind  {
 	%tmp11 = insertelement <4 x float> undef, float %X, i32 0
diff --git a/test/CodeGen/X86/vec_set-F.ll b/test/CodeGen/X86/vec_set-F.ll
index db83eb2e8531f..4f0acb2d151d9 100644
--- a/test/CodeGen/X86/vec_set-F.ll
+++ b/test/CodeGen/X86/vec_set-F.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 3
 
 define <2 x i64> @t1(<2 x i64>* %ptr) nounwind  {
 	%tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
diff --git a/test/CodeGen/X86/vec_set-G.ll b/test/CodeGen/X86/vec_set-G.ll
index f81907cb69f2b..4a542feafaffc 100644
--- a/test/CodeGen/X86/vec_set-G.ll
+++ b/test/CodeGen/X86/vec_set-G.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss
 
 define fastcc void @t(<4 x float> %A) nounwind  {
 	%tmp41896 = extractelement <4 x float> %A, i32 0		; <float> [#uses=1]
diff --git a/test/CodeGen/X86/vec_set-H.ll b/test/CodeGen/X86/vec_set-H.ll
index ea7b85355c239..5037e36d3fd5a 100644
--- a/test/CodeGen/X86/vec_set-H.ll
+++ b/test/CodeGen/X86/vec_set-H.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep movz
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movz
 
 define <2 x i64> @doload64(i16 signext  %x) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_set-I.ll b/test/CodeGen/X86/vec_set-I.ll
index e1c44d0a0f4b4..64f36f99e4d26 100644
--- a/test/CodeGen/X86/vec_set-I.ll
+++ b/test/CodeGen/X86/vec_set-I.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xorp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xorp
 
 define void @t1() nounwind  {
 	%tmp298.i.i = load <4 x float>* null, align 16
diff --git a/test/CodeGen/X86/vec_set-J.ll b/test/CodeGen/X86/vec_set-J.ll
index 488d360734163..d90ab85b8cf7d 100644
--- a/test/CodeGen/X86/vec_set-J.ll
+++ b/test/CodeGen/X86/vec_set-J.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss
 ; PR2472
 
 define <4 x i32> @a(<4 x i32> %a) nounwind {
diff --git a/test/CodeGen/X86/vec_set.ll b/test/CodeGen/X86/vec_set.ll
index 77636eda1c027..c316df887c16e 100644
--- a/test/CodeGen/X86/vec_set.ll
+++ b/test/CodeGen/X86/vec_set.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep punpckl | count 7
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpckl | count 7
 
 define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
         %tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0          ; <<8 x i16>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shift.ll b/test/CodeGen/X86/vec_shift.ll
index 9c595bc6ef067..ddf0469b72a71 100644
--- a/test/CodeGen/X86/vec_shift.ll
+++ b/test/CodeGen/X86/vec_shift.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psllw
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psrlq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psraw
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psllw
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psrlq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psraw
 
 define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_shift2.ll b/test/CodeGen/X86/vec_shift2.ll
index b73f5f4900064..c5f9dc4ace329 100644
--- a/test/CodeGen/X86/vec_shift2.ll
+++ b/test/CodeGen/X86/vec_shift2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep CPI
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep CPI
 
 define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind  {
 	%tmp1 = bitcast <2 x i64> %b1 to <8 x i16>
diff --git a/test/CodeGen/X86/vec_shift3.ll b/test/CodeGen/X86/vec_shift3.ll
index 2641c5d596742..1ebf455c05550 100644
--- a/test/CodeGen/X86/vec_shift3.ll
+++ b/test/CodeGen/X86/vec_shift3.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psllq
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psraw
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psllq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psraw
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 2
 
 define <2 x i64> @t1(<2 x i64> %x1, i32 %bits) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-10.ll b/test/CodeGen/X86/vec_shuffle-10.ll
index 297469d920242..a63e3868ad75f 100644
--- a/test/CodeGen/X86/vec_shuffle-10.ll
+++ b/test/CodeGen/X86/vec_shuffle-10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
 ; RUN: grep unpcklps %t | count 1
 ; RUN: grep pshufd   %t | count 1
 ; RUN: not grep {sub.*esp} %t
diff --git a/test/CodeGen/X86/vec_shuffle-11.ll b/test/CodeGen/X86/vec_shuffle-11.ll
index 463858f1b65ba..640745ae2645a 100644
--- a/test/CodeGen/X86/vec_shuffle-11.ll
+++ b/test/CodeGen/X86/vec_shuffle-11.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov
+; RUN: llc < %s -march=x86 -mattr=+sse2 
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov
 
 define <4 x i32> @test() nounwind {
         %tmp131 = call <2 x i64> @llvm.x86.sse2.psrl.dq( <2 x i64> < i64 -1, i64 -1 >, i32 96 )         ; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-14.ll b/test/CodeGen/X86/vec_shuffle-14.ll
index 6e8d0b8077da9..f0cfc44ab19a7 100644
--- a/test/CodeGen/X86/vec_shuffle-14.ll
+++ b/test/CodeGen/X86/vec_shuffle-14.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 1
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd | count 2
-; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movq | count 3
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xor
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd | count 2
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movq | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
 
 define <4 x i32> @t1(i32 %a) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-15.ll b/test/CodeGen/X86/vec_shuffle-15.ll
index 062f77c279e45..5a9b8fd34579a 100644
--- a/test/CodeGen/X86/vec_shuffle-15.ll
+++ b/test/CodeGen/X86/vec_shuffle-15.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
 define <2 x i64> @t00(<2 x i64> %a, <2 x i64> %b) nounwind  {
 	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 0 >
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
index b3a5b769e67e0..470f676d4627e 100644
--- a/test/CodeGen/X86/vec_shuffle-16.ll
+++ b/test/CodeGen/X86/vec_shuffle-16.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t
 ; RUN: grep shufps %t | count 4
 ; RUN: grep movaps %t | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t
 ; RUN: grep pshufd %t | count 4
 ; RUN: not grep shufps %t
 ; RUN: not grep mov %t
diff --git a/test/CodeGen/X86/vec_shuffle-17.ll b/test/CodeGen/X86/vec_shuffle-17.ll
index 992d79184b87d..9c33abb4421a4 100644
--- a/test/CodeGen/X86/vec_shuffle-17.ll
+++ b/test/CodeGen/X86/vec_shuffle-17.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movd.*%rdi, %xmm0}
-; RUN: llvm-as < %s | llc -march=x86-64 | not grep xor
+; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi, %xmm0}
+; RUN: llc < %s -march=x86-64 | not grep xor
 ; PR2108
 
 define <2 x i64> @doload64(i64 %x) nounwind  {
diff --git a/test/CodeGen/X86/vec_shuffle-18.ll b/test/CodeGen/X86/vec_shuffle-18.ll
index 85392632a29e3..1104a4a8856b7 100644
--- a/test/CodeGen/X86/vec_shuffle-18.ll
+++ b/test/CodeGen/X86/vec_shuffle-18.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
 
 	%struct.vector4_t = type { <4 x float> }
 
diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll
index 4e7db20db5648..9fc09dfdd2b89 100644
--- a/test/CodeGen/X86/vec_shuffle-19.ll
+++ b/test/CodeGen/X86/vec_shuffle-19.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
 ; PR2485
 
 define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind  {
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
index 71890844894f0..6d1bac0743d49 100644
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
 
 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-22.ll b/test/CodeGen/X86/vec_shuffle-22.ll
index d19f110fc1004..5307ced4899bd 100644
--- a/test/CodeGen/X86/vec_shuffle-22.ll
+++ b/test/CodeGen/X86/vec_shuffle-22.ll
@@ -1,7 +1,7 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=pentium-m -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=pentium-m -o %t
 ; RUN: grep movlhps %t | count 1
 ; RUN: grep pshufd %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
 ; RUN: grep movlhps %t | count 1
 ; RUN: grep movddup %t | count 1
 
diff --git a/test/CodeGen/X86/vec_shuffle-23.ll b/test/CodeGen/X86/vec_shuffle-23.ll
index 7e8aa5dc4bf6f..05a3a1e9d2767 100644
--- a/test/CodeGen/X86/vec_shuffle-23.ll
+++ b/test/CodeGen/X86/vec_shuffle-23.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2                | not grep punpck
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2                |     grep pshufd
+; RUN: llc < %s -march=x86 -mattr=+sse2                | not grep punpck
+; RUN: llc < %s -march=x86 -mattr=+sse2                |     grep pshufd
 
 define i32 @t() nounwind {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll
index 170ba35173f3d..7562f1d89594e 100644
--- a/test/CodeGen/X86/vec_shuffle-24.ll
+++ b/test/CodeGen/X86/vec_shuffle-24.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2  |     grep punpck
+; RUN: llc < %s -march=x86 -mattr=+sse2  |     grep punpck
 
 define i32 @t() nounwind optsize {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-25.ll b/test/CodeGen/X86/vec_shuffle-25.ll
index 18922aaac10e8..2aa2d252849ca 100644
--- a/test/CodeGen/X86/vec_shuffle-25.ll
+++ b/test/CodeGen/X86/vec_shuffle-25.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
 ; RUN: grep unpcklps %t | count 3
 ; RUN: grep unpckhps %t | count 1
  
diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll
index abd6e90b79071..8cc15d1e7c27a 100644
--- a/test/CodeGen/X86/vec_shuffle-26.ll
+++ b/test/CodeGen/X86/vec_shuffle-26.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
 ; RUN: grep unpcklps %t | count 1
 ; RUN: grep unpckhps %t | count 3
 
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index 231ac0c3b8342..d700ccbf53032 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
 ; RUN: grep addps %t | count 2
 ; RUN: grep mulps %t | count 2
 ; RUN: grep subps %t | count 2
diff --git a/test/CodeGen/X86/vec_shuffle-28.ll b/test/CodeGen/X86/vec_shuffle-28.ll
index f7e5001e64d1f..343685bf8ad2c 100644
--- a/test/CodeGen/X86/vec_shuffle-28.ll
+++ b/test/CodeGen/X86/vec_shuffle-28.ll
@@ -1,8 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
-; RUN: grep movd %t | count 1
-; RUN: grep pshuflw %t | count 1
-; RUN: grep pinsrw %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
 ; RUN: grep pshufb %t | count 1
 
 ; FIXME: this test has a superfluous punpcklqdq pre-pshufb currently.
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
index 018b4cf1a06ba..556f1037d0c5c 100644
--- a/test/CodeGen/X86/vec_shuffle-3.ll
+++ b/test/CodeGen/X86/vec_shuffle-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
 ; RUN: grep movlhps %t | count 1
 ; RUN: grep movhlps %t | count 1
 
diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll
index 50a3df8f0b2af..3f69150ac533c 100644
--- a/test/CodeGen/X86/vec_shuffle-30.ll
+++ b/test/CodeGen/X86/vec_shuffle-30.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -disable-mmx -o %t -f
-; RUN: grep pshufhw %t | grep 161 | count 1
+; RUN: llc < %s -march=x86 -mattr=sse41 -disable-mmx -o %t
+; RUN: grep pshufhw %t | grep -- -95 | count 1
 ; RUN: grep shufps %t | count 1
 ; RUN: not grep pslldq %t
 
 ; Test case when creating pshufhw, we incorrectly set the higher order bit
 ; for an undef,
-define void @test(<8 x i16>* %dest, <8 x i16> %in) {
+define void @test(<8 x i16>* %dest, <8 x i16> %in) nounwind {
 entry:
   %0 = load <8 x i16>* %dest
   %1 = shufflevector <8 x i16> %0, <8 x i16> %in, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 13, i32 undef, i32 14, i32 14>
@@ -14,7 +14,7 @@ entry:
 }                              
 
 ; A test case where we shouldn't generate a punpckldq but a pshufd and a pslldq
-define void @test2(<4 x i32>* %dest, <4 x i32> %in) {
+define void @test2(<4 x i32>* %dest, <4 x i32> %in) nounwind {
 entry:
   %0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2>
   store <4 x i32> %0, <4 x i32>* %dest
diff --git a/test/CodeGen/X86/vec_shuffle-31.ll b/test/CodeGen/X86/vec_shuffle-31.ll
index efcd0300e35ff..bb06e15425bb2 100644
--- a/test/CodeGen/X86/vec_shuffle-31.ll
+++ b/test/CodeGen/X86/vec_shuffle-31.ll
@@ -1,9 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
-; RUN: grep pextrw %t | count 1
-; RUN: grep movlhps %t | count 1
-; RUN: grep pshufhw %t | count 1
-; RUN: grep pinsrw %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
 ; RUN: grep pshufb %t | count 1
 
 define <8 x i16> @shuf3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
diff --git a/test/CodeGen/X86/vec_shuffle-34.ll b/test/CodeGen/X86/vec_shuffle-34.ll
index 99c95d1623e72..d057b3fa7ea8e 100644
--- a/test/CodeGen/X86/vec_shuffle-34.ll
+++ b/test/CodeGen/X86/vec_shuffle-34.ll
@@ -1,10 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
-; RUN: grep pextrw %t | count 1
-; RUN: grep punpcklqdq %t | count 1
-; RUN: grep pshuflw %t | count 1
-; RUN: grep pinsrw %t | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
-; RUN: grep pshufb %t | count 2
+; RUN: llc < %s -march=x86 -mcpu=core2 | grep pshufb | count 2
 
 define <8 x i16> @shuf2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-35.ll b/test/CodeGen/X86/vec_shuffle-35.ll
index 7be8468849031..7f0fcb5969e4f 100644
--- a/test/CodeGen/X86/vec_shuffle-35.ll
+++ b/test/CodeGen/X86/vec_shuffle-35.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -stack-alignment=16 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=yonah -stack-alignment=16 -o %t
 ; RUN: grep pextrw %t | count 13
 ; RUN: grep pinsrw %t | count 14
 ; RUN: grep rolw %t | count 13
 ; RUN: not grep esp %t
 ; RUN: not grep ebp %t
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -stack-alignment=16 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -stack-alignment=16 -o %t
 ; RUN: grep pshufb %t | count 3
 
 define <16 x i8> @shuf1(<16 x i8> %T0) nounwind readnone {
diff --git a/test/CodeGen/X86/vec_shuffle-36.ll b/test/CodeGen/X86/vec_shuffle-36.ll
index 005118705856f..8a93a7eeee3b6 100644
--- a/test/CodeGen/X86/vec_shuffle-36.ll
+++ b/test/CodeGen/X86/vec_shuffle-36.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
 ; RUN: grep pshufb %t | count 1
 
 
diff --git a/test/CodeGen/X86/vec_shuffle-4.ll b/test/CodeGen/X86/vec_shuffle-4.ll
index 3c03baa5062ca..829fedf97cc55 100644
--- a/test/CodeGen/X86/vec_shuffle-4.ll
+++ b/test/CodeGen/X86/vec_shuffle-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
 ; RUN: grep shuf %t | count 2
 ; RUN: not grep unpck %t
 
diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll
index e356f2456b75c..c24167a6150d9 100644
--- a/test/CodeGen/X86/vec_shuffle-5.ll
+++ b/test/CodeGen/X86/vec_shuffle-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t  -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
 ; RUN: grep movhlps %t | count 1
 ; RUN: grep shufps  %t | count 1
 
diff --git a/test/CodeGen/X86/vec_shuffle-6.ll b/test/CodeGen/X86/vec_shuffle-6.ll
index f7c9f2daa39d6..f034b0aa7102a 100644
--- a/test/CodeGen/X86/vec_shuffle-6.ll
+++ b/test/CodeGen/X86/vec_shuffle-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
 ; RUN: grep movapd %t | count 1
 ; RUN: grep movaps %t | count 1
 ; RUN: grep movups %t | count 2
diff --git a/test/CodeGen/X86/vec_shuffle-7.ll b/test/CodeGen/X86/vec_shuffle-7.ll
index fbcfac5b57a46..4cdca09c72f51 100644
--- a/test/CodeGen/X86/vec_shuffle-7.ll
+++ b/test/CodeGen/X86/vec_shuffle-7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t  -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
 ; RUN: grep xorps %t | count 1
 ; RUN: not grep shufps %t
 
diff --git a/test/CodeGen/X86/vec_shuffle-8.ll b/test/CodeGen/X86/vec_shuffle-8.ll
index 73d75e63914d3..964ce7b2892be 100644
--- a/test/CodeGen/X86/vec_shuffle-8.ll
+++ b/test/CodeGen/X86/vec_shuffle-8.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \
+; RUN: llc < %s -march=x86 -mattr=+sse2 | \
 ; RUN:   not grep shufps
 
 define void @test(<4 x float>* %res, <4 x float>* %A) {
diff --git a/test/CodeGen/X86/vec_shuffle-9.ll b/test/CodeGen/X86/vec_shuffle-9.ll
index 68577d455f756..2bef24d443eb3 100644
--- a/test/CodeGen/X86/vec_shuffle-9.ll
+++ b/test/CodeGen/X86/vec_shuffle-9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
 ; RUN: grep punpck %t | count 2
 ; RUN: not grep pextrw %t
 
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll
index f43aa1d4e7147..c05b79a54a15d 100644
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
 ; RUN: grep shufp   %t | count 1
 ; RUN: grep movupd  %t | count 1
 ; RUN: grep pshufhw %t | count 1
diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll
index c6e3dddd5fa6c..cde5ae99563e5 100644
--- a/test/CodeGen/X86/vec_splat-2.ll
+++ b/test/CodeGen/X86/vec_splat-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd | count 1
 
 define void @test(<2 x i64>* %P, i8 %x) nounwind {
 	%tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0		; <<16 x i8>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll
index 1f1a214479f4f..649b85c5dadd6 100644
--- a/test/CodeGen/X86/vec_splat-3.ll
+++ b/test/CodeGen/X86/vec_splat-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
 ; RUN: grep punpcklwd %t | count 4
 ; RUN: grep punpckhwd %t | count 4
 ; RUN: grep "pshufd" %t | count 8
diff --git a/test/CodeGen/X86/vec_splat-4.ll b/test/CodeGen/X86/vec_splat-4.ll
index 220e1cd34d571..d9941e65bde35 100644
--- a/test/CodeGen/X86/vec_splat-4.ll
+++ b/test/CodeGen/X86/vec_splat-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
 ; RUN: grep punpcklbw %t | count 16
 ; RUN: grep punpckhbw %t | count 16
 ; RUN: grep "pshufd" %t | count 16
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index 89914fda63baf..a87fbd0dc6555 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse3 | grep movddup
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd
+; RUN: llc < %s -march=x86 -mattr=+sse3 | grep movddup
 
 define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
 	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
index 69900a686e25a..b1613fb3a3740 100644
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,+sse2 -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 -o %t
 ; RUN: grep minss %t | grep CPI | count 2
 ; RUN: grep CPI   %t | not grep movss
 
diff --git a/test/CodeGen/X86/vec_zero-2.ll b/test/CodeGen/X86/vec_zero-2.ll
index efdf5649a14b8..e42b5384b0817 100644
--- a/test/CodeGen/X86/vec_zero-2.ll
+++ b/test/CodeGen/X86/vec_zero-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
 define i32 @t() {
 entry:
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index 0a7a543412698..ae5af586cdc33 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep xorps | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xorps | count 2
 
 define void @foo(<4 x float>* %P) {
         %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 0ccf745e524e2..296378c6e9f5c 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
-; RUN: llvm-as < %s | llc -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
 
 @M1 = external global <1 x i64>
 @M2 = external global <2 x i32>
diff --git a/test/CodeGen/X86/vector-intrinsics.ll b/test/CodeGen/X86/vector-intrinsics.ll
index 32916589879dc..edf58b9da1114 100644
--- a/test/CodeGen/X86/vector-intrinsics.ll
+++ b/test/CodeGen/X86/vector-intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep call | count 16
+; RUN: llc < %s -march=x86-64 | grep call | count 16
 
 declare <4 x double> @llvm.sin.v4f64(<4 x double> %p)
 declare <4 x double> @llvm.cos.v4f64(<4 x double> %p)
diff --git a/test/CodeGen/X86/vector-rem.ll b/test/CodeGen/X86/vector-rem.ll
index cfdd34ee1c9b7..51cd872643f2d 100644
--- a/test/CodeGen/X86/vector-rem.ll
+++ b/test/CodeGen/X86/vector-rem.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 8
-; RUN: llvm-as < %s | llc -march=x86-64 | grep fmodf | count 4
+; RUN: llc < %s -march=x86-64 | grep div | count 8
+; RUN: llc < %s -march=x86-64 | grep fmodf | count 4
 
 define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) {
 	%m = srem <4 x i32> %t, %u
diff --git a/test/CodeGen/X86/vector-variable-idx.ll b/test/CodeGen/X86/vector-variable-idx.ll
index 82927e96983d9..2a4d18c141a30 100644
--- a/test/CodeGen/X86/vector-variable-idx.ll
+++ b/test/CodeGen/X86/vector-variable-idx.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movss | count 2
+; RUN: llc < %s -march=x86-64 | grep movss | count 2
 ; PR2676
 
 define float @foo(<4 x float> %p, i32 %t) {
diff --git a/test/CodeGen/X86/vector.ll b/test/CodeGen/X86/vector.ll
index 8e1de2f62cb0d..3fff8497dfdaf 100644
--- a/test/CodeGen/X86/vector.ll
+++ b/test/CodeGen/X86/vector.ll
@@ -1,6 +1,6 @@
 ; Test that vectors are scalarized/lowered correctly.
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=i386 > %t
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah > %t
+; RUN: llc < %s -march=x86 -mcpu=i386 > %t
+; RUN: llc < %s -march=x86 -mcpu=yonah > %t
 
 %d8 = type <8 x double>
 %f1 = type <1 x float>
diff --git a/test/CodeGen/X86/vfcmp.ll b/test/CodeGen/X86/vfcmp.ll
index 85b82a0ac8e87..f5f5293622b24 100644
--- a/test/CodeGen/X86/vfcmp.ll
+++ b/test/CodeGen/X86/vfcmp.ll
@@ -1,8 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2
 ; PR2620
 
-define void @t(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind {
-	vfcmp olt <2 x double> zeroinitializer, zeroinitializer		; <<2 x i64>>:1 [#uses=1]
+
+define void @t2(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind {
+	%A = fcmp olt <2 x double> zeroinitializer, zeroinitializer		; <<2 x i64>>:1 [#uses=1]
+        sext <2 x i1> %A to <2 x i64>
 	extractelement <2 x i64> %1, i32 1		; <i64>:2 [#uses=1]
 	lshr i64 %2, 63		; <i64>:3 [#uses=1]
 	trunc i64 %3 to i1		; <i1>:4 [#uses=1]
diff --git a/test/CodeGen/X86/volatile.ll b/test/CodeGen/X86/volatile.ll
index f919b5de49615..5e1e0c858e655 100644
--- a/test/CodeGen/X86/volatile.ll
+++ b/test/CodeGen/X86/volatile.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 | grep movsd | count 5
-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse2 -O0 | grep movsd | count 5
+; RUN: llc < %s -march=x86 -mattr=sse2 | grep movsd | count 5
+; RUN: llc < %s -march=x86 -mattr=sse2 -O0 | grep movsd | count 5
 
 @x = external global double
 
diff --git a/test/CodeGen/X86/vortex-bug.ll b/test/CodeGen/X86/vortex-bug.ll
index d62bb24e33dfc..40f11175b20a4 100644
--- a/test/CodeGen/X86/vortex-bug.ll
+++ b/test/CodeGen/X86/vortex-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 
 	%struct.blktkntype = type { i32, i32 }
 	%struct.fieldstruc = type { [128 x i8], %struct.blktkntype*, i32, i32 }
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index d7a20e46c18e8..ae845e0a33d19 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -1,13 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psllq  %t | count 2
-; RUN: grep pslld %t | count 2
-; RUN: grep psllw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
 
 ; test vector shifts converted to proper SSE2 vector shifts when the shift
 ; amounts are the same.
 
 define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
 entry:
+; CHECK: shift1a:
+; CHECK: psllq
   %shl = shl <2 x i64> %val, < i64 32, i64 32 >
   store <2 x i64> %shl, <2 x i64>* %dst
   ret void
@@ -15,6 +14,9 @@ entry:
 
 define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 entry:
+; CHECK: shift1b:
+; CHECK: movd
+; CHECK-NEXT: psllq
   %0 = insertelement <2 x i64> undef, i64 %amt, i32 0
   %1 = insertelement <2 x i64> %0, i64 %amt, i32 1
   %shl = shl <2 x i64> %val, %1
@@ -25,6 +27,8 @@ entry:
 
 define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
 entry:
+; CHECK: shift2a:
+; CHECK: pslld
   %shl = shl <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 >
   store <4 x i32> %shl, <4 x i32>* %dst
   ret void
@@ -32,6 +36,9 @@ entry:
 
 define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: pslld
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
   %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -43,13 +50,20 @@ entry:
 
 define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
 entry:
+; CHECK: shift3a:
+; CHECK: psllw
   %shl = shl <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
   store <8 x i16> %shl, <8 x i16>* %dst
   ret void
 }
 
+; Make sure the shift amount is properly zero extended.
 define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
 entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psllw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
   %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 0807174420e8c..36feb11603d87 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -1,13 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psrlq  %t | count 2
-; RUN: grep psrld %t | count 2
-; RUN: grep psrlw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
 
 ; test vector shifts converted to proper SSE2 vector shifts when the shift
 ; amounts are the same.
 
 define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
 entry:
+; CHECK: shift1a:
+; CHECK: psrlq
   %lshr = lshr <2 x i64> %val, < i64 32, i64 32 >
   store <2 x i64> %lshr, <2 x i64>* %dst
   ret void
@@ -15,6 +14,9 @@ entry:
 
 define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 entry:
+; CHECK: shift1b:
+; CHECK: movd
+; CHECK-NEXT: psrlq
   %0 = insertelement <2 x i64> undef, i64 %amt, i32 0
   %1 = insertelement <2 x i64> %0, i64 %amt, i32 1
   %lshr = lshr <2 x i64> %val, %1
@@ -24,6 +26,8 @@ entry:
 
 define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
 entry:
+; CHECK: shift2a:
+; CHECK: psrld
   %lshr = lshr <4 x i32> %val, < i32 17, i32 17, i32 17, i32 17 >
   store <4 x i32> %lshr, <4 x i32>* %dst
   ret void
@@ -31,6 +35,9 @@ entry:
 
 define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: psrld
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
   %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -43,13 +50,20 @@ entry:
 
 define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
 entry:
+; CHECK: shift3a:
+; CHECK: psrlw
   %lshr = lshr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
   store <8 x i16> %lshr, <8 x i16>* %dst
   ret void
 }
 
+; properly zero extend the shift amount
 define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
 entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psrlw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
   %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
@@ -61,4 +75,4 @@ entry:
   %lshr = lshr <8 x i16> %val, %7
   store <8 x i16> %lshr, <8 x i16>* %dst
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index eea8ad1c798eb..20d3f48a1a67c 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -1,13 +1,15 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psrad %t | count 2
-; RUN: grep psraw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
 
 ; test vector shifts converted to proper SSE2 vector shifts when the shift
 ; amounts are the same.
 
 ; Note that x86 does have ashr 
+
+; shift1a can't use a packed shift
 define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
 entry:
+; CHECK: shift1a:
+; CHECK: sarl
   %ashr = ashr <2 x i64> %val, < i64 32, i64 32 >
   store <2 x i64> %ashr, <2 x i64>* %dst
   ret void
@@ -15,6 +17,8 @@ entry:
 
 define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
 entry:
+; CHECK: shift2a:
+; CHECK: psrad	$5
   %ashr = ashr <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 >
   store <4 x i32> %ashr, <4 x i32>* %dst
   ret void
@@ -22,6 +26,9 @@ entry:
 
 define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: psrad
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
   %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -33,6 +40,8 @@ entry:
 
 define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
 entry:
+; CHECK: shift3a:
+; CHECK: psraw	$5
   %ashr = ashr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
   store <8 x i16> %ashr, <8 x i16>* %dst
   ret void
@@ -40,6 +49,10 @@ entry:
 
 define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
 entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psraw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
   %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
@@ -51,4 +64,4 @@ entry:
   %ashr = ashr <8 x i16> %val, %7
   store <8 x i16> %ashr, <8 x i16>* %dst
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
index 03ab95c0e105c..9773cbed0ae37 100644
--- a/test/CodeGen/X86/vshift-4.ll
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -1,21 +1,23 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -f
-; RUN: grep psllq %t | count 1
-; RUN: grep pslld %t | count 3
-; RUN: grep psllw %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
 
 ; test vector shifts converted to proper SSE2 vector shifts when the shift
 ; amounts are the same when using a shuffle splat.
 
 define void @shift1a(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
 entry:
+; CHECK: shift1a:
+; CHECK: psllq
   %shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %shl = shl <2 x i64> %val, %shamt
   store <2 x i64> %shl, <2 x i64>* %dst
   ret void
 }
 
+; shift1b can't use a packed shift
 define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
 entry:
+; CHECK: shift1b:
+; CHECK: shll
   %shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
   %shl = shl <2 x i64> %val, %shamt
   store <2 x i64> %shl, <2 x i64>* %dst
@@ -24,6 +26,8 @@ entry:
 
 define void @shift2a(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
 entry:
+; CHECK: shift2a:
+; CHECK: pslld
   %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %shl = shl <4 x i32> %val, %shamt
   store <4 x i32> %shl, <4 x i32>* %dst
@@ -32,6 +36,8 @@ entry:
 
 define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
 entry:
+; CHECK: shift2b:
+; CHECK: pslld
   %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 1, i32 1>
   %shl = shl <4 x i32> %val, %shamt
   store <4 x i32> %shl, <4 x i32>* %dst
@@ -40,6 +46,8 @@ entry:
 
 define void @shift2c(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
 entry:
+; CHECK: shift2c:
+; CHECK: pslld
   %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %shl = shl <4 x i32> %val, %shamt
   store <4 x i32> %shl, <4 x i32>* %dst
@@ -48,6 +56,9 @@ entry:
 
 define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind {
 entry:
+; CHECK: shift3a:
+; CHECK: movzwl
+; CHECK: psllw
   %shamt = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
   %shl = shl <8 x i16> %val, %shamt
   store <8 x i16> %shl, <8 x i16>* %dst
@@ -56,6 +67,9 @@ entry:
 
 define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
 entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: psllw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
   %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
new file mode 100644
index 0000000000000..a543f382b5137
--- /dev/null
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+
+; When loading the shift amount from memory, avoid generating the splat.
+
+define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
+entry:
+; CHECK: shift5a:
+; CHECK: movd
+; CHECK-NEXT: pslld
+  %amt = load i32* %pamt 
+  %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+  %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer 
+  %shl = shl <4 x i32> %val, %shamt
+  store <4 x i32> %shl, <4 x i32>* %dst
+  ret void
+}
+
+
+define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
+entry:
+; CHECK: shift5b:
+; CHECK: movd
+; CHECK-NEXT: psrad
+  %amt = load i32* %pamt 
+  %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+  %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer 
+  %shr = ashr <4 x i32> %val, %shamt
+  store <4 x i32> %shr, <4 x i32>* %dst
+  ret void
+}
+
+
+define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
+entry:
+; CHECK: shift5c:
+; CHECK: movd
+; CHECK-NEXT: pslld
+  %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+  %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shl = shl <4 x i32> %val, %shamt
+  store <4 x i32> %shl, <4 x i32>* %dst
+  ret void
+}
+
+
+define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
+entry:
+; CHECK: shift5d:
+; CHECK: movd
+; CHECK-NEXT: psrad
+  %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+  %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shr = ashr <4 x i32> %val, %shamt
+  store <4 x i32> %shr, <4 x i32>* %dst
+  ret void
+}
diff --git a/test/CodeGen/X86/vshift_scalar.ll b/test/CodeGen/X86/vshift_scalar.ll
index 8895cdf8aff63..9dd8478caaed4 100644
--- a/test/CodeGen/X86/vshift_scalar.ll
+++ b/test/CodeGen/X86/vshift_scalar.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 
 ; Legalization test that requires scalarizing a vector.
 
diff --git a/test/CodeGen/X86/vshift_split.ll b/test/CodeGen/X86/vshift_split.ll
index 8f485ddd9a6f4..359d36d8af698 100644
--- a/test/CodeGen/X86/vshift_split.ll
+++ b/test/CodeGen/X86/vshift_split.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s -march=x86 -mattr=+sse2
 
 ; Example that requires splitting and expanding a vector shift.
 define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
 entry:
-	%shr = lshr <2 x i64> %val, < i64 2, i64 2 >		; <<2 x i64>> [#uses=1]
+	%shr = lshr <2 x i64> %val, < i64 2, i64 3 >
 	ret <2 x i64> %shr
 }
diff --git a/test/CodeGen/X86/vshift_split2.ll b/test/CodeGen/X86/vshift_split2.ll
index e9438492a0fb3..0f8c2b896e2b5 100644
--- a/test/CodeGen/X86/vshift_split2.ll
+++ b/test/CodeGen/X86/vshift_split2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah
+; RUN: llc < %s -march=x86 -mcpu=yonah
 
 ; Legalization example that requires splitting a large vector into smaller pieces.
 
diff --git a/test/CodeGen/X86/weak.ll b/test/CodeGen/X86/weak.ll
index 28638afd57ef5..8590e8d0001e2 100644
--- a/test/CodeGen/X86/weak.ll
+++ b/test/CodeGen/X86/weak.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86
+; RUN: llc < %s -march=x86
 @a = extern_weak global i32             ; <i32*> [#uses=1]
 @b = global i32* @a             ; <i32**> [#uses=0]
 
diff --git a/test/CodeGen/X86/wide-integer-fold.ll b/test/CodeGen/X86/wide-integer-fold.ll
new file mode 100644
index 0000000000000..b3b4d24ab3acc
--- /dev/null
+++ b/test/CodeGen/X86/wide-integer-fold.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; CHECK:  movq  $-65535, %rax
+
+; DAGCombiner should fold this to a simple constant.
+
+define i64 @foo(i192 %a) nounwind {
+  %t = or i192 %a, -22300404916163702203072254898040925442801665
+  %s = and i192 %t, -22300404916163702203072254898040929737768960
+  %u = lshr i192 %s, 128
+  %v = trunc i192 %u to i64
+  ret i64 %v
+}
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index 419078174d1a6..8f607f5ed5930 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; RUN: grep paddb  %t | count 1
 ; RUN: grep pextrb %t | count 1
 ; RUN: not grep pextrw %t
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
index de6cd0871be7d..e2420f0ff19c6 100644
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; RUN: grep paddb  %t | count 1
 ; RUN: grep pand %t | count 1
 
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index fbba4457e2266..a22d2547566fc 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; RUN: grep paddw  %t | count 1
 ; RUN: grep movd %t | count 2
 ; RUN: grep pextrw %t | count 1
diff --git a/test/CodeGen/X86/widen_arith-4.ll b/test/CodeGen/X86/widen_arith-4.ll
index e19ab6574aad7..898bff01378a3 100644
--- a/test/CodeGen/X86/widen_arith-4.ll
+++ b/test/CodeGen/X86/widen_arith-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; RUN: grep psubw  %t | count 1
 ; RUN: grep pmullw %t | count 1
 
diff --git a/test/CodeGen/X86/widen_arith-5.ll b/test/CodeGen/X86/widen_arith-5.ll
index 6ff099dd8f9f2..1ecf09d9ff32d 100644
--- a/test/CodeGen/X86/widen_arith-5.ll
+++ b/test/CodeGen/X86/widen_arith-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; RUN: grep pmulld  %t | count 1
 ; RUN: grep psubd  %t | count 1
 ; RUN: grep movaps %t | count 1
diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll
index 7b0bb33c00241..358325885f2a3 100644
--- a/test/CodeGen/X86/widen_arith-6.ll
+++ b/test/CodeGen/X86/widen_arith-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; RUN: grep mulps  %t | count 1
 ; RUN: grep addps  %t | count 1
 
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index ed8d27cde649c..441a360486336 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; RUN: grep paddw  %t | count 1
 ; RUN: grep movd  %t | count 1
 ; RUN: grep pextrd  %t | count 1
diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll
index 3b45ce308d24d..ded5707aed40d 100644
--- a/test/CodeGen/X86/widen_cast-2.ll
+++ b/test/CodeGen/X86/widen_cast-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; RUN: grep pextrd  %t | count 5
 ; RUN: grep movd  %t | count 3
 
diff --git a/test/CodeGen/X86/widen_cast-3.ll b/test/CodeGen/X86/widen_cast-3.ll
index 33cc41f73fe34..67a760f5df093 100644
--- a/test/CodeGen/X86/widen_cast-3.ll
+++ b/test/CodeGen/X86/widen_cast-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; RUN: grep paddd  %t | count 1
 ; RUN: grep pextrd %t | count 2
 
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index b090cb1614ce1..614eeedbe79d3 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; RUN: grep sarb  %t | count 8
 
 ; v8i8 that is widen to v16i8 then split
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index 76969429befe5..92618d6fe157e 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 
 ; bitcast a i64 to v2i32
 
diff --git a/test/CodeGen/X86/widen_cast-6.ll b/test/CodeGen/X86/widen_cast-6.ll
index 0fa1b7a7604a9..386f749a50661 100644
--- a/test/CodeGen/X86/widen_cast-6.ll
+++ b/test/CodeGen/X86/widen_cast-6.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse41 -disable-mmx -o %t
 ; RUN: grep movd  %t | count 1
 
 ; Test bit convert that requires widening in the operand.
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
index a4aab7bb1da60..ccc8b4ff06e6f 100644
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; RUN: grep pshufd %t | count 1
 ; RUN: grep paddd  %t | count 1
 
diff --git a/test/CodeGen/X86/widen_conv-2.ll b/test/CodeGen/X86/widen_conv-2.ll
index 191a261ccf18b..9b7ab74eb2e14 100644
--- a/test/CodeGen/X86/widen_conv-2.ll
+++ b/test/CodeGen/X86/widen_conv-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 
 ; sign extension v2i32 to v2i16
 
diff --git a/test/CodeGen/X86/widen_conv-3.ll b/test/CodeGen/X86/widen_conv-3.ll
index 154788d667bae..4ec76a908e811 100644
--- a/test/CodeGen/X86/widen_conv-3.ll
+++ b/test/CodeGen/X86/widen_conv-3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 ; grep cvtsi2ss  %t | count 1 
 ; sign to float v2i16 to v2f32
 
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index 1ea5788ab3e4e..61a26a8b80bda 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 
 ; unsigned to float v7i16 to v7f32
 
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
new file mode 100644
index 0000000000000..f6c4af03209be
--- /dev/null
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -o - -march=x86-64 | FileCheck %s
+; PR4891
+
+; Both loads should happen before either store.
+
+; CHECK: movl  (%rdi), %eax
+; CHECK: movl  (%rsi), %ecx
+; CHECK: movl  %ecx, (%rdi)
+; CHECK: movl  %eax, (%rsi)
+
+define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
+entry:
+  %0 = load <2 x i16>* %b, align 2                ; <<2 x i16>> [#uses=1]
+  %1 = load i32* %c, align 4                      ; <i32> [#uses=1]
+  %tmp1 = bitcast i32 %1 to <2 x i16>             ; <<2 x i16>> [#uses=1]
+  store <2 x i16> %tmp1, <2 x i16>* %b, align 2
+  %tmp5 = bitcast <2 x i16> %0 to <1 x i32>       ; <<1 x i32>> [#uses=1]
+  %tmp3 = extractelement <1 x i32> %tmp5, i32 0   ; <i32> [#uses=1]
+  store i32 %tmp3, i32* %c, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
new file mode 100644
index 0000000000000..2d34b31314d5b
--- /dev/null
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -0,0 +1,45 @@
+; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -disable-mmx | FileCheck %s
+; PR4891
+
+; This load should be before the call, not after.
+
+; CHECK: movq    compl+128(%rip), %xmm0
+; CHECK: movaps  %xmm0, (%rsp)
+; CHECK: call    killcommon
+
+@compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1]
+
+declare void @killcommon(i32* noalias)
+
+define void @reset(<2 x float>* noalias %garbage1) {
+"file complex.c, line 27, bb1":
+  %changed = alloca i32, align 4                  ; <i32*> [#uses=3]
+  br label %"file complex.c, line 27, bb13"
+
+"file complex.c, line 27, bb13":                  ; preds = %"file complex.c, line 27, bb1"
+  store i32 0, i32* %changed, align 4
+  %r2 = getelementptr float* bitcast ([20 x i64]* @compl to float*), i64 32 ; <float*> [#uses=1]
+  %r3 = bitcast float* %r2 to <2 x float>*        ; <<2 x float>*> [#uses=1]
+  %r4 = load <2 x float>* %r3, align 4            ; <<2 x float>> [#uses=1]
+  call void @killcommon(i32* %changed)
+  br label %"file complex.c, line 34, bb4"
+
+"file complex.c, line 34, bb4":                   ; preds = %"file complex.c, line 27, bb13"
+  %r5 = load i32* %changed, align 4               ; <i32> [#uses=1]
+  %r6 = icmp eq i32 %r5, 0                        ; <i1> [#uses=1]
+  %r7 = zext i1 %r6 to i32                        ; <i32> [#uses=1]
+  %r8 = icmp ne i32 %r7, 0                        ; <i1> [#uses=1]
+  br i1 %r8, label %"file complex.c, line 34, bb7", label %"file complex.c, line 27, bb5"
+
+"file complex.c, line 27, bb5":                   ; preds = %"file complex.c, line 34, bb4"
+  br label %"file complex.c, line 35, bb6"
+
+"file complex.c, line 35, bb6":                   ; preds = %"file complex.c, line 27, bb5"
+  %r11 = ptrtoint <2 x float>* %garbage1 to i64   ; <i64> [#uses=1]
+  %r12 = inttoptr i64 %r11 to <2 x float>*        ; <<2 x float>*> [#uses=1]
+  store <2 x float> %r4, <2 x float>* %r12, align 4
+  br label %"file complex.c, line 34, bb7"
+
+"file complex.c, line 34, bb7":                   ; preds = %"file complex.c, line 35, bb6", %"file complex.c, line 34, bb4"
+  ret void
+}
diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll
index 3d757b8a8a500..aca0b67cb663b 100644
--- a/test/CodeGen/X86/widen_select-1.ll
+++ b/test/CodeGen/X86/widen_select-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 
 ; widening select v6i32 and then a sub
 
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index a676f33d6c68b..15da87005c92f 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 
 ; widening shuffle v3float and then a add
 
diff --git a/test/CodeGen/X86/widen_shuffle-2.ll b/test/CodeGen/X86/widen_shuffle-2.ll
index c2dfa3d272c3b..617cc1de4ba8a 100644
--- a/test/CodeGen/X86/widen_shuffle-2.ll
+++ b/test/CodeGen/X86/widen_shuffle-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -o %t
 
 ; widening shuffle v3float and then a add
 
diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll
index 3d61e5dbe5a77..3c73891112670 100644
--- a/test/CodeGen/X86/x86-64-and-mask.ll
+++ b/test/CodeGen/X86/x86-64-and-mask.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {movl.*%edi, %eax}
+; RUN: llc < %s | grep {movl.*%edi, %eax}
 ; This should be a single mov, not a load of immediate + andq.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/x86-64-arg.ll b/test/CodeGen/X86/x86-64-arg.ll
index 22a095b0d9b50..ec8dd8edb6342 100644
--- a/test/CodeGen/X86/x86-64-arg.ll
+++ b/test/CodeGen/X86/x86-64-arg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep {movl	%edi, %eax}
+; RUN: llc < %s | grep {movl	%edi, %eax}
 ; The input value is already sign extended, don't re-extend it.
 ; This testcase corresponds to:
 ;   int test(short X) { return (int)X; }
diff --git a/test/CodeGen/X86/x86-64-asm.ll b/test/CodeGen/X86/x86-64-asm.ll
index 8ccf8b67448b3..2640e593ec185 100644
--- a/test/CodeGen/X86/x86-64-asm.ll
+++ b/test/CodeGen/X86/x86-64-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1029
 
 target datalayout = "e-p:64:64"
diff --git a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
index 15a30de21c6f4..79316f29de37a 100644
--- a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
+++ b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | not grep rsp
-; RUN: llvm-as < %s | llc | grep cvttsd2siq
+; RUN: llc < %s | not grep rsp
+; RUN: llc < %s | grep cvttsd2siq
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/x86-64-disp.ll b/test/CodeGen/X86/x86-64-disp.ll
index 4a8f6cdfb60da..d8059ebb1c196 100644
--- a/test/CodeGen/X86/x86-64-disp.ll
+++ b/test/CodeGen/X86/x86-64-disp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 2
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
 
 ; Fold an offset into an address even if it's not a 32-bit
 ; signed integer.
diff --git a/test/CodeGen/X86/x86-64-frameaddr.ll b/test/CodeGen/X86/x86-64-frameaddr.ll
index 80060996f32bd..57163d3c68398 100644
--- a/test/CodeGen/X86/x86-64-frameaddr.ll
+++ b/test/CodeGen/X86/x86-64-frameaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep movq | grep rbp
+; RUN: llc < %s -march=x86-64 | grep movq | grep rbp
 
 define i64* @stack_end_address() nounwind  {
 entry:
diff --git a/test/CodeGen/X86/x86-64-gv-offset.ll b/test/CodeGen/X86/x86-64-gv-offset.ll
index b89e1b95368d0..365e4af63fc17 100644
--- a/test/CodeGen/X86/x86-64-gv-offset.ll
+++ b/test/CodeGen/X86/x86-64-gv-offset.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep lea
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep lea
 
 	%struct.x = type { float, double }
 @X = global %struct.x { float 1.000000e+00, double 2.000000e+00 }, align 16		; <%struct.x*> [#uses=2]
diff --git a/test/CodeGen/X86/x86-64-malloc.ll b/test/CodeGen/X86/x86-64-malloc.ll
index 4beb5c21acab6..b4f1fa6667205 100644
--- a/test/CodeGen/X86/x86-64-malloc.ll
+++ b/test/CodeGen/X86/x86-64-malloc.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {shll.*3, %edi}
+; RUN: llc < %s -march=x86-64 | grep {shll.*3, %edi}
 ; PR3829
 ; The generated code should multiply by 3 (sizeof i8*) as an i32,
 ; not as an i64!
diff --git a/test/CodeGen/X86/x86-64-mem.ll b/test/CodeGen/X86/x86-64-mem.ll
index 7497362a15462..d15f516cddee3 100644
--- a/test/CodeGen/X86/x86-64-mem.ll
+++ b/test/CodeGen/X86/x86-64-mem.ll
@@ -1,10 +1,9 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -o %t1
 ; RUN: grep GOTPCREL %t1 | count 4
 ; RUN: grep %%rip      %t1 | count 6
 ; RUN: grep movq     %t1 | count 6
 ; RUN: grep leaq     %t1 | count 1
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=static -o %t2 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=static -o %t2
 ; RUN: grep movl %t2 | count 2
 ; RUN: grep movq %t2 | count 2
 
diff --git a/test/CodeGen/X86/x86-64-pic-1.ll b/test/CodeGen/X86/x86-64-pic-1.ll
index f5303c6ad2d4f..b21918ef80d4b 100644
--- a/test/CodeGen/X86/x86-64-pic-1.ll
+++ b/test/CodeGen/X86/x86-64-pic-1.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep {call	f@PLT} %t1
 
 define void @g() {
diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll
index bc0d0c09f4d0f..0f65e57449596 100644
--- a/test/CodeGen/X86/x86-64-pic-10.ll
+++ b/test/CodeGen/X86/x86-64-pic-10.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep {call	g@PLT} %t1
 
 @g = alias weak i32 ()* @f
diff --git a/test/CodeGen/X86/x86-64-pic-11.ll b/test/CodeGen/X86/x86-64-pic-11.ll
index f7e0def2d06b0..ef816853326e2 100644
--- a/test/CodeGen/X86/x86-64-pic-11.ll
+++ b/test/CodeGen/X86/x86-64-pic-11.ll
@@ -1,8 +1,7 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep {call	__fixunsxfti@PLT} %t1
 
-define i128 @f(x86_fp80 %a)  {
+define i128 @f(x86_fp80 %a) nounwind {
 entry:
 	%tmp78 = fptoui x86_fp80 %a to i128
 	ret i128 %tmp78
diff --git a/test/CodeGen/X86/x86-64-pic-2.ll b/test/CodeGen/X86/x86-64-pic-2.ll
index 39aecbadc4872..a52c564f96836 100644
--- a/test/CodeGen/X86/x86-64-pic-2.ll
+++ b/test/CodeGen/X86/x86-64-pic-2.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep {call	f} %t1
 ; RUN: not grep {call	f@PLT} %t1
 
diff --git a/test/CodeGen/X86/x86-64-pic-3.ll b/test/CodeGen/X86/x86-64-pic-3.ll
index 0f5f4b706ab4d..246c00f74119d 100644
--- a/test/CodeGen/X86/x86-64-pic-3.ll
+++ b/test/CodeGen/X86/x86-64-pic-3.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep {call	f} %t1
 ; RUN: not grep {call	f@PLT} %t1
 
diff --git a/test/CodeGen/X86/x86-64-pic-4.ll b/test/CodeGen/X86/x86-64-pic-4.ll
index f8dfa927828ab..90fc1194a33bb 100644
--- a/test/CodeGen/X86/x86-64-pic-4.ll
+++ b/test/CodeGen/X86/x86-64-pic-4.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep {movq	a@GOTPCREL(%rip),} %t1
 
 @a = global i32 0
diff --git a/test/CodeGen/X86/x86-64-pic-5.ll b/test/CodeGen/X86/x86-64-pic-5.ll
index 694755da53817..6369bde6943da 100644
--- a/test/CodeGen/X86/x86-64-pic-5.ll
+++ b/test/CodeGen/X86/x86-64-pic-5.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep {movl	a(%rip),} %t1
 ; RUN: not grep GOTPCREL %t1
 
diff --git a/test/CodeGen/X86/x86-64-pic-6.ll b/test/CodeGen/X86/x86-64-pic-6.ll
index 965a550108b26..6e19ad35bcf44 100644
--- a/test/CodeGen/X86/x86-64-pic-6.ll
+++ b/test/CodeGen/X86/x86-64-pic-6.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep {movl	a(%rip),} %t1
 ; RUN: not grep GOTPCREL %t1
 
diff --git a/test/CodeGen/X86/x86-64-pic-7.ll b/test/CodeGen/X86/x86-64-pic-7.ll
index 95b7197ff174f..4d98ee614026d 100644
--- a/test/CodeGen/X86/x86-64-pic-7.ll
+++ b/test/CodeGen/X86/x86-64-pic-7.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep {movq	f@GOTPCREL(%rip),} %t1
 
 define void ()* @g() nounwind {
diff --git a/test/CodeGen/X86/x86-64-pic-8.ll b/test/CodeGen/X86/x86-64-pic-8.ll
index 369e0cf365aca..d3b567c610763 100644
--- a/test/CodeGen/X86/x86-64-pic-8.ll
+++ b/test/CodeGen/X86/x86-64-pic-8.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep {leaq	f(%rip),} %t1
 ; RUN: not grep GOTPCREL %t1
 
diff --git a/test/CodeGen/X86/x86-64-pic-9.ll b/test/CodeGen/X86/x86-64-pic-9.ll
index 175ec4e5ef95e..076103133fa96 100644
--- a/test/CodeGen/X86/x86-64-pic-9.ll
+++ b/test/CodeGen/X86/x86-64-pic-9.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep {leaq	f(%rip),} %t1
 ; RUN: not grep GOTPCREL %t1
 
diff --git a/test/CodeGen/X86/x86-64-ret0.ll b/test/CodeGen/X86/x86-64-ret0.ll
index d4252e7d6e443..c74f6d803b1cc 100644
--- a/test/CodeGen/X86/x86-64-ret0.ll
+++ b/test/CodeGen/X86/x86-64-ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
 
 define i32 @f() nounwind  {
 	tail call void @t( i32 1 ) nounwind 
diff --git a/test/CodeGen/X86/x86-64-shortint.ll b/test/CodeGen/X86/x86-64-shortint.ll
index 369527fd29cfc..7f96543ba49d4 100644
--- a/test/CodeGen/X86/x86-64-shortint.ll
+++ b/test/CodeGen/X86/x86-64-shortint.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep movswl
+; RUN: llc < %s | grep movswl
 
 target datalayout = "e-p:64:64"
 target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/x86-64-sret-return.ll b/test/CodeGen/X86/x86-64-sret-return.ll
index 9298661998b0b..7b5f189faa0fa 100644
--- a/test/CodeGen/X86/x86-64-sret-return.ll
+++ b/test/CodeGen/X86/x86-64-sret-return.ll
@@ -1,9 +1,11 @@
-; RUN: llvm-as < %s | llc | grep {movq	%rdi, %rax}
+; RUN: llc < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
 	%struct.foo = type { [4 x i64] }
 
+; CHECK: bar:
+; CHECK: movq %rdi, %rax
 define void @bar(%struct.foo* noalias sret  %agg.result, %struct.foo* %d) nounwind  {
 entry:
 	%d_addr = alloca %struct.foo*		; <%struct.foo**> [#uses=2]
@@ -52,3 +54,10 @@ entry:
 return:		; preds = %entry
 	ret void
 }
+
+; CHECK: foo:
+; CHECK: movq %rdi, %rax
+define void @foo({ i64 }* noalias nocapture sret %agg.result) nounwind {
+  store { i64 } { i64 0 }, { i64 }* %agg.result
+  ret void
+}
diff --git a/test/CodeGen/X86/x86-64-varargs.ll b/test/CodeGen/X86/x86-64-varargs.ll
index 2964dd3969f0a..428f4493b069e 100644
--- a/test/CodeGen/X86/x86-64-varargs.ll
+++ b/test/CodeGen/X86/x86-64-varargs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -code-model=large -relocation-model=static | grep call | not grep rax
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -code-model=large -relocation-model=static | grep call | not grep rax
 
 @.str = internal constant [26 x i8] c"%d, %f, %d, %lld, %d, %f\0A\00"		; <[26 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/X86/x86-frameaddr.ll b/test/CodeGen/X86/x86-frameaddr.ll
index b9d6d13880b55..d5958745dfff8 100644
--- a/test/CodeGen/X86/x86-frameaddr.ll
+++ b/test/CodeGen/X86/x86-frameaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | grep ebp
+; RUN: llc < %s -march=x86 | grep mov | grep ebp
 
 define i8* @t() nounwind {
 entry:
diff --git a/test/CodeGen/X86/x86-frameaddr2.ll b/test/CodeGen/X86/x86-frameaddr2.ll
index f50ab072c33e3..c5091154152bc 100644
--- a/test/CodeGen/X86/x86-frameaddr2.ll
+++ b/test/CodeGen/X86/x86-frameaddr2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 3
+; RUN: llc < %s -march=x86 | grep mov | count 3
 
 define i8* @t() nounwind {
 entry:
diff --git a/test/CodeGen/X86/x86-store-gv-addr.ll b/test/CodeGen/X86/x86-store-gv-addr.ll
index 799340d35dd2b..089517aadb128 100644
--- a/test/CodeGen/X86/x86-store-gv-addr.ll
+++ b/test/CodeGen/X86/x86-store-gv-addr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin -relocation-model=static | not grep lea
-; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux-gnu -relocation-model=static | not grep lea
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=static | not grep lea
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -relocation-model=static | not grep lea
 
 @v = external global i32, align 8
 @v_addr = external global i32*, align 8
diff --git a/test/CodeGen/X86/xmm-r64.ll b/test/CodeGen/X86/xmm-r64.ll
index f7d2143664ef1..2a6b5c71aa4f7 100644
--- a/test/CodeGen/X86/xmm-r64.ll
+++ b/test/CodeGen/X86/xmm-r64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llc < %s -march=x86-64
 
 define <4 x i32> @test() {
         %tmp1039 = call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )               ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
new file mode 100644
index 0000000000000..7bd06bba4c3e4
--- /dev/null
+++ b/test/CodeGen/X86/xor.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2  | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+
+; Though it is undefined, we want xor undef,undef to produce zero.
+define <4 x i32> @test1() nounwind {
+	%tmp = xor <4 x i32> undef, undef
+	ret <4 x i32> %tmp
+        
+; X32: test1:
+; X32:	xorps	%xmm0, %xmm0
+; X32:	ret
+}
+
+; Though it is undefined, we want xor undef,undef to produce zero.
+define i32 @test2() nounwind{
+	%tmp = xor i32 undef, undef
+	ret i32 %tmp
+; X32: test2:
+; X32:	xorl	%eax, %eax
+; X32:	ret
+}
+
+define i32 @test3(i32 %a, i32 %b) nounwind  {
+entry:
+        %tmp1not = xor i32 %b, -2
+	%tmp3 = and i32 %tmp1not, %a
+        %tmp4 = lshr i32 %tmp3, 1
+        ret i32 %tmp4
+        
+; X64: test3:
+; X64:	notl	%esi
+; X64:	andl	%edi, %esi
+; X64:	movl	%esi, %eax
+; X64:	shrl	%eax
+; X64:	ret
+
+; X32: test3:
+; X32: 	movl	8(%esp), %eax
+; X32: 	notl	%eax
+; X32: 	andl	4(%esp), %eax
+; X32: 	shrl	%eax
+; X32: 	ret
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind  {
+entry:
+        br label %bb
+bb:
+	%b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+        %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+	%tmp3 = xor i32 %a_addr.0, %b_addr.0
+        %tmp4not = xor i32 %tmp3, 2147483647
+        %tmp6 = and i32 %tmp4not, %b_addr.0
+        %tmp8 = shl i32 %tmp6, 1
+        %tmp10 = icmp eq i32 %tmp8, 0
+	br i1 %tmp10, label %bb12, label %bb
+bb12:
+	ret i32 %tmp3
+        
+; X64: test4:
+; X64:    notl	[[REG:%[a-z]+]]
+; X64:    andl	{{.*}}[[REG]]
+; X32: test4:
+; X32:    notl	[[REG:%[a-z]+]]
+; X32:    andl	{{.*}}[[REG]]
+}
+
+define i16 @test5(i16 %a, i16 %b) nounwind  {
+entry:
+        br label %bb
+bb:
+	%b_addr.0 = phi i16 [ %b, %entry ], [ %tmp8, %bb ]
+        %a_addr.0 = phi i16 [ %a, %entry ], [ %tmp3, %bb ]
+	%tmp3 = xor i16 %a_addr.0, %b_addr.0
+        %tmp4not = xor i16 %tmp3, 32767
+        %tmp6 = and i16 %tmp4not, %b_addr.0
+        %tmp8 = shl i16 %tmp6, 1
+        %tmp10 = icmp eq i16 %tmp8, 0
+	br i1 %tmp10, label %bb12, label %bb
+bb12:
+	ret i16 %tmp3
+; X64: test5:
+; X64:    notw	[[REG:%[a-z]+]]
+; X64:    andw	{{.*}}[[REG]]
+; X32: test5:
+; X32:    notw	[[REG:%[a-z]+]]
+; X32:    andw	{{.*}}[[REG]]
+}
+
+define i8 @test6(i8 %a, i8 %b) nounwind  {
+entry:
+        br label %bb
+bb:
+	%b_addr.0 = phi i8 [ %b, %entry ], [ %tmp8, %bb ]
+        %a_addr.0 = phi i8 [ %a, %entry ], [ %tmp3, %bb ]
+	%tmp3 = xor i8 %a_addr.0, %b_addr.0
+        %tmp4not = xor i8 %tmp3, 127
+        %tmp6 = and i8 %tmp4not, %b_addr.0
+        %tmp8 = shl i8 %tmp6, 1
+        %tmp10 = icmp eq i8 %tmp8, 0
+	br i1 %tmp10, label %bb12, label %bb
+bb12:
+	ret i8 %tmp3
+; X64: test6:
+; X64:    notb	[[REG:%[a-z]+]]
+; X64:    andb	{{.*}}[[REG]]
+; X32: test6:
+; X32:    notb	[[REG:%[a-z]+]]
+; X32:    andb	{{.*}}[[REG]]
+}
+
+define i32 @test7(i32 %a, i32 %b) nounwind  {
+entry:
+        br label %bb
+bb:
+	%b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+        %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+	%tmp3 = xor i32 %a_addr.0, %b_addr.0
+        %tmp4not = xor i32 %tmp3, 2147483646
+        %tmp6 = and i32 %tmp4not, %b_addr.0
+        %tmp8 = shl i32 %tmp6, 1
+        %tmp10 = icmp eq i32 %tmp8, 0
+	br i1 %tmp10, label %bb12, label %bb
+bb12:
+	ret i32 %tmp3
+; X64: test7:
+; X64:    xorl	$2147483646, [[REG:%[a-z]+]]
+; X64:    andl	{{.*}}[[REG]]
+; X32: test7:
+; X32:    xorl	$2147483646, [[REG:%[a-z]+]]
+; X32:    andl	{{.*}}[[REG]]
+}
+
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
index 7640ba5aca412..3e3bb95d06f77 100644
--- a/test/CodeGen/X86/zero-remat.ll
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -1,16 +1,40 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep xor | count 4
-; RUN: llvm-as < %s | llc -march=x86-64 -stats  -info-output-file - | grep asm-printer  | grep 12
-; RUN: llvm-as < %s | llc -march=x86 | grep fldz
-; RUN: llvm-as < %s | llc -march=x86 | not grep fldl
+; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=x86-64 -stats  -info-output-file - | grep asm-printer  | grep 12
+; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
 
 declare void @bar(double %x)
 declare void @barf(float %x)
 
 define double @foo() nounwind {
+
   call void @bar(double 0.0)
   ret double 0.0
+
+;CHECK-32: foo:
+;CHECK-32: call
+;CHECK-32: fldz
+;CHECK-32: ret
+
+;CHECK-64: foo:
+;CHECK-64: pxor
+;CHECK-64: call
+;CHECK-64: pxor
+;CHECK-64: ret
 }
+
+
 define float @foof() nounwind {
   call void @barf(float 0.0)
   ret float 0.0
+
+;CHECK-32: foof:
+;CHECK-32: call
+;CHECK-32: fldz
+;CHECK-32: ret
+
+;CHECK-64: foof:
+;CHECK-64: pxor
+;CHECK-64: call
+;CHECK-64: pxor
+;CHECK-64: ret
 }
diff --git a/test/CodeGen/X86/zext-inreg-0.ll b/test/CodeGen/X86/zext-inreg-0.ll
index 1a734642d0318..ae6221af9d813 100644
--- a/test/CodeGen/X86/zext-inreg-0.ll
+++ b/test/CodeGen/X86/zext-inreg-0.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
-; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: llc < %s -march=x86 | not grep and
+; RUN: llc < %s -march=x86-64 > %t
 ; RUN: not grep and %t
 ; RUN: not grep movzbq %t
 ; RUN: not grep movzwq %t
diff --git a/test/CodeGen/X86/zext-inreg-1.ll b/test/CodeGen/X86/zext-inreg-1.ll
index bc8e482d562de..17fe374e01ecc 100644
--- a/test/CodeGen/X86/zext-inreg-1.ll
+++ b/test/CodeGen/X86/zext-inreg-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | not grep and
 
 ; These tests differ from the ones in zext-inreg-0.ll in that
 ; on x86-64 they do require and instructions.
diff --git a/test/CodeGen/XCore/2008-11-17-Shl64.ll b/test/CodeGen/XCore/2008-11-17-Shl64.ll
index 97ea41b8d0c06..04b1b5a0016e2 100644
--- a/test/CodeGen/XCore/2008-11-17-Shl64.ll
+++ b/test/CodeGen/XCore/2008-11-17-Shl64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; PR3080
 define i64 @test(i64 %a) {
 	%result = shl i64 %a, 1
diff --git a/test/CodeGen/XCore/2009-01-08-Crash.ll b/test/CodeGen/XCore/2009-01-08-Crash.ll
index 6f5fb7c6871e9..a31ea1e2e9be6 100644
--- a/test/CodeGen/XCore/2009-01-08-Crash.ll
+++ b/test/CodeGen/XCore/2009-01-08-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ;; This caused a compilation failure since the
 ;; address arithmetic was folded into the LDWSP instruction,
 ;; resulting in a negative offset which eliminateFrameIndex was
diff --git a/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll b/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
index b9333c94abe33..b2bbcb1183d19 100644
--- a/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
+++ b/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; PR3324
 define double @f1(double %a, double %b, double %c, double %d, double %e, double %f, double %g) nounwind {
 entry:
diff --git a/test/CodeGen/XCore/2009-03-27-v2f64-param.ll b/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
index e834d66df2410..a6b9699987eb6 100644
--- a/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
+++ b/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore
+; RUN: llc < %s -march=xcore
 ; PR3898
 
 define i32 @vector_param(<2 x double> %x) nounwind {
diff --git a/test/CodeGen/XCore/2009-07-15-store192.ll b/test/CodeGen/XCore/2009-07-15-store192.ll
new file mode 100644
index 0000000000000..5278af8ac229c
--- /dev/null
+++ b/test/CodeGen/XCore/2009-07-15-store192.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=xcore > %t1.s
+define void @store32(i8* %p) nounwind {
+entry:
+	%0 = bitcast i8* %p to i192*
+	store i192 0, i192* %0, align 4
+	ret void
+}
diff --git a/test/CodeGen/XCore/addsub64.ll b/test/CodeGen/XCore/addsub64.ll
index 41224fca1cc25..a1494adfcc46d 100644
--- a/test/CodeGen/XCore/addsub64.ll
+++ b/test/CodeGen/XCore/addsub64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore -mcpu=xs1b-generic > %t1.s
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic > %t1.s
 ; RUN: grep ladd %t1.s | count 2
 ; RUN: grep lsub %t1.s | count 2
 define i64 @add64(i64 %a, i64 %b) {
diff --git a/test/CodeGen/XCore/ashr.ll b/test/CodeGen/XCore/ashr.ll
new file mode 100644
index 0000000000000..d585e8b10d98f
--- /dev/null
+++ b/test/CodeGen/XCore/ashr.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+define i32 @ashr(i32 %a, i32 %b) {
+	%1 = ashr i32 %a, %b
+	ret i32 %1
+}
+; CHECK: ashr:
+; CHECK-NEXT: ashr r0, r0, r1
+
+define i32 @ashri1(i32 %a) {
+	%1 = ashr i32 %a, 24
+	ret i32 %1
+}
+; CHECK: ashri1:
+; CHECK-NEXT: ashr r0, r0, 24
+
+define i32 @ashri2(i32 %a) {
+	%1 = ashr i32 %a, 31
+	ret i32 %1
+}
+; CHECK: ashri2:
+; CHECK-NEXT: ashr r0, r0, 32
+
+define i32 @f1(i32 %a) {
+        %1 = icmp slt i32 %a, 0
+	br i1 %1, label %less, label %not_less
+less:
+	ret i32 10
+not_less:
+	ret i32 17
+}
+; CHECK: f1:
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: bf r0
+
+define i32 @f2(i32 %a) {
+        %1 = icmp sge i32 %a, 0
+	br i1 %1, label %greater, label %not_greater
+greater:
+	ret i32 10
+not_greater:
+	ret i32 17
+}
+; CHECK: f2:
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: bt r0
+
+define i32 @f3(i32 %a) {
+        %1 = icmp slt i32 %a, 0
+	%2 = select i1 %1, i32 10, i32 17
+	ret i32 %2
+}
+; CHECK: f3:
+; CHECK-NEXT: ashr r1, r0, 32
+; CHECK-NEXT: ldc r0, 10
+; CHECK-NEXT: bt r1
+; CHECK: ldc r0, 17
+
+define i32 @f4(i32 %a) {
+        %1 = icmp sge i32 %a, 0
+	%2 = select i1 %1, i32 10, i32 17
+	ret i32 %2
+}
+; CHECK: f4:
+; CHECK-NEXT: ashr r1, r0, 32
+; CHECK-NEXT: ldc r0, 17
+; CHECK-NEXT: bt r1
+; CHECK: ldc r0, 10
+
+define i32 @f5(i32 %a) {
+        %1 = icmp sge i32 %a, 0
+	%2 = zext i1 %1 to i32
+	ret i32 %2
+}
+; CHECK: f5:
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: eq r0, r0, 0
diff --git a/test/CodeGen/XCore/basictest.ll b/test/CodeGen/XCore/basictest.ll
index 803ffcb74cc12..de5eaff08073d 100644
--- a/test/CodeGen/XCore/basictest.ll
+++ b/test/CodeGen/XCore/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore
+; RUN: llc < %s -march=xcore
 
 define i32 @test(i32 %X) {
 	%tmp.1 = add i32 %X, 1
diff --git a/test/CodeGen/XCore/bitrev.ll b/test/CodeGen/XCore/bitrev.ll
index 38f3948697638..09202d3656787 100644
--- a/test/CodeGen/XCore/bitrev.ll
+++ b/test/CodeGen/XCore/bitrev.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep bitrev %t1.s | count 1 
 declare i32 @llvm.xcore.bitrev(i32)
 
diff --git a/test/CodeGen/XCore/constants.ll b/test/CodeGen/XCore/constants.ll
new file mode 100644
index 0000000000000..95fa11e77470b
--- /dev/null
+++ b/test/CodeGen/XCore/constants.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s
+
+; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4
+; CHECK: .LCPI1_0:
+; CHECK: .long 12345678
+; CHECK: f:
+; CHECK: ldw r0, cp[.LCPI1_0]
+define i32 @f() {
+entry:
+	ret i32 12345678
+}
diff --git a/test/CodeGen/XCore/cos.ll b/test/CodeGen/XCore/cos.ll
index 334f0d50561d6..8211f85b9bc29 100644
--- a/test/CodeGen/XCore/cos.ll
+++ b/test/CodeGen/XCore/cos.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "bl cosf" %t1.s | count 1
 ; RUN: grep "bl cos" %t1.s | count 2
 declare double @llvm.cos.f64(double)
diff --git a/test/CodeGen/XCore/exp.ll b/test/CodeGen/XCore/exp.ll
index 8412e7a599564..d23d484ed62e4 100644
--- a/test/CodeGen/XCore/exp.ll
+++ b/test/CodeGen/XCore/exp.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "bl expf" %t1.s | count 1
 ; RUN: grep "bl exp" %t1.s | count 2
 declare double @llvm.exp.f64(double)
diff --git a/test/CodeGen/XCore/exp2.ll b/test/CodeGen/XCore/exp2.ll
index a53b767ad0d0e..4c4d17f4bbf71 100644
--- a/test/CodeGen/XCore/exp2.ll
+++ b/test/CodeGen/XCore/exp2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "bl exp2f" %t1.s | count 1
 ; RUN: grep "bl exp2" %t1.s | count 2
 declare double @llvm.exp2.f64(double)
diff --git a/test/CodeGen/XCore/fneg.ll b/test/CodeGen/XCore/fneg.ll
index 3fb7b01869409..e3dd3dd45c23b 100644
--- a/test/CodeGen/XCore/fneg.ll
+++ b/test/CodeGen/XCore/fneg.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
-; RUN: grep "xor" %t1.s | count 1
+; RUN: llc < %s -march=xcore | grep "xor" | count 1
 define i1 @test(double %F) nounwind {
 entry:
 	%0 = fsub double -0.000000e+00, %F
diff --git a/test/CodeGen/XCore/getid.ll b/test/CodeGen/XCore/getid.ll
index 810e8ad6e75ce..ecab65c0e92e2 100644
--- a/test/CodeGen/XCore/getid.ll
+++ b/test/CodeGen/XCore/getid.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "get r11, id" %t1.s | count 1 
 declare i32 @llvm.xcore.getid()
 
diff --git a/test/CodeGen/XCore/globals.ll b/test/CodeGen/XCore/globals.ll
new file mode 100644
index 0000000000000..342e5932dd106
--- /dev/null
+++ b/test/CodeGen/XCore/globals.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s
+
+define i32 *@addr_G1() {
+entry:
+; CHECK: addr_G1:
+; CHECK: ldaw r0, dp[G1]
+	ret i32* @G1
+}
+
+define i32 *@addr_G2() {
+entry:
+; CHECK: addr_G2:
+; CHECK: ldaw r0, dp[G2]
+	ret i32* @G2
+}
+
+define i32 *@addr_G3() {
+entry:
+; CHECK: addr_G3:
+; CHECK: ldaw r11, cp[G3]
+; CHECK: mov r0, r11
+	ret i32* @G3
+}
+
+define i32 **@addr_G4() {
+entry:
+; CHECK: addr_G4:
+; CHECK: ldaw r0, dp[G4]
+	ret i32** @G4
+}
+
+define i32 **@addr_G5() {
+entry:
+; CHECK: addr_G5:
+; CHECK: ldaw r11, cp[G5]
+; CHECK: mov r0, r11
+	ret i32** @G5
+}
+
+define i32 **@addr_G6() {
+entry:
+; CHECK: addr_G6:
+; CHECK: ldaw r0, dp[G6]
+	ret i32** @G6
+}
+
+define i32 **@addr_G7() {
+entry:
+; CHECK: addr_G7:
+; CHECK: ldaw r11, cp[G7]
+; CHECK: mov r0, r11
+	ret i32** @G7
+}
+
+define i32 *@addr_G8() {
+entry:
+; CHECK: addr_G8:
+; CHECK: ldaw r0, dp[G8]
+	ret i32* @G8
+}
+
+@G1 = global i32 4712
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G1:
+
+@G2 = global i32 0
+; CHECK: .section .dp.bss,"awd",@nobits
+; CHECK: G2:
+
+@G3 = constant i32 9401
+; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4
+; CHECK: G3:
+
+@G4 = global i32* @G1
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G4:
+
+@G5 = constant i32* @G1
+; CHECK: .section .cp.rodata,"ac",@progbits
+; CHECK: G5:
+
+@G6 = global i32* @G8
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G6:
+
+@G7 = constant i32* @G8
+; CHECK: .section .cp.rodata,"ac",@progbits
+; CHECK: G7:
+
+@G8 = internal global i32 9312
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G8:
diff --git a/test/CodeGen/XCore/load.ll b/test/CodeGen/XCore/load.ll
new file mode 100644
index 0000000000000..adfea212a2790
--- /dev/null
+++ b/test/CodeGen/XCore/load.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: not grep add %t1.s
+; RUN: not grep ldaw %t1.s
+; RUN: not grep lda16 %t1.s
+; RUN: not grep zext %t1.s
+; RUN: not grep sext %t1.s
+; RUN: grep "ldw" %t1.s | count 2
+; RUN: grep "ld16s" %t1.s | count 1
+; RUN: grep "ld8u" %t1.s | count 1
+
+define i32 @load32(i32* %p, i32 %offset) nounwind {
+entry:
+	%0 = getelementptr i32* %p, i32 %offset
+	%1 = load i32* %0, align 4
+	ret i32 %1
+}
+
+define i32 @load32_imm(i32* %p) nounwind {
+entry:
+	%0 = getelementptr i32* %p, i32 11
+	%1 = load i32* %0, align 4
+	ret i32 %1
+}
+
+define i32 @load16(i16* %p, i32 %offset) nounwind {
+entry:
+	%0 = getelementptr i16* %p, i32 %offset
+	%1 = load i16* %0, align 2
+	%2 = sext i16 %1 to i32
+	ret i32 %2
+}
+
+define i32 @load8(i8* %p, i32 %offset) nounwind {
+entry:
+	%0 = getelementptr i8* %p, i32 %offset
+	%1 = load i8* %0, align 1
+	%2 = zext i8 %1 to i32
+	ret i32 %2
+}
diff --git a/test/CodeGen/XCore/log.ll b/test/CodeGen/XCore/log.ll
index 88d9d7ffcd392..a08471f48e4ae 100644
--- a/test/CodeGen/XCore/log.ll
+++ b/test/CodeGen/XCore/log.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "bl logf" %t1.s | count 1
 ; RUN: grep "bl log" %t1.s | count 2
 declare double @llvm.log.f64(double)
diff --git a/test/CodeGen/XCore/log10.ll b/test/CodeGen/XCore/log10.ll
index f844d8fc6a24c..a72b8bfaf6b9d 100644
--- a/test/CodeGen/XCore/log10.ll
+++ b/test/CodeGen/XCore/log10.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "bl log10f" %t1.s | count 1
 ; RUN: grep "bl log10" %t1.s | count 2
 declare double @llvm.log10.f64(double)
diff --git a/test/CodeGen/XCore/log2.ll b/test/CodeGen/XCore/log2.ll
index b8a3dbd2317fb..d257433a01a76 100644
--- a/test/CodeGen/XCore/log2.ll
+++ b/test/CodeGen/XCore/log2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "bl log2f" %t1.s | count 1
 ; RUN: grep "bl log2" %t1.s | count 2
 declare double @llvm.log2.f64(double)
diff --git a/test/CodeGen/XCore/pow.ll b/test/CodeGen/XCore/pow.ll
index a7b6318c1091d..b461185b7fde4 100644
--- a/test/CodeGen/XCore/pow.ll
+++ b/test/CodeGen/XCore/pow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "bl powf" %t1.s | count 1
 ; RUN: grep "bl pow" %t1.s | count 2
 declare double @llvm.pow.f64(double, double)
diff --git a/test/CodeGen/XCore/powi.ll b/test/CodeGen/XCore/powi.ll
index 30e6d7ea88f3a..de31cbed00c06 100644
--- a/test/CodeGen/XCore/powi.ll
+++ b/test/CodeGen/XCore/powi.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "bl __powidf2" %t1.s | count 1
 ; RUN: grep "bl __powisf2" %t1.s | count 1
 declare double @llvm.powi.f64(double, i32)
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index 888ccdf297d59..9a2f5b32dc391 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -1,6 +1,6 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llvm-as < %s | llc -march=xcore > %t
+; RUN: llc < %s -march=xcore > %t
 ; RUN: grep .Lfoo: %t
 ; RUN: grep bl.*\.Lfoo %t
 ; RUN: grep .Lbaz: %t
diff --git a/test/CodeGen/XCore/sext.ll b/test/CodeGen/XCore/sext.ll
new file mode 100644
index 0000000000000..9cd4ad66a5cd8
--- /dev/null
+++ b/test/CodeGen/XCore/sext.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+define i32 @sext1(i32 %a) {
+	%1 = trunc i32 %a to i1
+	%2 = sext i1 %1 to i32
+	ret i32 %2
+}
+; CHECK: sext1:
+; CHECK: sext r0, 1
+
+define i32 @sext2(i32 %a) {
+	%1 = trunc i32 %a to i2
+	%2 = sext i2 %1 to i32
+	ret i32 %2
+}
+; CHECK: sext2:
+; CHECK: sext r0, 2
+
+define i32 @sext8(i32 %a) {
+	%1 = trunc i32 %a to i8
+	%2 = sext i8 %1 to i32
+	ret i32 %2
+}
+; CHECK: sext8:
+; CHECK: sext r0, 8
+
+define i32 @sext16(i32 %a) {
+	%1 = trunc i32 %a to i16
+	%2 = sext i16 %1 to i32
+	ret i32 %2
+}
+; CHECK: sext16:
+; CHECK: sext r0, 16
diff --git a/test/CodeGen/XCore/sin.ll b/test/CodeGen/XCore/sin.ll
index 41aab675953f2..ced026f1d3e19 100644
--- a/test/CodeGen/XCore/sin.ll
+++ b/test/CodeGen/XCore/sin.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "bl sinf" %t1.s | count 1
 ; RUN: grep "bl sin" %t1.s | count 2
 declare double @llvm.sin.f64(double)
diff --git a/test/CodeGen/XCore/sqrt.ll b/test/CodeGen/XCore/sqrt.ll
index 221d1ac1a781d..364d1a14c6ae3 100644
--- a/test/CodeGen/XCore/sqrt.ll
+++ b/test/CodeGen/XCore/sqrt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "bl sqrtf" %t1.s | count 1
 ; RUN: grep "bl sqrt" %t1.s | count 2
 declare double @llvm.sqrt.f64(double)
diff --git a/test/CodeGen/XCore/store.ll b/test/CodeGen/XCore/store.ll
new file mode 100644
index 0000000000000..2213743ff897b
--- /dev/null
+++ b/test/CodeGen/XCore/store.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: not grep add %t1.s
+; RUN: not grep ldaw %t1.s
+; RUN: not grep lda16 %t1.s
+; RUN: grep "stw" %t1.s | count 2
+; RUN: grep "st16" %t1.s | count 1
+; RUN: grep "st8" %t1.s | count 1
+
+define void @store32(i32* %p, i32 %offset, i32 %val) nounwind {
+entry:
+	%0 = getelementptr i32* %p, i32 %offset
+	store i32 %val, i32* %0, align 4
+	ret void
+}
+
+define void @store32_imm(i32* %p, i32 %val) nounwind {
+entry:
+	%0 = getelementptr i32* %p, i32 11
+	store i32 %val, i32* %0, align 4
+	ret void
+}
+
+define void @store16(i16* %p, i32 %offset, i16 %val) nounwind {
+entry:
+	%0 = getelementptr i16* %p, i32 %offset
+	store i16 %val, i16* %0, align 2
+	ret void
+}
+
+define void @store8(i8* %p, i32 %offset, i8 %val) nounwind {
+entry:
+	%0 = getelementptr i8* %p, i32 %offset
+	store i8 %val, i8* %0, align 1
+	ret void
+}
diff --git a/test/CodeGen/XCore/tls.ll b/test/CodeGen/XCore/tls.ll
new file mode 100644
index 0000000000000..ed41afae09960
--- /dev/null
+++ b/test/CodeGen/XCore/tls.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s
+
+define i32 *@addr_G() {
+entry:
+; CHECK: addr_G:
+; CHECK: get r11, id
+	ret i32* @G
+}
+
+@G = thread_local global i32 15
+; CHECK: .section .dp.data,"awd",@progbits
+; CHECK: G:
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
+; CHECK: .long 15
diff --git a/test/CodeGen/XCore/trap.ll b/test/CodeGen/XCore/trap.ll
index b3d3bc2270e32..45f886d332aa4 100644
--- a/test/CodeGen/XCore/trap.ll
+++ b/test/CodeGen/XCore/trap.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore > %t1.s
+; RUN: llc < %s -march=xcore > %t1.s
 ; RUN: grep "ecallf" %t1.s | count 1
 ; RUN: grep "ldc" %t1.s | count 1
 define i32 @test() noreturn nounwind  {
diff --git a/test/CodeGen/XCore/unaligned_load.ll b/test/CodeGen/XCore/unaligned_load.ll
new file mode 100644
index 0000000000000..0ee8e1c32667b
--- /dev/null
+++ b/test/CodeGen/XCore/unaligned_load.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: grep "bl __misaligned_load" %t1.s | count 1
+; RUN: grep ld16s %t1.s | count 2
+; RUN: grep ldw %t1.s | count 2
+; RUN: grep shl %t1.s | count 2
+; RUN: grep shr %t1.s | count 1
+; RUN: grep zext %t1.s | count 1
+; RUN: grep "or " %t1.s | count 2
+
+; Byte aligned load. Expands to call to __misaligned_load.
+define i32 @align1(i32* %p) nounwind {
+entry:
+	%0 = load i32* %p, align 1		; <i32> [#uses=1]
+	ret i32 %0
+}
+
+; Half word aligned load. Expands to two 16bit loads.
+define i32 @align2(i32* %p) nounwind {
+entry:
+	%0 = load i32* %p, align 2		; <i32> [#uses=1]
+	ret i32 %0
+}
+
+@a = global [5 x i8] zeroinitializer, align 4
+
+; Constant offset from word aligned base. Expands to two 32bit loads.
+define i32 @align3() nounwind {
+entry:
+	%0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
+	ret i32 %0
+}
diff --git a/test/CodeGen/XCore/unaligned_store.ll b/test/CodeGen/XCore/unaligned_store.ll
new file mode 100644
index 0000000000000..62078e6f60778
--- /dev/null
+++ b/test/CodeGen/XCore/unaligned_store.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: grep "bl __misaligned_store" %t1.s | count 1
+; RUN: grep st16 %t1.s | count 2
+; RUN: grep shr %t1.s | count 1
+
+; Byte aligned store. Expands to call to __misaligned_store.
+define void @align1(i32* %p, i32 %val) nounwind {
+entry:
+	store i32 %val, i32* %p, align 1
+	ret void
+}
+
+; Half word aligned store. Expands to two 16bit stores.
+define void @align2(i32* %p, i32 %val) nounwind {
+entry:
+	store i32 %val, i32* %p, align 2
+	ret void
+}
diff --git a/test/CodeGen/XCore/unaligned_store_combine.ll b/test/CodeGen/XCore/unaligned_store_combine.ll
new file mode 100644
index 0000000000000..493ca6a975f83
--- /dev/null
+++ b/test/CodeGen/XCore/unaligned_store_combine.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=xcore > %t1.s
+; RUN: grep "bl memmove" %t1.s | count 1
+; RUN: grep "ldc r., 8" %t1.s | count 1
+
+; Unaligned load / store pair. Should be combined into a memmove
+; of size 8
+define void @f(i64* %dst, i64* %src) nounwind {
+entry:
+	%0 = load i64* %src, align 1
+	store i64 %0, i64* %dst, align 1
+	ret void
+}