102 files changed, 4361 insertions, 260 deletions
diff --git a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
index 82ef2b82cbe6..b6feb5abbc3f 100644
--- a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
+++ b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 | grep dst | count 4
+; RUN: llc < %s -march=ppc64 -mattr=+altivec | grep dst | count 4
 
 define hidden void @_Z4borkPc(i8* %image) {
 entry:
diff --git a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
index 8802b97d2a6a..00a402e0e487 100644
--- a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
+++ b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm
 ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm
+; XFAIL: *
 
 define <4 x i32> @test() nounwind {
 	ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722>
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
index 84aa40c4b52a..91253daae396 100644
--- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
+++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -10,8 +10,8 @@ target triple = "powerpc-apple-darwin10.0"
 define void @foo(i32 %y) nounwind ssp {
 entry:
 ; CHECK: foo
-; CHECK: add r3
-; CHECK: 0(r3)
+; CHECK: add r2
+; CHECK: 0(r2)
   %y_addr = alloca i32                            ; <i32*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store i32 %y, i32* %y_addr
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index 974a99a52cb5..097611a7619c 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -2,21 +2,21 @@
 ; ModuleID = 'hh.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
 target triple = "powerpc-apple-darwin9.6"
-; This formerly used R0 for both the stack address and CR.
 
 define void @foo() nounwind {
 entry:
-;CHECK:  mfcr r2
-;CHECK:  lis r3, 1
-;CHECK:  rlwinm r2, r2, 8, 0, 31
-;CHECK:  ori r3, r3, 34524
-;CHECK:  stwx r2, r1, r3
-; Make sure that the register scavenger returns the same temporary register.
-;CHECK:  mfcr r2
-;CHECK:  lis r3, 1
-;CHECK:  rlwinm r2, r2, 12, 0, 31
-;CHECK:  ori r3, r3, 34520
-;CHECK:  stwx r2, r1, r3
+; Note that part of what is being checked here is proper register reuse.
+; CHECK: mfcr [[T1:r[0-9]+]]                         ; cr2
+; CHECK: lis [[T2:r[0-9]+]], 1
+; CHECK: addi r3, r1, 72
+; CHECK: rlwinm [[T1]], [[T1]], 8, 0, 31
+; CHECK: ori [[T2]], [[T2]], 34540
+; CHECK: stwx [[T1]], r1, [[T2]]
+; CHECK: lis [[T3:r[0-9]+]], 1
+; CHECK: mfcr [[T4:r[0-9]+]]                         ; cr3
+; CHECK: ori [[T3]], [[T3]], 34536
+; CHECK: rlwinm [[T4]], [[T4]], 12, 0, 31
+; CHECK: stwx [[T4]], r1, [[T3]]
   %x = alloca [100000 x i8]                       ; <[100000 x i8]*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %x1 = bitcast [100000 x i8]* %x to i8*          ; <i8*> [#uses=1]
@@ -25,11 +25,16 @@ entry:
   br label %return
 
 return:                                           ; preds = %entry
-;CHECK:  lis r3, 1
-;CHECK:  ori r3, r3, 34524
-;CHECK:  lwzx r2, r1, r3
-;CHECK:  rlwinm r2, r2, 24, 0, 31
-;CHECK:  mtcrf 32, r2
+; CHECK: lis [[T1:r[0-9]+]], 1
+; CHECK: ori [[T1]], [[T1]], 34536
+; CHECK: lwzx [[T1]], r1, [[T1]]
+; CHECK: rlwinm [[T1]], [[T1]], 20, 0, 31
+; CHECK: mtcrf 16, [[T1]]
+; CHECK: lis [[T1]], 1
+; CHECK: ori [[T1]], [[T1]], 34540
+; CHECK: lwzx [[T1]], r1, [[T1]]
+; CHECK: rlwinm [[T1]], [[T1]], 24, 0, 31
+; CHECK: mtcrf 32, [[T1]]
   ret void
 }
 
diff --git a/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 4a850984a909..000000000000
--- a/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=ppc32 -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
-  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
-  ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
index 72ae9d6c73b3..0dbc2d0180ff 100644
--- a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
+++ b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
@@ -18,8 +18,8 @@ entry:
 ; CHECK: _g:
 ; CHECK:  mflr r0
 ; CHECK:  stw r0, 8(r1)
-; CHECK:  lwz r3, 0(r1)
-; CHECK:  lwz r3, 8(r3)
+; CHECK:  lwz r2, 0(r1)
+; CHECK:  lwz r3, 8(r2)
   %0 = tail call i8* @llvm.returnaddress(i32 1)   ; <i8*> [#uses=1]
   ret i8* %0
 }
diff --git a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
index bf3d577a3677..d1a3c9f46b57 100644
--- a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
+++ b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
@@ -7,7 +7,7 @@ define i32 @main() nounwind {
 entry:
 ; Make sure we're generating references using the red zone
 ; CHECK: main:
-; CHECK: stw r3, -12(r1)
+; CHECK: stw r2, -12(r1)
   %retval = alloca i32
   %0 = alloca i32
   %"alloca point" = bitcast i32 0 to i32
diff --git a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
index 9d2e390c1c97..5bff58f2bbf5 100644
--- a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
+++ b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -code-model=small < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
diff --git a/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll b/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll
new file mode 100644
index 000000000000..35e3fdd26e72
--- /dev/null
+++ b/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=powerpc64-bgq-linux -enable-misched < %s | FileCheck %s
+;
+; PR14315: misched should not move the physreg copy of %t below the calls.
+
+@.str89 = external unnamed_addr constant [6 x i8], align 1
+
+declare void @init() nounwind
+
+declare void @clock() nounwind
+
+; CHECK: %entry
+; CHECK: fmr 31, 1
+; CHECK: bl init
+define void @s332(double %t) nounwind {
+entry:
+  tail call void @init()
+  tail call void @clock() nounwind
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.body4, %entry
+  %i.0 = phi i32 [ %inc, %for.body4 ], [ 0, %entry ]
+  %cmp3 = icmp slt i32 undef, 16000
+  br i1 %cmp3, label %for.body4, label %L20
+
+for.body4:                                        ; preds = %for.cond2
+  %cmp5 = fcmp ogt double undef, %t
+  %inc = add nsw i32 %i.0, 1
+  br i1 %cmp5, label %L20, label %for.cond2
+
+L20:                                              ; preds = %for.body4, %for.cond2
+  %index.0 = phi i32 [ -2, %for.cond2 ], [ %i.0, %for.body4 ]
+  unreachable
+}
diff --git a/test/CodeGen/PowerPC/DbgValueOtherTargets.test b/test/CodeGen/PowerPC/DbgValueOtherTargets.test
new file mode 100644
index 000000000000..9702934f7e68
--- /dev/null
+++ b/test/CodeGen/PowerPC/DbgValueOtherTargets.test
@@ -0,0 +1 @@
+RUN: llc -O0 -march=ppc32 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index 6f985c819fb6..e8765deab05d 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep "stw r4, 32751"
+; RUN:   grep "stw r3, 32751"
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep "stw r4, 32751"
+; RUN:   grep "stw r3, 32751"
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep "std r4, 9024"
+; RUN:   grep "std r3, 9024"
 
 define void @test() nounwind {
 	store i32 0, i32* inttoptr (i64 48725999 to i32*)
diff --git a/test/CodeGen/PowerPC/a2q-stackalign.ll b/test/CodeGen/PowerPC/a2q-stackalign.ll
new file mode 100644
index 000000000000..00c329119376
--- /dev/null
+++ b/test/CodeGen/PowerPC/a2q-stackalign.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck -check-prefix=CHECK-A2Q %s
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc64-bgq-linux -mcpu=a2 | FileCheck -check-prefix=CHECK-BGQ %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare i32 @bar(i8* %a) nounwind;
+define i32 @foo() nounwind {
+  %p = alloca i8, i8 115
+  store i8 0, i8* %p
+  %r = call i32 @bar(i8* %p)
+  ret i32 %r
+}
+
+; Without QPX, the allocated stack frame is 240 bytes, but with QPX
+; (because we require 32-byte alignment), it is 256 bytes.
+; CHECK-A2: @foo
+; CHECK-A2: stdu 1, -240(1)
+; CHECK-A2Q: @foo
+; CHECK-A2Q: stdu 1, -256(1)
+; CHECK-BGQ: @foo
+; CHECK-BGQ: stdu 1, -256(1)
+
diff --git a/test/CodeGen/PowerPC/a2q.ll b/test/CodeGen/PowerPC/a2q.ll
new file mode 100644
index 000000000000..b26480f08b39
--- /dev/null
+++ b/test/CodeGen/PowerPC/a2q.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2 -mattr=+qpx | FileCheck %s
+
+define void @foo() {
+entry:
+  ret void
+}
+
+; CHECK: @foo
+
diff --git a/test/CodeGen/PowerPC/allocate-r0.ll b/test/CodeGen/PowerPC/allocate-r0.ll
new file mode 100644
index 000000000000..1cf4cec07695
--- /dev/null
+++ b/test/CodeGen/PowerPC/allocate-r0.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @foo(i64 %a) nounwind {
+entry:
+  call void asm sideeffect "", "~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12}"() nounwind
+  br label %return
+
+; CHECK: @foo
+; Because r0 is allocatable, we can use it to hold r3 without spilling.
+; CHECK: mr 0, 3
+; CHECK: mr 3, 0
+
+return:                                           ; preds = %entry
+  ret i64 %a
+}
+
diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll
new file mode 100644
index 000000000000..52587e2c0b87
--- /dev/null
+++ b/test/CodeGen/PowerPC/anon_aggr.ll
@@ -0,0 +1,99 @@
+; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+
+; Test case for PR 14779: anonymous aggregates are not handled correctly.
+; The bug is triggered by passing a byval structure after an anonymous
+; aggregate.
+
+%tarray = type { i64, i8* }
+
+define i8* @func1({ i64, i8* } %array, i8* %ptr) {
+entry:
+  %array_ptr = extractvalue {i64, i8* } %array, 1
+  %cond = icmp eq i8* %array_ptr, %ptr
+  br i1 %cond, label %equal, label %unequal
+equal:
+  ret i8* %array_ptr
+unequal:
+  ret i8* %ptr
+}
+
+; CHECK: func1:
+; CHECK: cmpld {{[0-9]+}}, 4, 5
+; CHECK: std 4, -[[OFFSET1:[0-9]+]]
+; CHECK: std 5, -[[OFFSET2:[0-9]+]]
+; CHECK: ld 3, -[[OFFSET1]](1)
+; CHECK: ld 3, -[[OFFSET2]](1)
+
+
+define i8* @func2({ i64, i8* } %array1, %tarray* byval %array2) {
+entry:
+  %array1_ptr = extractvalue {i64, i8* } %array1, 1
+  %tmp = getelementptr inbounds %tarray* %array2, i32 0, i32 1
+  %array2_ptr = load i8** %tmp
+  %cond = icmp eq i8* %array1_ptr, %array2_ptr
+  br i1 %cond, label %equal, label %unequal
+equal:
+  ret i8* %array1_ptr
+unequal:
+  ret i8* %array2_ptr
+}
+
+; CHECK: func2:
+; CHECK: addi [[REG1:[0-9]+]], 1, 64
+; CHECK: ld [[REG2:[0-9]+]], 8([[REG1]])
+; CHECK: cmpld {{[0-9]+}}, 4, [[REG2]]
+; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]]
+; CHECK: std 4, -[[OFFSET2:[0-9]+]]
+; CHECK: ld 3, -[[OFFSET2]](1)
+; CHECK: ld 3, -[[OFFSET1]](1)
+
+define i8* @func3({ i64, i8* }* byval %array1, %tarray* byval %array2) {
+entry:
+  %tmp1 = getelementptr inbounds { i64, i8* }* %array1, i32 0, i32 1
+  %array1_ptr = load i8** %tmp1
+  %tmp2 = getelementptr inbounds %tarray* %array2, i32 0, i32 1
+  %array2_ptr = load i8** %tmp2
+  %cond = icmp eq i8* %array1_ptr, %array2_ptr
+  br i1 %cond, label %equal, label %unequal
+equal:
+  ret i8* %array1_ptr
+unequal:
+  ret i8* %array2_ptr
+}
+
+; CHECK: func3:
+; CHECK: addi [[REG1:[0-9]+]], 1, 64
+; CHECK: addi [[REG2:[0-9]+]], 1, 48
+; CHECK: ld [[REG3:[0-9]+]], 8([[REG1]])
+; CHECK: ld [[REG4:[0-9]+]], 8([[REG2]])
+; CHECK: cmpld {{[0-9]+}}, [[REG4]], [[REG3]]
+; CHECK: std [[REG3]], -[[OFFSET1:[0-9]+]](1)
+; CHECK: std [[REG4]], -[[OFFSET2:[0-9]+]](1)
+; CHECK: ld 3, -[[OFFSET2]](1)
+; CHECK: ld 3, -[[OFFSET1]](1)
+
+define i8* @func4(i64 %p1, i64 %p2, i64 %p3, i64 %p4,
+                  i64 %p5, i64 %p6, i64 %p7, i64 %p8,
+                  { i64, i8* } %array1, %tarray* byval %array2) {
+entry:
+  %array1_ptr = extractvalue {i64, i8* } %array1, 1
+  %tmp = getelementptr inbounds %tarray* %array2, i32 0, i32 1
+  %array2_ptr = load i8** %tmp
+  %cond = icmp eq i8* %array1_ptr, %array2_ptr
+  br i1 %cond, label %equal, label %unequal
+equal:
+  ret i8* %array1_ptr
+unequal:
+  ret i8* %array2_ptr
+}
+
+; CHECK: func4:
+; CHECK: addi [[REG1:[0-9]+]], 1, 128
+; CHECK: ld [[REG2:[0-9]+]], 120(1)
+; CHECK: ld [[REG3:[0-9]+]], 8([[REG1]])
+; CHECK: cmpld {{[0-9]+}}, [[REG2]], [[REG3]]
+; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]](1)
+; CHECK: std [[REG3]], -[[OFFSET2:[0-9]+]](1)
+; CHECK: ld 3, -[[OFFSET1]](1)
+; CHECK: ld 3, -[[OFFSET2]](1)
+
diff --git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll
new file mode 100644
index 000000000000..d04a6c98ee19
--- /dev/null
+++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; This tests that the GPRC/GPRC_NOR0 intersection subclass relationship with
+; GPRC is handled correctly. When it was not, this test would assert.
+
+@gen_random.last = external unnamed_addr global i64, align 8
+@.str = external unnamed_addr constant [4 x i8], align 1
+
+declare double @gen_random(double) #0
+
+declare void @benchmark_heapsort(i32 signext, double* nocapture) #0
+
+define signext i32 @main(i32 signext %argc, i8** nocapture %argv) #0 {
+entry:
+  br i1 undef, label %cond.true, label %cond.end
+
+cond.true:                                        ; preds = %entry
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %entry
+  %cond = phi i32 [ 0, %cond.true ], [ 8000000, %entry ]
+  %add = add i32 %cond, 1
+  %conv = sext i32 %add to i64
+  %mul = shl nsw i64 %conv, 3
+  %call1 = tail call noalias i8* @malloc(i64 %mul) #1
+  br i1 undef, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %cond.end
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %add
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %cond.end
+  ret i32 0
+}
+
+declare noalias i8* @malloc(i64) #0
+
+declare signext i32 @printf(i8* nocapture, ...) #0
+
+declare void @free(i8* nocapture) #0
+
+declare i64 @strtol(i8*, i8** nocapture, i32 signext) #0
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index cbfa4094fb4e..838db20ddd1b 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=ppc32 |  FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 |  FileCheck %s
 
 define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
 ; CHECK: exchange_and_add:
-; CHECK: lwarx
+; CHECK: lwarx {{r[0-9]+}}, 0, {{r[0-9]+}}
   %tmp = atomicrmw add i32* %mem, i32 %val monotonic
-; CHECK: stwcx.
+; CHECK: stwcx. {{r[0-9]+}}, 0, {{r[0-9]+}}
   ret i32 %tmp
 }
 
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index a427379a8b6d..40b4a2eea976 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -24,3 +24,23 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind {
 ; CHECK: stdcx.
   ret i64 %tmp
 }
+
+define void @atomic_store(i64* %mem, i64 %val) nounwind {
+entry:
+; CHECK: @atomic_store
+  store atomic i64 %val, i64* %mem release, align 64
+; CHECK: ldarx
+; CHECK: stdcx.
+  ret void
+}
+
+define i64 @atomic_load(i64* %mem) nounwind {
+entry:
+; CHECK: @atomic_load
+  %tmp = load atomic i64* %mem acquire, align 64
+; CHECK: ldarx
+; CHECK: stdcx.
+; CHECK: stdcx.
+  ret i64 %tmp
+}
+
diff --git a/test/CodeGen/PowerPC/available-externally.ll b/test/CodeGen/PowerPC/available-externally.ll
index fdead7dd8b34..abed0de80b88 100644
--- a/test/CodeGen/PowerPC/available-externally.ll
+++ b/test/CodeGen/PowerPC/available-externally.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -relocation-model=static | FileCheck %s -check-prefix=STATIC
 ; RUN: llc < %s -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -relocation-model=pic -mtriple=powerpc64-apple-darwin8 | FileCheck %s -check-prefix=PIC64
 ; RUN: llc < %s -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
+; RUN: llc < %s -relocation-model=dynamic-no-pic -mtriple=powerpc64-apple-darwin8 | FileCheck %s -check-prefix=DYNAMIC64
 ; PR4482
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "powerpc-apple-darwin8"
@@ -16,10 +18,18 @@ entry:
 ; PIC: bl L_exact_log2$stub
 ; PIC: blr
 
+; PIC64: _foo:
+; PIC64: bl L_exact_log2$stub
+; PIC64: blr
+
 ; DYNAMIC: _foo:
 ; DYNAMIC: bl L_exact_log2$stub
 ; DYNAMIC: blr
 
+; DYNAMIC64: _foo:
+; DYNAMIC64: bl L_exact_log2$stub
+; DYNAMIC64: blr
+
         %A = call i32 @exact_log2(i64 %x) nounwind
 	ret i32 %A
 }
@@ -34,13 +44,13 @@ entry:
 ; PIC: L_exact_log2$stub:
 ; PIC: .indirect_symbol _exact_log2
 ; PIC: mflr r0
-; PIC: bcl 20,31,L_exact_log2$stub$tmp
+; PIC: bcl 20, 31, L_exact_log2$stub$tmp
 
 ; PIC: L_exact_log2$stub$tmp:
 ; PIC: mflr r11
-; PIC: addis r11,r11,ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
+; PIC: addis r11, r11, ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
 ; PIC: mtlr r0
-; PIC: lwzu r12,lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
+; PIC: lwzu r12, lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
 ; PIC: mtctr r12
 ; PIC: bctr
 
@@ -51,12 +61,32 @@ entry:
 
 ; PIC: .subsections_via_symbols
 
+; PIC64: .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+; PIC64: L_exact_log2$stub:
+; PIC64: .indirect_symbol _exact_log2
+; PIC64: mflr r0
+; PIC64: bcl 20, 31, L_exact_log2$stub$tmp
+
+; PIC64: L_exact_log2$stub$tmp:
+; PIC64: mflr r11
+; PIC64: addis r11, r11, ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
+; PIC64: mtlr r0
+; PIC64: ldu r12, lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
+; PIC64: mtctr r12
+; PIC64: bctr
+
+; PIC64: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; PIC64: L_exact_log2$lazy_ptr:
+; PIC64: .indirect_symbol _exact_log2
+; PIC64: .quad dyld_stub_binding_helper
+
+; PIC64: .subsections_via_symbols
 
 ; DYNAMIC: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
 ; DYNAMIC: L_exact_log2$stub:
 ; DYNAMIC: .indirect_symbol _exact_log2
-; DYNAMIC: lis r11,ha16(L_exact_log2$lazy_ptr)
-; DYNAMIC: lwzu r12,lo16(L_exact_log2$lazy_ptr)(r11)
+; DYNAMIC: lis r11, ha16(L_exact_log2$lazy_ptr)
+; DYNAMIC: lwzu r12, lo16(L_exact_log2$lazy_ptr)(r11)
 ; DYNAMIC: mtctr r12
 ; DYNAMIC: bctr
 
@@ -65,7 +95,15 @@ entry:
 ; DYNAMIC: .indirect_symbol _exact_log2
 ; DYNAMIC: .long dyld_stub_binding_helper
 
-
-
-
-
+; DYNAMIC64: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
+; DYNAMIC64: L_exact_log2$stub:
+; DYNAMIC64: .indirect_symbol _exact_log2
+; DYNAMIC64: lis r11, ha16(L_exact_log2$lazy_ptr)
+; DYNAMIC64: ldu r12, lo16(L_exact_log2$lazy_ptr)(r11)
+; DYNAMIC64: mtctr r12
+; DYNAMIC64: bctr
+
+; DYNAMIC64: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; DYNAMIC64: L_exact_log2$lazy_ptr:
+; DYNAMIC64: .indirect_symbol _exact_log2
+; DYNAMIC64: .quad dyld_stub_binding_helper
diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll
index 4f6bfc729913..53bbc52167c4 100644
--- a/test/CodeGen/PowerPC/bswap-load-store.ll
+++ b/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=ppc32 -mcpu=ppc32 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=ppc64 -mcpu=ppc64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=PWR7
+; RUN: llc < %s -march=ppc32 -mcpu=pwr7 | FileCheck %s -check-prefix=X32
 
 
 define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
@@ -34,18 +36,47 @@ define i16 @LHBRX(i8* %ptr, i32 %off) {
         ret i16 %tmp6
 }
 
+define void @STDBRX(i64 %i, i8* %ptr, i64 %off) {
+        %tmp1 = getelementptr i8* %ptr, i64 %off                ; <i8*> [#uses=1]
+        %tmp1.upgrd.1 = bitcast i8* %tmp1 to i64*               ; <i64*> [#uses=1]
+        %tmp13 = tail call i64 @llvm.bswap.i64( i64 %i )                ; <i64> [#uses=1]
+        store i64 %tmp13, i64* %tmp1.upgrd.1
+        ret void
+}
+
+define i64 @LDBRX(i8* %ptr, i64 %off) {
+        %tmp1 = getelementptr i8* %ptr, i64 %off                ; <i8*> [#uses=1]
+        %tmp1.upgrd.2 = bitcast i8* %tmp1 to i64*               ; <i64*> [#uses=1]
+        %tmp = load i64* %tmp1.upgrd.2          ; <i64> [#uses=1]
+        %tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp )              ; <i64> [#uses=1]
+        ret i64 %tmp14
+}
+
 declare i32 @llvm.bswap.i32(i32)
 
 declare i16 @llvm.bswap.i16(i16)
 
+declare i64 @llvm.bswap.i64(i64)
+
 
 ; X32: stwbrx
 ; X32: lwbrx
 ; X32: sthbrx
 ; X32: lhbrx
+; X32-NOT: ldbrx
+; X32-NOT: stdbrx
 
 ; X64: stwbrx
 ; X64: lwbrx
 ; X64: sthbrx
 ; X64: lhbrx
+; X64-NOT: ldbrx
+; X64-NOT: stdbrx
+
+; PWR7: stwbrx
+; PWR7: lwbrx
+; PWR7: sthbrx
+; PWR7: lhbrx
+; PWR7: stdbrx
+; PWR7: ldbrx
 
diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
index 0454c584bcfe..e155a35c4da0 100644
--- a/test/CodeGen/PowerPC/buildvec_canonicalize.ll
+++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -1,10 +1,4 @@
-; There should be exactly one vxor here.
-; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
-; RUN:   grep vxor | count 1
-
-; There should be exactly one vsplti here.
-; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
-; RUN:   grep vsplti | count 1
+; RUN: llc < %s -march=ppc32 -mattr=+altivec --enable-unsafe-fp-math | FileCheck %s
 
 define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
         %tmp = load <4 x float>* %P3            ; <<4 x float>> [#uses=1]
@@ -15,10 +9,16 @@ define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
         store <4 x i32> zeroinitializer, <4 x i32>* %P2
         ret void
 }
+; The fmul will spill a vspltisw to create a -0.0 vector used as the addend
+; to vmaddfp (so it would IEEE compliant with zero sign propagation).
+; CHECK: @VXOR
+; CHECK: vsplti
+; CHECK: vxor
 
 define void @VSPLTI(<4 x i32>* %P2, <8 x i16>* %P3) {
         store <4 x i32> bitcast (<16 x i8> < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > to <4 x i32>), <4 x i32>* %P2
         store <8 x i16> < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >, <8 x i16>* %P3
         ret void
 }
-
+; CHECK: @VSPLTI
+; CHECK: vsplti
diff --git a/test/CodeGen/PowerPC/complex-return.ll b/test/CodeGen/PowerPC/complex-return.ll
new file mode 100644
index 000000000000..f12152ff0fca
--- /dev/null
+++ b/test/CodeGen/PowerPC/complex-return.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define { ppc_fp128, ppc_fp128 } @foo() nounwind {
+entry:
+  %retval = alloca { ppc_fp128, ppc_fp128 }, align 16
+  %x = alloca { ppc_fp128, ppc_fp128 }, align 16
+  %real = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0
+  %imag = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1
+  store ppc_fp128 0xM400C0000000000000000000000000000, ppc_fp128* %real
+  store ppc_fp128 0xMC00547AE147AE1483CA47AE147AE147A, ppc_fp128* %imag
+  %x.realp = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0
+  %x.real = load ppc_fp128* %x.realp
+  %x.imagp = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1
+  %x.imag = load ppc_fp128* %x.imagp
+  %real1 = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 0
+  %imag2 = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 1
+  store ppc_fp128 %x.real, ppc_fp128* %real1
+  store ppc_fp128 %x.imag, ppc_fp128* %imag2
+  %0 = load { ppc_fp128, ppc_fp128 }* %retval
+  ret { ppc_fp128, ppc_fp128 } %0
+}
+
+; CHECK: foo:
+; CHECK: lfd 3
+; CHECK: lfd 4
+; CHECK: lfd 2
+; CHECK: lfd 1
+
+define { float, float } @oof() nounwind {
+entry:
+  %retval = alloca { float, float }, align 4
+  %x = alloca { float, float }, align 4
+  %real = getelementptr inbounds { float, float }* %x, i32 0, i32 0
+  %imag = getelementptr inbounds { float, float }* %x, i32 0, i32 1
+  store float 3.500000e+00, float* %real
+  store float 0xC00547AE20000000, float* %imag
+  %x.realp = getelementptr inbounds { float, float }* %x, i32 0, i32 0
+  %x.real = load float* %x.realp
+  %x.imagp = getelementptr inbounds { float, float }* %x, i32 0, i32 1
+  %x.imag = load float* %x.imagp
+  %real1 = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
+  %imag2 = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
+  store float %x.real, float* %real1
+  store float %x.imag, float* %imag2
+  %0 = load { float, float }* %retval
+  ret { float, float } %0
+}
+
+; CHECK: oof:
+; CHECK: lfs 2
+; CHECK: lfs 1
+
diff --git a/test/CodeGen/PowerPC/cr-spills.ll b/test/CodeGen/PowerPC/cr-spills.ll
new file mode 100644
index 000000000000..d6df7a237668
--- /dev/null
+++ b/test/CodeGen/PowerPC/cr-spills.ll
@@ -0,0 +1,409 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; This test case triggers several functions related to cr spilling, both in
+; frame lowering and to handle cr register pressure. When the register kill
+; flags were not being set correctly, this would cause the register scavenger to
+; assert.
+
+@SetupFastFullPelSearch.orig_pels = external unnamed_addr global [768 x i16], align 2
+@weight_luma = external global i32
+@offset_luma = external global i32
+@wp_luma_round = external global i32, align 4
+@luma_log_weight_denom = external global i32, align 4
+
+define void @SetupFastFullPelSearch() #0 {
+entry:
+  %mul10 = mul nsw i32 undef, undef
+  br i1 undef, label %land.end, label %land.lhs.true
+
+land.lhs.true:                                    ; preds = %entry
+  switch i32 0, label %land.end [
+    i32 0, label %land.rhs
+    i32 3, label %land.rhs
+  ]
+
+land.rhs:                                         ; preds = %land.lhs.true, %land.lhs.true
+  %tobool21 = icmp ne i32 undef, 0
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %land.lhs.true, %entry
+  %0 = phi i1 [ %tobool21, %land.rhs ], [ false, %land.lhs.true ], [ false, %entry ]
+  %cond = load i32** undef, align 8
+  br i1 undef, label %if.then95, label %for.body.lr.ph
+
+if.then95:                                        ; preds = %land.end
+  %cmp.i4.i1427 = icmp slt i32 undef, undef
+  br label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %if.then95, %land.end
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  br i1 undef, label %for.body, label %for.body252
+
+for.body252:                                      ; preds = %for.inc997, %for.body
+  %shl263 = add i32 undef, 80
+  br i1 %0, label %for.cond286.preheader, label %for.cond713.preheader
+
+for.cond286.preheader:                            ; preds = %for.body252
+  br label %for.cond290.preheader
+
+for.cond290.preheader:                            ; preds = %for.end520, %for.cond286.preheader
+  %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ]
+  %1 = load i32* undef, align 4, !tbaa !0
+  %2 = load i32* @weight_luma, align 4, !tbaa !0
+  %3 = load i32* @wp_luma_round, align 4, !tbaa !0
+  %4 = load i32* @luma_log_weight_denom, align 4, !tbaa !0
+  %5 = load i32* @offset_luma, align 4, !tbaa !0
+  %incdec.ptr502.sum = add i64 undef, 16
+  br label %for.body293
+
+for.body293:                                      ; preds = %for.body293, %for.cond290.preheader
+  %srcptr.41591 = phi i16* [ %srcptr.31595, %for.cond290.preheader ], [ undef, %for.body293 ]
+  %refptr.11590 = phi i16* [ undef, %for.cond290.preheader ], [ %add.ptr517, %for.body293 ]
+  %LineSadBlk0.01588 = phi i32 [ 0, %for.cond290.preheader ], [ %add346, %for.body293 ]
+  %LineSadBlk1.01587 = phi i32 [ 0, %for.cond290.preheader ], [ %add402, %for.body293 ]
+  %LineSadBlk3.01586 = phi i32 [ 0, %for.cond290.preheader ], [ %add514, %for.body293 ]
+  %LineSadBlk2.01585 = phi i32 [ 0, %for.cond290.preheader ], [ %add458, %for.body293 ]
+  %6 = load i16* %refptr.11590, align 2, !tbaa !3
+  %conv294 = zext i16 %6 to i32
+  %mul295 = mul nsw i32 %conv294, %2
+  %add296 = add nsw i32 %mul295, %3
+  %shr = ashr i32 %add296, %4
+  %add297 = add nsw i32 %shr, %5
+  %cmp.i.i1513 = icmp sgt i32 %add297, 0
+  %cond.i.i1514 = select i1 %cmp.i.i1513, i32 %add297, i32 0
+  %cmp.i4.i1515 = icmp slt i32 %cond.i.i1514, %1
+  %cond.i5.i1516 = select i1 %cmp.i4.i1515, i32 %cond.i.i1514, i32 %1
+  %7 = load i16* %srcptr.41591, align 2, !tbaa !3
+  %conv300 = zext i16 %7 to i32
+  %sub301 = sub nsw i32 %cond.i5.i1516, %conv300
+  %idxprom302 = sext i32 %sub301 to i64
+  %arrayidx303 = getelementptr inbounds i32* %cond, i64 %idxprom302
+  %8 = load i32* %arrayidx303, align 4, !tbaa !0
+  %add304 = add nsw i32 %8, %LineSadBlk0.01588
+  %9 = load i32* undef, align 4, !tbaa !0
+  %add318 = add nsw i32 %add304, %9
+  %10 = load i16* undef, align 2, !tbaa !3
+  %conv321 = zext i16 %10 to i32
+  %mul322 = mul nsw i32 %conv321, %2
+  %add323 = add nsw i32 %mul322, %3
+  %shr324 = ashr i32 %add323, %4
+  %add325 = add nsw i32 %shr324, %5
+  %cmp.i.i1505 = icmp sgt i32 %add325, 0
+  %cond.i.i1506 = select i1 %cmp.i.i1505, i32 %add325, i32 0
+  %cmp.i4.i1507 = icmp slt i32 %cond.i.i1506, %1
+  %cond.i5.i1508 = select i1 %cmp.i4.i1507, i32 %cond.i.i1506, i32 %1
+  %sub329 = sub nsw i32 %cond.i5.i1508, 0
+  %idxprom330 = sext i32 %sub329 to i64
+  %arrayidx331 = getelementptr inbounds i32* %cond, i64 %idxprom330
+  %11 = load i32* %arrayidx331, align 4, !tbaa !0
+  %add332 = add nsw i32 %add318, %11
+  %cmp.i.i1501 = icmp sgt i32 undef, 0
+  %cond.i.i1502 = select i1 %cmp.i.i1501, i32 undef, i32 0
+  %cmp.i4.i1503 = icmp slt i32 %cond.i.i1502, %1
+  %cond.i5.i1504 = select i1 %cmp.i4.i1503, i32 %cond.i.i1502, i32 %1
+  %incdec.ptr341 = getelementptr inbounds i16* %srcptr.41591, i64 4
+  %12 = load i16* null, align 2, !tbaa !3
+  %conv342 = zext i16 %12 to i32
+  %sub343 = sub nsw i32 %cond.i5.i1504, %conv342
+  %idxprom344 = sext i32 %sub343 to i64
+  %arrayidx345 = getelementptr inbounds i32* %cond, i64 %idxprom344
+  %13 = load i32* %arrayidx345, align 4, !tbaa !0
+  %add346 = add nsw i32 %add332, %13
+  %incdec.ptr348 = getelementptr inbounds i16* %refptr.11590, i64 5
+  %14 = load i16* null, align 2, !tbaa !3
+  %conv349 = zext i16 %14 to i32
+  %mul350 = mul nsw i32 %conv349, %2
+  %add351 = add nsw i32 %mul350, %3
+  %shr352 = ashr i32 %add351, %4
+  %add353 = add nsw i32 %shr352, %5
+  %cmp.i.i1497 = icmp sgt i32 %add353, 0
+  %cond.i.i1498 = select i1 %cmp.i.i1497, i32 %add353, i32 0
+  %cmp.i4.i1499 = icmp slt i32 %cond.i.i1498, %1
+  %cond.i5.i1500 = select i1 %cmp.i4.i1499, i32 %cond.i.i1498, i32 %1
+  %incdec.ptr355 = getelementptr inbounds i16* %srcptr.41591, i64 5
+  %15 = load i16* %incdec.ptr341, align 2, !tbaa !3
+  %conv356 = zext i16 %15 to i32
+  %sub357 = sub nsw i32 %cond.i5.i1500, %conv356
+  %idxprom358 = sext i32 %sub357 to i64
+  %arrayidx359 = getelementptr inbounds i32* %cond, i64 %idxprom358
+  %16 = load i32* %arrayidx359, align 4, !tbaa !0
+  %add360 = add nsw i32 %16, %LineSadBlk1.01587
+  %incdec.ptr362 = getelementptr inbounds i16* %refptr.11590, i64 6
+  %17 = load i16* %incdec.ptr348, align 2, !tbaa !3
+  %conv363 = zext i16 %17 to i32
+  %mul364 = mul nsw i32 %conv363, %2
+  %add365 = add nsw i32 %mul364, %3
+  %shr366 = ashr i32 %add365, %4
+  %add367 = add nsw i32 %shr366, %5
+  %cmp.i.i1493 = icmp sgt i32 %add367, 0
+  %cond.i.i1494 = select i1 %cmp.i.i1493, i32 %add367, i32 0
+  %cmp.i4.i1495 = icmp slt i32 %cond.i.i1494, %1
+  %cond.i5.i1496 = select i1 %cmp.i4.i1495, i32 %cond.i.i1494, i32 %1
+  %incdec.ptr369 = getelementptr inbounds i16* %srcptr.41591, i64 6
+  %18 = load i16* %incdec.ptr355, align 2, !tbaa !3
+  %conv370 = zext i16 %18 to i32
+  %sub371 = sub nsw i32 %cond.i5.i1496, %conv370
+  %idxprom372 = sext i32 %sub371 to i64
+  %arrayidx373 = getelementptr inbounds i32* %cond, i64 %idxprom372
+  %19 = load i32* %arrayidx373, align 4, !tbaa !0
+  %add374 = add nsw i32 %add360, %19
+  %incdec.ptr376 = getelementptr inbounds i16* %refptr.11590, i64 7
+  %20 = load i16* %incdec.ptr362, align 2, !tbaa !3
+  %conv377 = zext i16 %20 to i32
+  %mul378 = mul nsw i32 %conv377, %2
+  %add379 = add nsw i32 %mul378, %3
+  %shr380 = ashr i32 %add379, %4
+  %add381 = add nsw i32 %shr380, %5
+  %cmp.i.i1489 = icmp sgt i32 %add381, 0
+  %cond.i.i1490 = select i1 %cmp.i.i1489, i32 %add381, i32 0
+  %cmp.i4.i1491 = icmp slt i32 %cond.i.i1490, %1
+  %cond.i5.i1492 = select i1 %cmp.i4.i1491, i32 %cond.i.i1490, i32 %1
+  %incdec.ptr383 = getelementptr inbounds i16* %srcptr.41591, i64 7
+  %21 = load i16* %incdec.ptr369, align 2, !tbaa !3
+  %conv384 = zext i16 %21 to i32
+  %sub385 = sub nsw i32 %cond.i5.i1492, %conv384
+  %idxprom386 = sext i32 %sub385 to i64
+  %arrayidx387 = getelementptr inbounds i32* %cond, i64 %idxprom386
+  %22 = load i32* %arrayidx387, align 4, !tbaa !0
+  %add388 = add nsw i32 %add374, %22
+  %23 = load i16* %incdec.ptr376, align 2, !tbaa !3
+  %conv391 = zext i16 %23 to i32
+  %mul392 = mul nsw i32 %conv391, %2
+  %add395 = add nsw i32 0, %5
+  %cmp.i.i1485 = icmp sgt i32 %add395, 0
+  %cond.i.i1486 = select i1 %cmp.i.i1485, i32 %add395, i32 0
+  %cmp.i4.i1487 = icmp slt i32 %cond.i.i1486, %1
+  %cond.i5.i1488 = select i1 %cmp.i4.i1487, i32 %cond.i.i1486, i32 %1
+  %incdec.ptr397 = getelementptr inbounds i16* %srcptr.41591, i64 8
+  %24 = load i16* %incdec.ptr383, align 2, !tbaa !3
+  %conv398 = zext i16 %24 to i32
+  %sub399 = sub nsw i32 %cond.i5.i1488, %conv398
+  %idxprom400 = sext i32 %sub399 to i64
+  %arrayidx401 = getelementptr inbounds i32* %cond, i64 %idxprom400
+  %25 = load i32* %arrayidx401, align 4, !tbaa !0
+  %add402 = add nsw i32 %add388, %25
+  %incdec.ptr404 = getelementptr inbounds i16* %refptr.11590, i64 9
+  %cmp.i4.i1483 = icmp slt i32 undef, %1
+  %cond.i5.i1484 = select i1 %cmp.i4.i1483, i32 undef, i32 %1
+  %26 = load i16* %incdec.ptr397, align 2, !tbaa !3
+  %conv412 = zext i16 %26 to i32
+  %sub413 = sub nsw i32 %cond.i5.i1484, %conv412
+  %idxprom414 = sext i32 %sub413 to i64
+  %arrayidx415 = getelementptr inbounds i32* %cond, i64 %idxprom414
+  %27 = load i32* %arrayidx415, align 4, !tbaa !0
+  %add416 = add nsw i32 %27, %LineSadBlk2.01585
+  %incdec.ptr418 = getelementptr inbounds i16* %refptr.11590, i64 10
+  %28 = load i16* %incdec.ptr404, align 2, !tbaa !3
+  %conv419 = zext i16 %28 to i32
+  %mul420 = mul nsw i32 %conv419, %2
+  %add421 = add nsw i32 %mul420, %3
+  %shr422 = ashr i32 %add421, %4
+  %add423 = add nsw i32 %shr422, %5
+  %cmp.i.i1477 = icmp sgt i32 %add423, 0
+  %cond.i.i1478 = select i1 %cmp.i.i1477, i32 %add423, i32 0
+  %cmp.i4.i1479 = icmp slt i32 %cond.i.i1478, %1
+  %cond.i5.i1480 = select i1 %cmp.i4.i1479, i32 %cond.i.i1478, i32 %1
+  %incdec.ptr425 = getelementptr inbounds i16* %srcptr.41591, i64 10
+  %sub427 = sub nsw i32 %cond.i5.i1480, 0
+  %idxprom428 = sext i32 %sub427 to i64
+  %arrayidx429 = getelementptr inbounds i32* %cond, i64 %idxprom428
+  %29 = load i32* %arrayidx429, align 4, !tbaa !0
+  %add430 = add nsw i32 %add416, %29
+  %incdec.ptr432 = getelementptr inbounds i16* %refptr.11590, i64 11
+  %30 = load i16* %incdec.ptr418, align 2, !tbaa !3
+  %conv433 = zext i16 %30 to i32
+  %mul434 = mul nsw i32 %conv433, %2
+  %add435 = add nsw i32 %mul434, %3
+  %shr436 = ashr i32 %add435, %4
+  %add437 = add nsw i32 %shr436, %5
+  %cmp.i.i1473 = icmp sgt i32 %add437, 0
+  %cond.i.i1474 = select i1 %cmp.i.i1473, i32 %add437, i32 0
+  %cmp.i4.i1475 = icmp slt i32 %cond.i.i1474, %1
+  %cond.i5.i1476 = select i1 %cmp.i4.i1475, i32 %cond.i.i1474, i32 %1
+  %31 = load i16* %incdec.ptr425, align 2, !tbaa !3
+  %conv440 = zext i16 %31 to i32
+  %sub441 = sub nsw i32 %cond.i5.i1476, %conv440
+  %idxprom442 = sext i32 %sub441 to i64
+  %arrayidx443 = getelementptr inbounds i32* %cond, i64 %idxprom442
+  %32 = load i32* %arrayidx443, align 4, !tbaa !0
+  %add444 = add nsw i32 %add430, %32
+  %incdec.ptr446 = getelementptr inbounds i16* %refptr.11590, i64 12
+  %33 = load i16* %incdec.ptr432, align 2, !tbaa !3
+  %conv447 = zext i16 %33 to i32
+  %mul448 = mul nsw i32 %conv447, %2
+  %add449 = add nsw i32 %mul448, %3
+  %shr450 = ashr i32 %add449, %4
+  %add451 = add nsw i32 %shr450, %5
+  %cmp.i.i1469 = icmp sgt i32 %add451, 0
+  %cond.i.i1470 = select i1 %cmp.i.i1469, i32 %add451, i32 0
+  %cmp.i4.i1471 = icmp slt i32 %cond.i.i1470, %1
+  %cond.i5.i1472 = select i1 %cmp.i4.i1471, i32 %cond.i.i1470, i32 %1
+  %incdec.ptr453 = getelementptr inbounds i16* %srcptr.41591, i64 12
+  %34 = load i16* undef, align 2, !tbaa !3
+  %conv454 = zext i16 %34 to i32
+  %sub455 = sub nsw i32 %cond.i5.i1472, %conv454
+  %idxprom456 = sext i32 %sub455 to i64
+  %arrayidx457 = getelementptr inbounds i32* %cond, i64 %idxprom456
+  %35 = load i32* %arrayidx457, align 4, !tbaa !0
+  %add458 = add nsw i32 %add444, %35
+  %incdec.ptr460 = getelementptr inbounds i16* %refptr.11590, i64 13
+  %36 = load i16* %incdec.ptr446, align 2, !tbaa !3
+  %conv461 = zext i16 %36 to i32
+  %mul462 = mul nsw i32 %conv461, %2
+  %add463 = add nsw i32 %mul462, %3
+  %shr464 = ashr i32 %add463, %4
+  %add465 = add nsw i32 %shr464, %5
+  %cmp.i.i1465 = icmp sgt i32 %add465, 0
+  %cond.i.i1466 = select i1 %cmp.i.i1465, i32 %add465, i32 0
+  %cmp.i4.i1467 = icmp slt i32 %cond.i.i1466, %1
+  %cond.i5.i1468 = select i1 %cmp.i4.i1467, i32 %cond.i.i1466, i32 %1
+  %incdec.ptr467 = getelementptr inbounds i16* %srcptr.41591, i64 13
+  %37 = load i16* %incdec.ptr453, align 2, !tbaa !3
+  %conv468 = zext i16 %37 to i32
+  %sub469 = sub nsw i32 %cond.i5.i1468, %conv468
+  %idxprom470 = sext i32 %sub469 to i64
+  %arrayidx471 = getelementptr inbounds i32* %cond, i64 %idxprom470
+  %38 = load i32* %arrayidx471, align 4, !tbaa !0
+  %add472 = add nsw i32 %38, %LineSadBlk3.01586
+  %incdec.ptr474 = getelementptr inbounds i16* %refptr.11590, i64 14
+  %add477 = add nsw i32 0, %3
+  %shr478 = ashr i32 %add477, %4
+  %add479 = add nsw i32 %shr478, %5
+  %cmp.i.i1461 = icmp sgt i32 %add479, 0
+  %cond.i.i1462 = select i1 %cmp.i.i1461, i32 %add479, i32 0
+  %cmp.i4.i1463 = icmp slt i32 %cond.i.i1462, %1
+  %cond.i5.i1464 = select i1 %cmp.i4.i1463, i32 %cond.i.i1462, i32 %1
+  %incdec.ptr481 = getelementptr inbounds i16* %srcptr.41591, i64 14
+  %39 = load i16* %incdec.ptr467, align 2, !tbaa !3
+  %conv482 = zext i16 %39 to i32
+  %sub483 = sub nsw i32 %cond.i5.i1464, %conv482
+  %idxprom484 = sext i32 %sub483 to i64
+  %arrayidx485 = getelementptr inbounds i32* %cond, i64 %idxprom484
+  %40 = load i32* %arrayidx485, align 4, !tbaa !0
+  %add486 = add nsw i32 %add472, %40
+  %incdec.ptr488 = getelementptr inbounds i16* %refptr.11590, i64 15
+  %41 = load i16* %incdec.ptr474, align 2, !tbaa !3
+  %conv489 = zext i16 %41 to i32
+  %mul490 = mul nsw i32 %conv489, %2
+  %add491 = add nsw i32 %mul490, %3
+  %shr492 = ashr i32 %add491, %4
+  %add493 = add nsw i32 %shr492, %5
+  %cmp.i.i1457 = icmp sgt i32 %add493, 0
+  %cond.i.i1458 = select i1 %cmp.i.i1457, i32 %add493, i32 0
+  %cmp.i4.i1459 = icmp slt i32 %cond.i.i1458, %1
+  %cond.i5.i1460 = select i1 %cmp.i4.i1459, i32 %cond.i.i1458, i32 %1
+  %incdec.ptr495 = getelementptr inbounds i16* %srcptr.41591, i64 15
+  %42 = load i16* %incdec.ptr481, align 2, !tbaa !3
+  %conv496 = zext i16 %42 to i32
+  %sub497 = sub nsw i32 %cond.i5.i1460, %conv496
+  %idxprom498 = sext i32 %sub497 to i64
+  %arrayidx499 = getelementptr inbounds i32* %cond, i64 %idxprom498
+  %43 = load i32* %arrayidx499, align 4, !tbaa !0
+  %add500 = add nsw i32 %add486, %43
+  %44 = load i16* %incdec.ptr488, align 2, !tbaa !3
+  %conv503 = zext i16 %44 to i32
+  %mul504 = mul nsw i32 %conv503, %2
+  %add505 = add nsw i32 %mul504, %3
+  %shr506 = ashr i32 %add505, %4
+  %add507 = add nsw i32 %shr506, %5
+  %cmp.i.i1453 = icmp sgt i32 %add507, 0
+  %cond.i.i1454 = select i1 %cmp.i.i1453, i32 %add507, i32 0
+  %cmp.i4.i1455 = icmp slt i32 %cond.i.i1454, %1
+  %cond.i5.i1456 = select i1 %cmp.i4.i1455, i32 %cond.i.i1454, i32 %1
+  %45 = load i16* %incdec.ptr495, align 2, !tbaa !3
+  %conv510 = zext i16 %45 to i32
+  %sub511 = sub nsw i32 %cond.i5.i1456, %conv510
+  %idxprom512 = sext i32 %sub511 to i64
+  %arrayidx513 = getelementptr inbounds i32* %cond, i64 %idxprom512
+  %46 = load i32* %arrayidx513, align 4, !tbaa !0
+  %add514 = add nsw i32 %add500, %46
+  %add.ptr517 = getelementptr inbounds i16* %refptr.11590, i64 %incdec.ptr502.sum
+  %exitcond1692 = icmp eq i32 undef, 4
+  br i1 %exitcond1692, label %for.end520, label %for.body293
+
+for.end520:                                       ; preds = %for.body293
+  store i32 %add346, i32* undef, align 4, !tbaa !0
+  store i32 %add402, i32* undef, align 4, !tbaa !0
+  store i32 %add458, i32* undef, align 4, !tbaa !0
+  store i32 %add514, i32* null, align 4, !tbaa !0
+  br i1 undef, label %for.end543, label %for.cond290.preheader
+
+for.end543:                                       ; preds = %for.end520
+  br i1 undef, label %for.inc997, label %for.body549
+
+for.body549:                                      ; preds = %for.inc701, %for.end543
+  %call554 = call i16* null(i16**** null, i32 signext undef, i32 signext %shl263) #1
+  br label %for.cond559.preheader
+
+for.cond559.preheader:                            ; preds = %for.cond559.preheader, %for.body549
+  br i1 undef, label %for.inc701, label %for.cond559.preheader
+
+for.inc701:                                       ; preds = %for.cond559.preheader
+  br i1 undef, label %for.inc997, label %for.body549
+
+for.cond713.preheader:                            ; preds = %for.end850, %for.body252
+  br label %for.body716
+
+for.body716:                                      ; preds = %for.body716, %for.cond713.preheader
+  br i1 undef, label %for.end850, label %for.body716
+
+for.end850:                                       ; preds = %for.body716
+  br i1 undef, label %for.end873, label %for.cond713.preheader
+
+for.end873:                                       ; preds = %for.end850
+  br i1 undef, label %for.inc997, label %for.body879
+
+for.body879:                                      ; preds = %for.inc992, %for.end873
+  br label %for.cond889.preheader
+
+for.cond889.preheader:                            ; preds = %for.end964, %for.body879
+  br i1 undef, label %for.cond894.preheader.lr.ph, label %for.end964
+
+for.cond894.preheader.lr.ph:                      ; preds = %for.cond889.preheader
+  br label %for.body898.lr.ph.us
+
+for.end957.us:                                    ; preds = %for.body946.us
+  br i1 undef, label %for.body898.lr.ph.us, label %for.end964
+
+for.body946.us:                                   ; preds = %for.body930.us, %for.body946.us
+  br i1 false, label %for.body946.us, label %for.end957.us
+
+for.body930.us:                                   ; preds = %for.body914.us, %for.body930.us
+  br i1 undef, label %for.body930.us, label %for.body946.us
+
+for.body914.us:                                   ; preds = %for.body898.us, %for.body914.us
+  br i1 undef, label %for.body914.us, label %for.body930.us
+
+for.body898.us:                                   ; preds = %for.body898.lr.ph.us, %for.body898.us
+  br i1 undef, label %for.body898.us, label %for.body914.us
+
+for.body898.lr.ph.us:                             ; preds = %for.end957.us, %for.cond894.preheader.lr.ph
+  br label %for.body898.us
+
+for.end964:                                       ; preds = %for.end957.us, %for.cond889.preheader
+  %inc990 = add nsw i32 undef, 1
+  br i1 false, label %for.inc992, label %for.cond889.preheader
+
+for.inc992:                                       ; preds = %for.end964
+  br i1 false, label %for.inc997, label %for.body879
+
+for.inc997:                                       ; preds = %for.inc992, %for.end873, %for.inc701, %for.end543
+  %cmp250 = icmp slt i32 undef, %mul10
+  br i1 %cmp250, label %for.body252, label %for.end999
+
+for.end999:                                       ; preds = %for.inc997
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/PowerPC/ctr-cleanup.ll b/test/CodeGen/PowerPC/ctr-cleanup.ll
new file mode 100644
index 000000000000..04e4ffb0d48d
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctr-cleanup.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @main() #0 {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 5
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main
+; CHECK: li {{[0-9]+}}, 4
+; CHECK-NOT: li {{[0-9]+}}, 4
+; CHECK: bdnz
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index 1d365d47a877..3757fa3e2f29 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -1,10 +1,12 @@
 ; Make sure this testcase does not use ctpop
-; RUN: llc < %s -march=ppc32 | grep -i cntlzw
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
 
 declare i32 @llvm.cttz.i32(i32, i1)
 
 define i32 @bar(i32 %x) {
 entry:
+; CHECK: @bar
+; CHECK: cntlzw
         %tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true )              ; <i32> [#uses=1]
         ret i32 %tmp.1
 }
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
index e161cb05686f..21e36618c5c1 100644
--- a/test/CodeGen/PowerPC/dbg.ll
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -16,12 +16,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"dbg.c", metadata !"/src", metadata !"clang version 3.1", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{metadata !9, metadata !9, metadata !10}
diff --git a/test/CodeGen/PowerPC/dcbt-sched.ll b/test/CodeGen/PowerPC/dcbt-sched.ll
new file mode 100644
index 000000000000..dfa1b75bd7db
--- /dev/null
+++ b/test/CodeGen/PowerPC/dcbt-sched.ll
@@ -0,0 +1,22 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc -mcpu=a2 -enable-misched -enable-aa-sched-mi < %s | FileCheck %s
+
+define i8 @test1(i8* noalias %a, i8* noalias %b, i8* noalias %c) nounwind {
+entry:
+  %q = load i8* %b
+  call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1)
+  %r = load i8* %c
+  %s = add i8 %q, %r
+  ret i8 %s
+}
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+
+; Test that we've moved the second load to before the dcbt to better
+; hide its latency.
+; CHECK: @test1
+; CHECK: lbz
+; CHECK: lbz
+; CHECK: dcbt
+
diff --git a/test/CodeGen/PowerPC/float-asmprint.ll b/test/CodeGen/PowerPC/float-asmprint.ll
new file mode 100644
index 000000000000..c9dc02862aac
--- /dev/null
+++ b/test/CodeGen/PowerPC/float-asmprint.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=powerpc64-none-linux < %s | FileCheck %s
+
+; Check that all current floating-point types are correctly emitted to assembly
+; on a big-endian target. x86_fp80 can't actually print for unrelated reasons,
+; but that's not really a problem.
+
+@var128 = global fp128 0xL00000000000000008000000000000000, align 16
+@varppc128 = global ppc_fp128 0xM80000000000000000000000000000000, align 16
+@var64 = global double -0.0, align 8
+@var32 = global float -0.0, align 4
+@var16 = global half -0.0, align 2
+
+; CHECK: var128:
+; CHECK-NEXT: .quad -9223372036854775808      # fp128 -0
+; CHECK-NEXT: .quad 0
+; CHECK-NEXT: .size
+
+; CHECK: varppc128:
+; CHECK-NEXT: .quad -9223372036854775808      # ppc_fp128 -0
+; CHECK-NEXT: .quad 0
+; CHECK-NEXT: .size
+
+; CHECK: var64:
+; CHECK-NEXT: .quad -9223372036854775808      # double -0
+; CHECK-NEXT: .size
+
+; CHECK: var32:
+; CHECK-NEXT: .long 2147483648                # float -0
+; CHECK-NEXT: .size
+
+; CHECK: var16:
+; CHECK-NEXT: .short 32768                    # half -0
+; CHECK-NEXT: .size
+
diff --git a/test/CodeGen/PowerPC/float-to-int.ll b/test/CodeGen/PowerPC/float-to-int.ll
new file mode 100644
index 000000000000..39cd4f929f8d
--- /dev/null
+++ b/test/CodeGen/PowerPC/float-to-int.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @foo(float %a) nounwind {
+  %x = fptosi float %a to i64
+  ret i64 %x
+
+; CHECK: @foo
+; CHECK: fctidz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo2(double %a) nounwind {
+  %x = fptosi double %a to i64
+  ret i64 %x
+
+; CHECK: @foo2
+; CHECK: fctidz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo3(float %a) nounwind {
+  %x = fptoui float %a to i64
+  ret i64 %x
+
+; CHECK: @foo3
+; CHECK: fctiduz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo4(double %a) nounwind {
+  %x = fptoui double %a to i64
+  ret i64 %x
+
+; CHECK: @foo4
+; CHECK: fctiduz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i32 @goo(float %a) nounwind {
+  %x = fptosi float %a to i32
+  ret i32 %x
+
+; CHECK: @goo
+; CHECK: fctiwz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo2(double %a) nounwind {
+  %x = fptosi double %a to i32
+  ret i32 %x
+
+; CHECK: @goo2
+; CHECK: fctiwz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo3(float %a) nounwind {
+  %x = fptoui float %a to i32
+  ret i32 %x
+
+; CHECK: @goo3
+; CHECK: fctiwuz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo4(double %a) nounwind {
+  %x = fptoui double %a to i32
+  ret i32 %x
+
+; CHECK: @goo4
+; CHECK: fctiwuz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/frame-size.ll b/test/CodeGen/PowerPC/frame-size.ll
new file mode 100644
index 000000000000..0e569a4602c3
--- /dev/null
+++ b/test/CodeGen/PowerPC/frame-size.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define i64 @foo() nounwind {
+entry:
+  %x = alloca [32568 x i8]
+  %"alloca point" = bitcast i32 0 to i32
+  %x1 = bitcast [32568 x i8]* %x to i8*
+
+; Check that the RS spill slot has been allocated (because the estimate
+; will fail the small-frame-size check and the function has spills).
+; CHECK: @foo
+; CHECK: stdu 1, -32768(1)
+
+  %s1 = call i64 @bar(i8* %x1) nounwind
+  %s2 = call i64 @bar(i8* %x1) nounwind
+  %s3 = call i64 @bar(i8* %x1) nounwind
+  %s4 = call i64 @bar(i8* %x1) nounwind
+  %s5 = call i64 @bar(i8* %x1) nounwind
+  %s6 = call i64 @bar(i8* %x1) nounwind
+  %s7 = call i64 @bar(i8* %x1) nounwind
+  %s8 = call i64 @bar(i8* %x1) nounwind
+  %r = call i64 @can(i64 %s1, i64 %s2, i64 %s3, i64 %s4, i64 %s5, i64 %s6, i64 %s7, i64 %s8) nounwind
+  br label %return
+
+return:
+  ret i64 %r
+}
+
+declare i64 @bar(i8*)
+declare i64 @can(i64, i64, i64, i64, i64, i64, i64, i64)
+
diff --git a/test/CodeGen/PowerPC/frameaddr.ll b/test/CodeGen/PowerPC/frameaddr.ll
new file mode 100644
index 000000000000..eabd4a68aa83
--- /dev/null
+++ b/test/CodeGen/PowerPC/frameaddr.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare void @llvm.eh.sjlj.longjmp(i8*) #1
+
+define i8* @main() #0 {
+entry:
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
+
+; CHECK: @main
+; CHECK: mr 3, 1
+}
+
+define i8* @foo() #3 { ; naked
+entry:
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
+
+; CHECK: @foo
+; CHECK: mr 3, 1
+}
+
+define i8* @bar() #0 {
+entry:
+  %x = alloca [100000 x i8]                       ; <[100000 x i8]*> [#uses=1]
+  %x1 = bitcast [100000 x i8]* %x to i8*          ; <i8*> [#uses=1]
+  call void @use(i8* %x1) nounwind
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
+
+; Note that if we start eliminating non-leaf frame pointers by default, this
+; will need to be updated.
+; CHECK: @bar
+; CHECK: mr 3, 31
+}
+
+declare void @use(i8*)
+
+declare i8* @llvm.frameaddress(i32) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind naked "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/test/CodeGen/PowerPC/i32-to-float.ll b/test/CodeGen/PowerPC/i32-to-float.ll
new file mode 100644
index 000000000000..2707d0352de1
--- /dev/null
+++ b/test/CodeGen/PowerPC/i32-to-float.ll
@@ -0,0 +1,82 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr6 | FileCheck -check-prefix=CHECK-PWR6 %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @foo(i32 %a) nounwind {
+entry:
+  %x = sitofp i32 %a to float
+  ret float %x
+
+; CHECK: @foo
+; CHECK: extsw [[REG:[0-9]+]], 3
+; CHECK: std [[REG]],
+; CHECK: lfd [[REG2:[0-9]+]],
+; CHECK: fcfid [[REG3:[0-9]+]], [[REG2]]
+; CHECK: frsp 1, [[REG3]]
+; CHECK: blr
+
+; CHECK-PWR6: @foo
+; CHECK-PWR6: stw 3,
+; CHECK-PWR6: lfiwax [[REG:[0-9]+]],
+; CHECK-PWR6: fcfid [[REG2:[0-9]+]], [[REG]]
+; CHECK-PWR6: frsp 1, [[REG2]]
+; CHECK-PWR6: blr
+
+; CHECK-A2: @foo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfids 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define double @goo(i32 %a) nounwind {
+entry:
+  %x = sitofp i32 %a to double
+  ret double %x
+
+; CHECK: @goo
+; CHECK: extsw [[REG:[0-9]+]], 3
+; CHECK: std [[REG]],
+; CHECK: lfd [[REG2:[0-9]+]],
+; CHECK: fcfid 1, [[REG2]]
+; CHECK: blr
+
+; CHECK-PWR6: @goo
+; CHECK-PWR6: stw 3,
+; CHECK-PWR6: lfiwax [[REG:[0-9]+]],
+; CHECK-PWR6: fcfid 1, [[REG]]
+; CHECK-PWR6: blr
+
+; CHECK-A2: @goo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfid 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define float @foou(i32 %a) nounwind {
+entry:
+  %x = uitofp i32 %a to float
+  ret float %x
+
+; CHECK-A2: @foou
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwzx [[REG:[0-9]+]],
+; CHECK-A2: fcfidus 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define double @goou(i32 %a) nounwind {
+entry:
+  %x = uitofp i32 %a to double
+  ret double %x
+
+; CHECK-A2: @goou
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwzx [[REG:[0-9]+]],
+; CHECK-A2: fcfidu 1, [[REG]]
+; CHECK-A2: blr
+}
+
diff --git a/test/CodeGen/PowerPC/i64-to-float.ll b/test/CodeGen/PowerPC/i64-to-float.ll
new file mode 100644
index 000000000000..b81d109e7f45
--- /dev/null
+++ b/test/CodeGen/PowerPC/i64-to-float.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @foo(i64 %a) nounwind {
+entry:
+  %x = sitofp i64 %a to float
+  ret float %x
+
+; CHECK: @foo
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfids 1, [[REG]]
+; CHECK: blr
+}
+
+define double @goo(i64 %a) nounwind {
+entry:
+  %x = sitofp i64 %a to double
+  ret double %x
+
+; CHECK: @goo
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfid 1, [[REG]]
+; CHECK: blr
+}
+
+define float @foou(i64 %a) nounwind {
+entry:
+  %x = uitofp i64 %a to float
+  ret float %x
+
+; CHECK: @foou
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfidus 1, [[REG]]
+; CHECK: blr
+}
+
+define double @goou(i64 %a) nounwind {
+entry:
+  %x = uitofp i64 %a to double
+  ret double %x
+
+; CHECK: @goou
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfidu 1, [[REG]]
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll
index 5a0c072c9c52..d2a3239ab865 100644
--- a/test/CodeGen/PowerPC/i64_fp_round.ll
+++ b/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -12,16 +12,16 @@ entry:
 ; Note that only parts of the sequence are checked for here, to allow
 ; for minor code generation differences.
 
-; CHECK: sradi [[REGISTER:[0-9]+]], 3, 53
-; CHECK: addi [[REGISTER:[0-9]+]], [[REGISTER]], 1
-; CHECK: cmpldi 0, [[REGISTER]], 1
-; CHECK: isel [[REGISTER:[0-9]+]], {{[0-9]+}}, 3, 1
-; CHECK: std [[REGISTER]], -{{[0-9]+}}(1)
+; CHECK: sradi [[REG1:[0-9]+]], 3, 53
+; CHECK: addi [[REG2:[0-9]+]], [[REG1]], 1
+; CHECK: cmpldi 0, [[REG2]], 1
+; CHECK: isel [[REG3:[0-9]+]], {{[0-9]+}}, 3, 1
+; CHECK: std [[REG3]], -{{[0-9]+}}(1)
 
 
 ; Also check that with -enable-unsafe-fp-math we do not get that extra
 ; code sequence.  Simply verify that there is no "isel" present.
 
-; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
+; RUN: llc -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
 ; CHECK-UNSAFE-NOT: isel
 
diff --git a/test/CodeGen/PowerPC/iabs.ll b/test/CodeGen/PowerPC/iabs.ll
index 7d089bbd653c..f683238de268 100644
--- a/test/CodeGen/PowerPC/iabs.ll
+++ b/test/CodeGen/PowerPC/iabs.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: llc < %s -march=ppc32 -stats 2>&1 | \
 ; RUN:   grep "4 .*Number of machine instrs printed"
 
diff --git a/test/CodeGen/PowerPC/in-asm-f64-reg.ll b/test/CodeGen/PowerPC/in-asm-f64-reg.ll
new file mode 100644
index 000000000000..1321dfce2027
--- /dev/null
+++ b/test/CodeGen/PowerPC/in-asm-f64-reg.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+
+define void @f() {
+; CHECK: @f
+
+entry:
+  %0 = tail call double* asm sideeffect "qvstfdux $2,$0,$1", "=b,{r7},{f11},0,~{memory}"(i32 64, double undef, double* undef)
+  ret void
+
+; CHECK: qvstfdux 11,{{[0-9]+}},7
+}
diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll
index 62aa7cf929f8..a10c5ddb36fb 100644
--- a/test/CodeGen/PowerPC/jaggedstructs.ll
+++ b/test/CodeGen/PowerPC/jaggedstructs.ll
@@ -23,22 +23,22 @@ entry:
 ; CHECK: std 4, 200(1)
 ; CHECK: std 3, 192(1)
 ; CHECK: lbz {{[0-9]+}}, 199(1)
-; CHECK: stb {{[0-9]+}}, 55(1)
 ; CHECK: lhz {{[0-9]+}}, 197(1)
+; CHECK: stb {{[0-9]+}}, 55(1)
 ; CHECK: sth {{[0-9]+}}, 53(1)
 ; CHECK: lbz {{[0-9]+}}, 207(1)
-; CHECK: stb {{[0-9]+}}, 63(1)
 ; CHECK: lwz {{[0-9]+}}, 203(1)
+; CHECK: stb {{[0-9]+}}, 63(1)
 ; CHECK: stw {{[0-9]+}}, 59(1)
 ; CHECK: lhz {{[0-9]+}}, 214(1)
-; CHECK: sth {{[0-9]+}}, 70(1)
 ; CHECK: lwz {{[0-9]+}}, 210(1)
+; CHECK: sth {{[0-9]+}}, 70(1)
 ; CHECK: stw {{[0-9]+}}, 66(1)
 ; CHECK: lbz {{[0-9]+}}, 223(1)
-; CHECK: stb {{[0-9]+}}, 79(1)
 ; CHECK: lhz {{[0-9]+}}, 221(1)
-; CHECK: sth {{[0-9]+}}, 77(1)
 ; CHECK: lwz {{[0-9]+}}, 217(1)
+; CHECK: stb {{[0-9]+}}, 79(1)
+; CHECK: sth {{[0-9]+}}, 77(1)
 ; CHECK: stw {{[0-9]+}}, 73(1)
 ; CHECK: ld 6, 72(1)
 ; CHECK: ld 5, 64(1)
diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll
index 12f1d1f130d8..98951306fd8e 100644
--- a/test/CodeGen/PowerPC/lbzux.ll
+++ b/test/CodeGen/PowerPC/lbzux.ll
@@ -1,6 +1,6 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
 
 define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
 entry:
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
index 4019eca0bb88..aaa31d93d5f2 100644
--- a/test/CodeGen/PowerPC/lit.local.cfg
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
 
 targets = set(config.root.targets_to_build.split())
 if not 'PowerPC' in targets:
diff --git a/test/CodeGen/PowerPC/load-shift-combine.ll b/test/CodeGen/PowerPC/load-shift-combine.ll
new file mode 100644
index 000000000000..a5d1224864a6
--- /dev/null
+++ b/test/CodeGen/PowerPC/load-shift-combine.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s
+
+; This used to cause a crash.  A standard load is converted to a pre-increment
+; load.  Later the pre-increment load is combined with a subsequent SRL to
+; produce a smaller load.  This transform invalidly created a standard load
+; and propagated the produced value into uses of both produced values of the
+; pre-increment load.  The result was a crash when attempting to process an
+; add with a token-chain operand.
+
+%struct.Info = type { i32, i32, i8*, i8*, i8*, [32 x i8*], i64, [32 x i64], i64, i64, i64, [32 x i64] }
+%struct.S1847 = type { [12 x i8], [4 x i8], [8 x i8], [4 x i8], [8 x i8], [2 x i8], i8, [4 x i64], i8, [3 x i8], [4 x i8], i8, i16, [4 x %struct.anon.76], i16, i8, i8* }
+%struct.anon.76 = type { i32 }
+@info = common global %struct.Info zeroinitializer, align 8
+@fails = common global i32 0, align 4
+@a1847 = external global [5 x %struct.S1847]
+define void @test1847() nounwind {
+entry:
+  %j = alloca i32, align 4
+  %0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
+  %1 = load i32* @fails, align 4
+  %bf.load1 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+  %bf.clear2 = and i96 %bf.load1, 302231454903657293676543
+  %bf.set3 = or i96 %bf.clear2, -38383394772764476296921088
+  store i96 %bf.set3, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+  %2 = load i32* %j, align 4
+  %3 = load i32* %j, align 4
+  %inc11 = add nsw i32 %3, 1
+  store i32 %inc11, i32* %j, align 4
+  %bf.load15 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+  %bf.clear16 = and i96 %bf.load15, -18446744069414584321
+  %bf.set17 = or i96 %bf.clear16, 18446743532543672320
+  store i96 %bf.set17, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/mcm-1.ll b/test/CodeGen/PowerPC/mcm-1.ll
new file mode 100644
index 000000000000..a57fb9dd98d0
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-1.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading and storing an external variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ei = external global i32
+
+define signext i32 @test_external() nounwind {
+entry:
+  %0 = load i32* @ei, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @ei, align 4
+  ret i32 %0
+}
+
+; CHECK: test_external:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-10.ll b/test/CodeGen/PowerPC/mcm-10.ll
new file mode 100644
index 000000000000..4bec3e16fa04
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-10.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading and storing a static variable scoped to a function.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+  %0 = load i32* @test_fn_static.si, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @test_fn_static.si, align 4
+  ret i32 %0
+}
+
+; CHECK: test_fn_static:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: .type [[VAR]],@object
+; CHECK: .local [[VAR]]
+; CHECK: .comm [[VAR]],4,4
diff --git a/test/CodeGen/PowerPC/mcm-11.ll b/test/CodeGen/PowerPC/mcm-11.ll
new file mode 100644
index 000000000000..f2bc4c9cb72c
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-11.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading and storing a file-scope static variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+  %0 = load i32* @gi, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @gi, align 4
+  ret i32 %0
+}
+
+; CHECK: test_file_static:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: .type [[VAR]],@object
+; CHECK: .data
+; CHECK: .globl [[VAR]]
+; CHECK: [[VAR]]:
+; CHECK: .long 5
diff --git a/test/CodeGen/PowerPC/mcm-12.ll b/test/CodeGen/PowerPC/mcm-12.ll
new file mode 100644
index 000000000000..911305d4355f
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-12.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading a value from the constant pool (TOC-relative).
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @test_double_const() nounwind {
+entry:
+  ret double 0x3F4FD4920B498CF0
+}
+
+; CHECK: [[VAR:[a-z0-9A-Z_.]+]]:
+; CHECK: .quad 4562098671269285104
+; CHECK: test_double_const:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
diff --git a/test/CodeGen/PowerPC/mcm-2.ll b/test/CodeGen/PowerPC/mcm-2.ll
new file mode 100644
index 000000000000..f0dff4c5a39c
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-2.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s
+
+; Test correct code generation for medium and large code model
+; for loading and storing a static variable scoped to a function.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+  %0 = load i32* @test_fn_static.si, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @test_fn_static.si, align 4
+  ret i32 %0
+}
+
+; MEDIUM: test_fn_static:
+; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM: lwz {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: stw {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: .type [[VAR]],@object
+; MEDIUM: .local [[VAR]]
+; MEDIUM: .comm [[VAR]],4,4
+
+; LARGE: test_fn_static:
+; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; LARGE: lwz {{[0-9]+}}, 0([[REG2]])
+; LARGE: stw {{[0-9]+}}, 0([[REG2]])
+; LARGE: .type [[VAR]],@object
+; LARGE: .local [[VAR]]
+; LARGE: .comm [[VAR]],4,4
+
diff --git a/test/CodeGen/PowerPC/mcm-3.ll b/test/CodeGen/PowerPC/mcm-3.ll
new file mode 100644
index 000000000000..b7905503f458
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-3.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s
+
+; Test correct code generation for medium and large code model
+; for loading and storing a file-scope static variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+  %0 = load i32* @gi, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @gi, align 4
+  ret i32 %0
+}
+
+; MEDIUM: test_file_static:
+; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM: lwz {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: stw {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: .type [[VAR]],@object
+; MEDIUM: .data
+; MEDIUM: .globl [[VAR]]
+; MEDIUM: [[VAR]]:
+; MEDIUM: .long 5
+
+; LARGE: test_file_static:
+; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; LARGE: lwz {{[0-9]+}}, 0([[REG2]])
+; LARGE: stw {{[0-9]+}}, 0([[REG2]])
+; LARGE: .type [[VAR]],@object
+; LARGE: .data
+; LARGE: .globl [[VAR]]
+; LARGE: [[VAR]]:
+; LARGE: .long 5
+
diff --git a/test/CodeGen/PowerPC/mcm-4.ll b/test/CodeGen/PowerPC/mcm-4.ll
new file mode 100644
index 000000000000..47c60c936038
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-4.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s
+
+; Test correct code generation for medium and large code model
+; for loading a value from the constant pool (TOC-relative).
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @test_double_const() nounwind {
+entry:
+  ret double 0x3F4FD4920B498CF0
+}
+
+; MEDIUM: [[VAR:[a-z0-9A-Z_.]+]]:
+; MEDIUM: .quad 4562098671269285104
+; MEDIUM: test_double_const:
+; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM: lfd {{[0-9]+}}, 0([[REG2]])
+
+; LARGE: [[VAR:[a-z0-9A-Z_.]+]]:
+; LARGE: .quad 4562098671269285104
+; LARGE: test_double_const:
+; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; LARGE: lfd {{[0-9]+}}, 0([[REG2]])
diff --git a/test/CodeGen/PowerPC/mcm-5.ll b/test/CodeGen/PowerPC/mcm-5.ll
new file mode 100644
index 000000000000..1be27b7e8cc0
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-5.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading the address of a jump table from the TOC.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define signext i32 @test_jump_table(i32 signext %i) nounwind {
+entry:
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32* %i.addr, align 4
+  switch i32 %0, label %sw.default [
+    i32 3, label %sw.bb
+    i32 4, label %sw.bb1
+    i32 5, label %sw.bb2
+    i32 6, label %sw.bb3
+  ]
+
+sw.default:                                       ; preds = %entry
+  br label %sw.epilog
+
+sw.bb:                                            ; preds = %entry
+  %1 = load i32* %i.addr, align 4
+  %mul = mul nsw i32 %1, 7
+  store i32 %mul, i32* %i.addr, align 4
+  br label %sw.bb1
+
+sw.bb1:                                           ; preds = %entry, %sw.bb
+  %2 = load i32* %i.addr, align 4
+  %dec = add nsw i32 %2, -1
+  store i32 %dec, i32* %i.addr, align 4
+  br label %sw.bb2
+
+sw.bb2:                                           ; preds = %entry, %sw.bb1
+  %3 = load i32* %i.addr, align 4
+  %add = add nsw i32 %3, 3
+  store i32 %add, i32* %i.addr, align 4
+  br label %sw.bb3
+
+sw.bb3:                                           ; preds = %entry, %sw.bb2
+  %4 = load i32* %i.addr, align 4
+  %shl = shl i32 %4, 1
+  store i32 %shl, i32* %i.addr, align 4
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.bb3, %sw.default
+  %5 = load i32* %i.addr, align 4
+  ret i32 %5
+}
+
+; CHECK: test_jump_table:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: ldx {{[0-9]+}}, {{[0-9]+}}, [[REG2]]
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-6.ll b/test/CodeGen/PowerPC/mcm-6.ll
new file mode 100644
index 000000000000..35efaaa5628f
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-6.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading and storing a tentatively defined variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ti = common global i32 0, align 4
+
+define signext i32 @test_tentative() nounwind {
+entry:
+  %0 = load i32* @ti, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @ti, align 4
+  ret i32 %0
+}
+
+; CHECK: test_tentative:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc [[VAR:[a-z0-9A-Z_.]+]][TC],{{[a-z0-9A-Z_.]+}}
+; CHECK: .comm [[VAR]],4,4
diff --git a/test/CodeGen/PowerPC/mcm-7.ll b/test/CodeGen/PowerPC/mcm-7.ll
new file mode 100644
index 000000000000..0dd39ee4109d
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-7.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading a function address.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i8* @test_fnaddr() nounwind {
+entry:
+  %func = alloca i32 (i32)*, align 8
+  store i32 (i32)* @foo, i32 (i32)** %func, align 8
+  %0 = load i32 (i32)** %func, align 8
+  %1 = bitcast i32 (i32)* %0 to i8*
+  ret i8* %1
+}
+
+declare signext i32 @foo(i32 signext)
+
+; CHECK: test_fnaddr:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-8.ll b/test/CodeGen/PowerPC/mcm-8.ll
new file mode 100644
index 000000000000..3ece786d6447
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-8.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading a variable with available-externally linkage.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@x = available_externally constant [13 x i8] c"St9bad_alloc\00"
+
+define signext i8 @test_avext() nounwind {
+entry:
+  %0 = getelementptr inbounds [13 x i8]* @x, i32 0, i32 0
+  %1 = load i8* %0, align 1
+  ret i8 %1
+}
+
+; CHECK: test_avext:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lbz {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-9.ll b/test/CodeGen/PowerPC/mcm-9.ll
new file mode 100644
index 000000000000..f366f45cc863
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-9.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading and storing an aliased external variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ei = external global i32
+@a = alias i32* @ei
+
+define signext i32 @test_external() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @a, align 4
+  ret i32 %0
+}
+
+; CHECK: test_external:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-default.ll b/test/CodeGen/PowerPC/mcm-default.ll
new file mode 100644
index 000000000000..19de2536aec3
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-default.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mcpu=pwr7 -O0 <%s | FileCheck %s
+
+; Test that we generate code for the medium model as the default.
+; Use an external variable reference as an example.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ei = external global i32
+
+define signext i32 @test_external() nounwind {
+entry:
+  %0 = load i32* @ei, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @ei, align 4
+  ret i32 %0
+}
+
+; CHECK: test_external:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll
new file mode 100644
index 000000000000..2dd1718ba75a
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-obj-2.ll
@@ -0,0 +1,77 @@
+; RUN: llc -O1 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; FIXME: When asm-parse is available, could make this an assembly test.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+  %0 = load i32* @test_fn_static.si, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @test_fn_static.si, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing function-scoped variable si.
+;
+; CHECK:       Relocation 0
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 1
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM2]]
+; CHECK-NEXT:  'r_type', 0x00000030
+; CHECK:       Relocation 2
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM2]]
+; CHECK-NEXT:  'r_type', 0x00000030
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+  %0 = load i32* @gi, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @gi, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing file-scope variable gi.
+;
+; CHECK:       Relocation 3
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 4
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM3]]
+; CHECK-NEXT:  'r_type', 0x00000030
+; CHECK:       Relocation 5
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM3]]
+; CHECK-NEXT:  'r_type', 0x00000030
+
+define double @test_double_const() nounwind {
+entry:
+  ret double 0x3F4FD4920B498CF0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing a constant.
+;
+; CHECK:       Relocation 6
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 7
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM4]]
+; CHECK-NEXT:  'r_type', 0x00000030
+
diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll
new file mode 100644
index 000000000000..117c3b334346
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-obj.ll
@@ -0,0 +1,268 @@
+; RUN: llc -O0 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=LARGE %s
+
+; FIXME: When asm-parse is available, could make this an assembly test.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ei = external global i32
+
+define signext i32 @test_external() nounwind {
+entry:
+  %0 = load i32* @ei, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @ei, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing external variable ei.
+;
+; MEDIUM:       '.rela.text'
+; MEDIUM:       Relocation 0
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM1:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 1
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM1]]
+; MEDIUM-NEXT:  'r_type', 0x00000040
+;
+; LARGE:       '.rela.text'
+; LARGE:       Relocation 0
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM1:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 1
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM1]]
+; LARGE-NEXT:  'r_type', 0x00000040
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+  %0 = load i32* @test_fn_static.si, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @test_fn_static.si, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing function-scoped variable si.
+;
+; MEDIUM:       Relocation 2
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 3
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM2]]
+; MEDIUM-NEXT:  'r_type', 0x00000030
+;
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing function-scoped variable si.
+;
+; LARGE:       Relocation 2
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 3
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM2]]
+; LARGE-NEXT:  'r_type', 0x00000040
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+  %0 = load i32* @gi, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @gi, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing file-scope variable gi.
+;
+; MEDIUM:       Relocation 4
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 5
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM3]]
+; MEDIUM-NEXT:  'r_type', 0x00000030
+;
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing file-scope variable gi.
+;
+; LARGE:       Relocation 4
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 5
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM3]]
+; LARGE-NEXT:  'r_type', 0x00000040
+
+define double @test_double_const() nounwind {
+entry:
+  ret double 0x3F4FD4920B498CF0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing a constant.
+;
+; MEDIUM:       Relocation 6
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 7
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM4]]
+; MEDIUM-NEXT:  'r_type', 0x00000030
+;
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing a constant.
+;
+; LARGE:       Relocation 6
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 7
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM4]]
+; LARGE-NEXT:  'r_type', 0x00000040
+
+define signext i32 @test_jump_table(i32 signext %i) nounwind {
+entry:
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32* %i.addr, align 4
+  switch i32 %0, label %sw.default [
+    i32 3, label %sw.bb
+    i32 4, label %sw.bb1
+    i32 5, label %sw.bb2
+    i32 6, label %sw.bb3
+  ]
+
+sw.default:                                       ; preds = %entry
+  br label %sw.epilog
+
+sw.bb:                                            ; preds = %entry
+  %1 = load i32* %i.addr, align 4
+  %mul = mul nsw i32 %1, 7
+  store i32 %mul, i32* %i.addr, align 4
+  br label %sw.bb1
+
+sw.bb1:                                           ; preds = %entry, %sw.bb
+  %2 = load i32* %i.addr, align 4
+  %dec = add nsw i32 %2, -1
+  store i32 %dec, i32* %i.addr, align 4
+  br label %sw.bb2
+
+sw.bb2:                                           ; preds = %entry, %sw.bb1
+  %3 = load i32* %i.addr, align 4
+  %add = add nsw i32 %3, 3
+  store i32 %add, i32* %i.addr, align 4
+  br label %sw.bb3
+
+sw.bb3:                                           ; preds = %entry, %sw.bb2
+  %4 = load i32* %i.addr, align 4
+  %shl = shl i32 %4, 1
+  store i32 %shl, i32* %i.addr, align 4
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.bb3, %sw.default
+  %5 = load i32* %i.addr, align 4
+  ret i32 %5
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing a jump table address.
+;
+; MEDIUM:       Relocation 8
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM5:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 9
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM5]]
+; MEDIUM-NEXT:  'r_type', 0x00000040
+;
+; LARGE:       Relocation 8
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM5:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 9
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM5]]
+; LARGE-NEXT:  'r_type', 0x00000040
+
+@ti = common global i32 0, align 4
+
+define signext i32 @test_tentative() nounwind {
+entry:
+  %0 = load i32* @ti, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @ti, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing tentatively declared variable ti.
+;
+; MEDIUM:       Relocation 10
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM6:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 11
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM6]]
+; MEDIUM-NEXT:  'r_type', 0x00000040
+;
+; LARGE:       Relocation 10
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM6:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 11
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM6]]
+; LARGE-NEXT:  'r_type', 0x00000040
+
+define i8* @test_fnaddr() nounwind {
+entry:
+  %func = alloca i32 (i32)*, align 8
+  store i32 (i32)* @foo, i32 (i32)** %func, align 8
+  %0 = load i32 (i32)** %func, align 8
+  %1 = bitcast i32 (i32)* %0 to i8*
+  ret i8* %1
+}
+
+declare signext i32 @foo(i32 signext)
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing function address foo.
+;
+; MEDIUM:       Relocation 12
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM7:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 13
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM7]]
+; MEDIUM-NEXT:  'r_type', 0x00000040
+;
+; LARGE:       Relocation 12
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM7:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 13
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM7]]
+; LARGE-NEXT:  'r_type', 0x00000040
+
diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll
index 39af11a3d54c..fcf53da67fc2 100644
--- a/test/CodeGen/PowerPC/mem_update.ll
+++ b/test/CodeGen/PowerPC/mem_update.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=ppc32 | \
 ; RUN:   not grep addi
-; RUN: llc < %s -march=ppc64 | \
+; RUN: llc -code-model=small < %s -march=ppc64 | \
 ; RUN:   not grep addi
 
 @Glob = global i64 4
diff --git a/test/CodeGen/PowerPC/misched-inorder-latency.ll b/test/CodeGen/PowerPC/misched-inorder-latency.ll
new file mode 100644
index 000000000000..8fae7ad4d1df
--- /dev/null
+++ b/test/CodeGen/PowerPC/misched-inorder-latency.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -enable-misched -pre-RA-sched=source -scheditins=false \
+; RUN:          -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
+;
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; %val1 is a load live out of %entry. It should be hoisted
+; above the add.
+; CHECK: testload:
+; CHECK: %entry
+; CHECK: lwz
+; CHECK: addi
+; CHECK: bne
+; CHECK: %true
+define i32 @testload(i32 *%ptr, i32 %sumin) {
+entry:
+  %sum1 = add i32 %sumin, 1
+  %val1 = load i32* %ptr
+  %p = icmp eq i32 %sumin, 0
+  br i1 %p, label %true, label %end
+true:
+  %sum2 = add i32 %sum1, 1
+  %ptr2 = getelementptr i32* %ptr, i32 1
+  %val = load i32* %ptr2
+  %val2 = add i32 %val1, %val
+  br label %end
+end:
+  %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ]
+  %summerge = phi i32 [ %sum1, %entry], [ %sum2, %true ]
+  %sumout = add i32 %valmerge, %summerge
+  ret i32 %sumout
+}
+
+; The prefetch gets a default latency of 3 cycles and should be hoisted
+; above the add.
+;
+; CHECK: testprefetch:
+; CHECK: %entry
+; CHECK: dcbt
+; CHECK: addi
+; CHECK: blr
+define i32 @testprefetch(i8 *%ptr, i32 %i) {
+entry:
+  %val1 = add i32 %i, 1
+  tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
+  %p = icmp eq i32 %i, 0
+  br i1 %p, label %true, label %end
+true:
+  %val2 = add i32 %val1, 1
+  br label %end
+end:
+  %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ]
+  ret i32 %valmerge
+}
+declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/negctr.ll b/test/CodeGen/PowerPC/negctr.ll
new file mode 100644
index 000000000000..2f6995c65dd8
--- /dev/null
+++ b/test/CodeGen/PowerPC/negctr.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -mcpu=a2 -disable-lsr | FileCheck -check-prefix=NOLSR %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @main() #0 {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 0
+  br i1 %exitcond, label %for.end, label %for.body
+
+; FIXME: We currently can't form the 32-bit unsigned trip count necessary here!
+; CHECK: @main
+; CHECK-NOT: bdnz
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+define void @main1() #0 {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 0
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main1
+; CHECK: li [[REG:[0-9]+]], -1
+; CHECK: mtctr [[REG]]
+; CHECK: bdnz
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+define void @main2() #0 {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, -100000
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main2
+; CHECK: lis [[REG:[0-9]+]], -2
+; CHECK: ori [[REG2:[0-9]+]], [[REG]], 31071
+; CHECK: mtctr [[REG2]]
+; CHECK: bdnz
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+define void @main3() #0 {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 127984, %entry ]
+  %indvars.iv.next = add i64 %indvars.iv, -16
+  %exitcond = icmp eq i64 %indvars.iv.next, -16
+  br i1 %exitcond, label %for.end, label %for.body
+
+; NOLSR: @main3
+; NOLSR: li [[REG:[0-9]+]], 8000
+; NOLSR: mtctr [[REG]]
+; NOLSR: bdnz
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/popcnt.ll b/test/CodeGen/PowerPC/popcnt.ll
new file mode 100644
index 000000000000..b304d72aede2
--- /dev/null
+++ b/test/CodeGen/PowerPC/popcnt.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=ppc64 -mattr=+popcntd < %s | FileCheck %s
+
+define i8 @cnt8(i8 %x) nounwind readnone {
+  %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+  ret i8 %cnt
+; CHECK: @cnt8
+; CHECK: rlwinm
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+  %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+  ret i16 %cnt
+; CHECK: @cnt16
+; CHECK: rlwinm
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+  ret i32 %cnt
+; CHECK: @cnt32
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+  ret i64 %cnt
+; CHECK: @cnt64
+; CHECK: popcntd
+; CHECK: blr
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/PowerPC/ppc64-toc.ll b/test/CodeGen/PowerPC/ppc64-toc.ll
index a29bdcb25031..7f30ef883e9a 100644
--- a/test/CodeGen/PowerPC/ppc64-toc.ll
+++ b/test/CodeGen/PowerPC/ppc64-toc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -code-model=small < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
diff --git a/test/CodeGen/PowerPC/pr15031.ll b/test/CodeGen/PowerPC/pr15031.ll
new file mode 100644
index 000000000000..5ccf941a1f16
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15031.ll
@@ -0,0 +1,370 @@
+; RUN: llc -mcpu=pwr7 -O3 < %s | FileCheck %s
+
+; Test case derived from bug report 15031.  The code in the post-RA
+; scheduler to break critical anti-dependencies was failing to check
+; whether an instruction had more than one definition, and ensuring
+; that any additional definitions interfered with the choice of a new
+; register.  As a result, this test originally caused this to be
+; generated:
+;
+;   lbzu 3, 1(3)
+;
+; which is illegal, since it requires register 3 to both receive the
+; loaded value and receive the updated address.  With the fix to bug
+; 15031, a different register is chosen to receive the loaded value.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%"class.llvm::MachineMemOperand" = type { %"struct.llvm::MachinePointerInfo", i64, i32, %"class.llvm::MDNode"*, %"class.llvm::MDNode"* }
+%"struct.llvm::MachinePointerInfo" = type { %"class.llvm::Value"*, i64 }
+%"class.llvm::Value" = type { i32 (...)**, i8, i8, i16, %"class.llvm::Type"*, %"class.llvm::Use"*, %"class.llvm::StringMapEntry"* }
+%"class.llvm::Type" = type { %"class.llvm::LLVMContext"*, i32, i32, %"class.llvm::Type"** }
+%"class.llvm::LLVMContext" = type { %"class.llvm::LLVMContextImpl"* }
+%"class.llvm::LLVMContextImpl" = type opaque
+%"class.llvm::Use" = type { %"class.llvm::Value"*, %"class.llvm::Use"*, %"class.llvm::PointerIntPair" }
+%"class.llvm::PointerIntPair" = type { i64 }
+%"class.llvm::StringMapEntry" = type opaque
+%"class.llvm::MDNode" = type { %"class.llvm::Value", %"class.llvm::FoldingSetImpl::Node", i32, i32 }
+%"class.llvm::FoldingSetImpl::Node" = type { i8* }
+%"class.llvm::MachineInstr" = type { %"class.llvm::ilist_node", %"class.llvm::MCInstrDesc"*, %"class.llvm::MachineBasicBlock"*, %"class.llvm::MachineOperand"*, i32, %"class.llvm::ArrayRecycler<llvm::MachineOperand, 8>::Capacity", i8, i8, i8, %"class.llvm::MachineMemOperand"**, %"class.llvm::DebugLoc" }
+%"class.llvm::ilist_node" = type { %"class.llvm::ilist_half_node", %"class.llvm::MachineInstr"* }
+%"class.llvm::ilist_half_node" = type { %"class.llvm::MachineInstr"* }
+%"class.llvm::MCInstrDesc" = type { i16, i16, i16, i16, i16, i32, i64, i16*, i16*, %"class.llvm::MCOperandInfo"* }
+%"class.llvm::MCOperandInfo" = type { i16, i8, i8, i32 }
+%"class.llvm::MachineBasicBlock" = type { %"class.llvm::ilist_node.0", %"struct.llvm::ilist", %"class.llvm::BasicBlock"*, i32, %"class.llvm::MachineFunction"*, %"class.std::vector.163", %"class.std::vector.163", %"class.std::vector.123", %"class.std::vector.123", i32, i8, i8 }
+%"class.llvm::ilist_node.0" = type { %"class.llvm::ilist_half_node.1", %"class.llvm::MachineBasicBlock"* }
+%"class.llvm::ilist_half_node.1" = type { %"class.llvm::MachineBasicBlock"* }
+%"struct.llvm::ilist" = type { %"class.llvm::iplist" }
+%"class.llvm::iplist" = type { %"struct.llvm::ilist_traits", %"class.llvm::MachineInstr"* }
+%"struct.llvm::ilist_traits" = type { %"class.llvm::ilist_half_node", %"class.llvm::MachineBasicBlock"* }
+%"class.llvm::BasicBlock" = type { %"class.llvm::Value", %"class.llvm::ilist_node.2", %"class.llvm::iplist.4", %"class.llvm::Function"* }
+%"class.llvm::ilist_node.2" = type { %"class.llvm::ilist_half_node.3", %"class.llvm::BasicBlock"* }
+%"class.llvm::ilist_half_node.3" = type { %"class.llvm::BasicBlock"* }
+%"class.llvm::iplist.4" = type { %"struct.llvm::ilist_traits.5", %"class.llvm::Instruction"* }
+%"struct.llvm::ilist_traits.5" = type { %"class.llvm::ilist_half_node.10" }
+%"class.llvm::ilist_half_node.10" = type { %"class.llvm::Instruction"* }
+%"class.llvm::Instruction" = type { %"class.llvm::User", %"class.llvm::ilist_node.193", %"class.llvm::BasicBlock"*, %"class.llvm::DebugLoc" }
+%"class.llvm::User" = type { %"class.llvm::Value", %"class.llvm::Use"*, i32 }
+%"class.llvm::ilist_node.193" = type { %"class.llvm::ilist_half_node.10", %"class.llvm::Instruction"* }
+%"class.llvm::DebugLoc" = type { i32, i32 }
+%"class.llvm::Function" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.27", %"class.llvm::iplist.47", %"class.llvm::iplist.54", %"class.llvm::ValueSymbolTable"*, %"class.llvm::AttributeSet" }
+%"class.llvm::GlobalValue" = type { [52 x i8], [4 x i8], %"class.llvm::Module"*, %"class.std::basic_string" }
+%"class.llvm::Module" = type { %"class.llvm::LLVMContext"*, %"class.llvm::iplist.11", %"class.llvm::iplist.20", %"class.llvm::iplist.29", %"struct.llvm::ilist.38", %"class.std::basic_string", %"class.llvm::ValueSymbolTable"*, %"class.llvm::OwningPtr", %"class.std::basic_string", %"class.std::basic_string", %"class.std::basic_string", i8* }
+%"class.llvm::iplist.11" = type { %"struct.llvm::ilist_traits.12", %"class.llvm::GlobalVariable"* }
+%"struct.llvm::ilist_traits.12" = type { %"class.llvm::ilist_node.18" }
+%"class.llvm::ilist_node.18" = type { %"class.llvm::ilist_half_node.19", %"class.llvm::GlobalVariable"* }
+%"class.llvm::ilist_half_node.19" = type { %"class.llvm::GlobalVariable"* }
+%"class.llvm::GlobalVariable" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.18", i8 }
+%"class.llvm::iplist.20" = type { %"struct.llvm::ilist_traits.21", %"class.llvm::Function"* }
+%"struct.llvm::ilist_traits.21" = type { %"class.llvm::ilist_node.27" }
+%"class.llvm::ilist_node.27" = type { %"class.llvm::ilist_half_node.28", %"class.llvm::Function"* }
+%"class.llvm::ilist_half_node.28" = type { %"class.llvm::Function"* }
+%"class.llvm::iplist.29" = type { %"struct.llvm::ilist_traits.30", %"class.llvm::GlobalAlias"* }
+%"struct.llvm::ilist_traits.30" = type { %"class.llvm::ilist_node.36" }
+%"class.llvm::ilist_node.36" = type { %"class.llvm::ilist_half_node.37", %"class.llvm::GlobalAlias"* }
+%"class.llvm::ilist_half_node.37" = type { %"class.llvm::GlobalAlias"* }
+%"class.llvm::GlobalAlias" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.36" }
+%"struct.llvm::ilist.38" = type { %"class.llvm::iplist.39" }
+%"class.llvm::iplist.39" = type { %"struct.llvm::ilist_traits.40", %"class.llvm::NamedMDNode"* }
+%"struct.llvm::ilist_traits.40" = type { %"class.llvm::ilist_node.45" }
+%"class.llvm::ilist_node.45" = type { %"class.llvm::ilist_half_node.46", %"class.llvm::NamedMDNode"* }
+%"class.llvm::ilist_half_node.46" = type { %"class.llvm::NamedMDNode"* }
+%"class.llvm::NamedMDNode" = type { %"class.llvm::ilist_node.45", %"class.std::basic_string", %"class.llvm::Module"*, i8* }
+%"class.std::basic_string" = type { %"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" }
+%"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* }
+%"class.llvm::ValueSymbolTable" = type opaque
+%"class.llvm::OwningPtr" = type { %"class.llvm::GVMaterializer"* }
+%"class.llvm::GVMaterializer" = type opaque
+%"class.llvm::iplist.47" = type { %"struct.llvm::ilist_traits.48", %"class.llvm::BasicBlock"* }
+%"struct.llvm::ilist_traits.48" = type { %"class.llvm::ilist_half_node.3" }
+%"class.llvm::iplist.54" = type { %"struct.llvm::ilist_traits.55", %"class.llvm::Argument"* }
+%"struct.llvm::ilist_traits.55" = type { %"class.llvm::ilist_half_node.61" }
+%"class.llvm::ilist_half_node.61" = type { %"class.llvm::Argument"* }
+%"class.llvm::Argument" = type { %"class.llvm::Value", %"class.llvm::ilist_node.192", %"class.llvm::Function"* }
+%"class.llvm::ilist_node.192" = type { %"class.llvm::ilist_half_node.61", %"class.llvm::Argument"* }
+%"class.llvm::AttributeSet" = type { %"class.llvm::AttributeSetImpl"* }
+%"class.llvm::AttributeSetImpl" = type opaque
+%"class.llvm::MachineFunction" = type { %"class.llvm::Function"*, %"class.llvm::TargetMachine"*, %"class.llvm::MCContext"*, %"class.llvm::MachineModuleInfo"*, %"class.llvm::GCModuleInfo"*, %"class.llvm::MachineRegisterInfo"*, %"struct.llvm::MachineFunctionInfo"*, %"class.llvm::MachineFrameInfo"*, %"class.llvm::MachineConstantPool"*, %"class.llvm::MachineJumpTableInfo"*, %"class.std::vector.163", %"class.llvm::BumpPtrAllocator", %"class.llvm::Recycler", %"class.llvm::ArrayRecycler", %"class.llvm::Recycler.180", %"struct.llvm::ilist.181", i32, i32, i8 }
+%"class.llvm::TargetMachine" = type { i32 (...)**, %"class.llvm::Target"*, %"class.std::basic_string", %"class.std::basic_string", %"class.std::basic_string", %"class.llvm::MCCodeGenInfo"*, %"class.llvm::MCAsmInfo"*, i8, %"class.llvm::TargetOptions" }
+%"class.llvm::Target" = type opaque
+%"class.llvm::MCCodeGenInfo" = type opaque
+%"class.llvm::MCAsmInfo" = type opaque
+%"class.llvm::TargetOptions" = type { [2 x i8], i32, i8, i32, i8, %"class.std::basic_string", i32, i32 }
+%"class.llvm::MCContext" = type { %"class.llvm::SourceMgr"*, %"class.llvm::MCAsmInfo"*, %"class.llvm::MCRegisterInfo"*, %"class.llvm::MCObjectFileInfo"*, %"class.llvm::BumpPtrAllocator", %"class.llvm::StringMap", %"class.llvm::StringMap.62", i32, %"class.llvm::DenseMap.63", i8*, %"class.llvm::raw_ostream"*, i8, %"class.std::basic_string", %"class.std::basic_string", %"class.std::vector", %"class.std::vector.70", %"class.llvm::MCDwarfLoc", i8, i8, i32, %"class.llvm::MCSection"*, %"class.llvm::MCSymbol"*, %"class.llvm::MCSymbol"*, %"class.std::vector.75", %"class.llvm::StringRef", %"class.llvm::StringRef", i8, %"class.llvm::DenseMap.80", %"class.std::vector.84", i8*, i8*, i8*, i8 }
+%"class.llvm::SourceMgr" = type opaque
+%"class.llvm::MCRegisterInfo" = type { %"struct.llvm::MCRegisterDesc"*, i32, i32, i32, %"class.llvm::MCRegisterClass"*, i32, i32, [2 x i16]*, i16*, i8*, i16*, i32, i16*, i32, i32, i32, i32, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"class.llvm::DenseMap" }
+%"struct.llvm::MCRegisterDesc" = type { i32, i32, i32, i32, i32, i32 }
+%"class.llvm::MCRegisterClass" = type { i8*, i16*, i8*, i16, i16, i16, i16, i16, i8, i8 }
+%"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair" = type { i32, i32 }
+%"class.llvm::DenseMap" = type { %"struct.std::pair"*, i32, i32, i32 }
+%"struct.std::pair" = type { i32, i32 }
+%"class.llvm::MCObjectFileInfo" = type opaque
+%"class.llvm::BumpPtrAllocator" = type { i64, i64, %"class.llvm::SlabAllocator"*, %"class.llvm::MemSlab"*, i8*, i8*, i64 }
+%"class.llvm::SlabAllocator" = type { i32 (...)** }
+%"class.llvm::MemSlab" = type { i64, %"class.llvm::MemSlab"* }
+%"class.llvm::StringMap" = type { %"class.llvm::StringMapImpl", %"class.llvm::BumpPtrAllocator"* }
+%"class.llvm::StringMapImpl" = type { %"class.llvm::StringMapEntryBase"**, i32, i32, i32, i32 }
+%"class.llvm::StringMapEntryBase" = type { i32 }
+%"class.llvm::StringMap.62" = type { %"class.llvm::StringMapImpl", %"class.llvm::BumpPtrAllocator"* }
+%"class.llvm::DenseMap.63" = type { %"struct.std::pair.66"*, i32, i32, i32 }
+%"struct.std::pair.66" = type opaque
+%"class.llvm::raw_ostream" = type { i32 (...)**, i8*, i8*, i8*, i32 }
+%"class.std::vector" = type { %"struct.std::_Vector_base" }
+%"struct.std::_Vector_base" = type { %"struct.std::_Vector_base<llvm::MCDwarfFile *, std::allocator<llvm::MCDwarfFile *> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MCDwarfFile *, std::allocator<llvm::MCDwarfFile *> >::_Vector_impl" = type { %"class.llvm::MCDwarfFile"**, %"class.llvm::MCDwarfFile"**, %"class.llvm::MCDwarfFile"** }
+%"class.llvm::MCDwarfFile" = type { %"class.llvm::StringRef", i32 }
+%"class.llvm::StringRef" = type { i8*, i64 }
+%"class.std::vector.70" = type { %"struct.std::_Vector_base.71" }
+%"struct.std::_Vector_base.71" = type { %"struct.std::_Vector_base<llvm::StringRef, std::allocator<llvm::StringRef> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::StringRef, std::allocator<llvm::StringRef> >::_Vector_impl" = type { %"class.llvm::StringRef"*, %"class.llvm::StringRef"*, %"class.llvm::StringRef"* }
+%"class.llvm::MCDwarfLoc" = type { i32, i32, i32, i32, i32, i32 }
+%"class.llvm::MCSection" = type opaque
+%"class.llvm::MCSymbol" = type { %"class.llvm::StringRef", %"class.llvm::MCSection"*, %"class.llvm::MCExpr"*, i8 }
+%"class.llvm::MCExpr" = type opaque
+%"class.std::vector.75" = type { %"struct.std::_Vector_base.76" }
+%"struct.std::_Vector_base.76" = type { %"struct.std::_Vector_base<const llvm::MCGenDwarfLabelEntry *, std::allocator<const llvm::MCGenDwarfLabelEntry *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::MCGenDwarfLabelEntry *, std::allocator<const llvm::MCGenDwarfLabelEntry *> >::_Vector_impl" = type { %"class.llvm::MCGenDwarfLabelEntry"**, %"class.llvm::MCGenDwarfLabelEntry"**, %"class.llvm::MCGenDwarfLabelEntry"** }
+%"class.llvm::MCGenDwarfLabelEntry" = type { %"class.llvm::StringRef", i32, i32, %"class.llvm::MCSymbol"* }
+%"class.llvm::DenseMap.80" = type { %"struct.std::pair.83"*, i32, i32, i32 }
+%"struct.std::pair.83" = type { %"class.llvm::MCSection"*, %"class.llvm::MCLineSection"* }
+%"class.llvm::MCLineSection" = type { %"class.std::vector.215" }
+%"class.std::vector.215" = type { %"struct.std::_Vector_base.216" }
+%"struct.std::_Vector_base.216" = type { %"struct.std::_Vector_base<llvm::MCLineEntry, std::allocator<llvm::MCLineEntry> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MCLineEntry, std::allocator<llvm::MCLineEntry> >::_Vector_impl" = type { %"class.llvm::MCLineEntry"*, %"class.llvm::MCLineEntry"*, %"class.llvm::MCLineEntry"* }
+%"class.llvm::MCLineEntry" = type { %"class.llvm::MCDwarfLoc", %"class.llvm::MCSymbol"* }
+%"class.std::vector.84" = type { %"struct.std::_Vector_base.85" }
+%"struct.std::_Vector_base.85" = type { %"struct.std::_Vector_base<const llvm::MCSection *, std::allocator<const llvm::MCSection *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::MCSection *, std::allocator<const llvm::MCSection *> >::_Vector_impl" = type { %"class.llvm::MCSection"**, %"class.llvm::MCSection"**, %"class.llvm::MCSection"** }
+%"class.llvm::MachineModuleInfo" = type { %"class.llvm::ImmutablePass", %"class.llvm::MCContext", %"class.llvm::Module"*, %"class.llvm::MachineModuleInfoImpl"*, %"class.std::vector.95", i32, %"class.std::vector.100", %"class.llvm::DenseMap.110", %"class.llvm::DenseMap.114", i32, %"class.std::vector.118", %"class.std::vector.123", %"class.std::vector.123", %"class.std::vector.128", %"class.llvm::SmallPtrSet", %"class.llvm::MMIAddrLabelMap"*, i8, i8, i8, i8, %"class.llvm::SmallVector.133" }
+%"class.llvm::ImmutablePass" = type { %"class.llvm::ModulePass" }
+%"class.llvm::ModulePass" = type { %"class.llvm::Pass" }
+%"class.llvm::Pass" = type { i32 (...)**, %"class.llvm::AnalysisResolver"*, i8*, i32 }
+%"class.llvm::AnalysisResolver" = type { %"class.std::vector.89", %"class.llvm::PMDataManager"* }
+%"class.std::vector.89" = type { %"struct.std::_Vector_base.90" }
+%"struct.std::_Vector_base.90" = type { %"struct.std::_Vector_base<std::pair<const void *, llvm::Pass *>, std::allocator<std::pair<const void *, llvm::Pass *> > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::pair<const void *, llvm::Pass *>, std::allocator<std::pair<const void *, llvm::Pass *> > >::_Vector_impl" = type { %"struct.std::pair.94"*, %"struct.std::pair.94"*, %"struct.std::pair.94"* }
+%"struct.std::pair.94" = type { i8*, %"class.llvm::Pass"* }
+%"class.llvm::PMDataManager" = type opaque
+%"class.llvm::MachineModuleInfoImpl" = type { i32 (...)** }
+%"class.std::vector.95" = type { %"struct.std::_Vector_base.96" }
+%"struct.std::_Vector_base.96" = type { %"struct.std::_Vector_base<llvm::MachineMove, std::allocator<llvm::MachineMove> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MachineMove, std::allocator<llvm::MachineMove> >::_Vector_impl" = type { %"class.llvm::MachineMove"*, %"class.llvm::MachineMove"*, %"class.llvm::MachineMove"* }
+%"class.llvm::MachineMove" = type { %"class.llvm::MCSymbol"*, %"class.llvm::MachineLocation", %"class.llvm::MachineLocation" }
+%"class.llvm::MachineLocation" = type { i8, i32, i32 }
+%"class.std::vector.100" = type { %"struct.std::_Vector_base.101" }
+%"struct.std::_Vector_base.101" = type { %"struct.std::_Vector_base<llvm::LandingPadInfo, std::allocator<llvm::LandingPadInfo> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::LandingPadInfo, std::allocator<llvm::LandingPadInfo> >::_Vector_impl" = type { %"struct.llvm::LandingPadInfo"*, %"struct.llvm::LandingPadInfo"*, %"struct.llvm::LandingPadInfo"* }
+%"struct.llvm::LandingPadInfo" = type { %"class.llvm::MachineBasicBlock"*, %"class.llvm::SmallVector", %"class.llvm::SmallVector", %"class.llvm::MCSymbol"*, %"class.llvm::Function"*, %"class.std::vector.105" }
+%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl", %"struct.llvm::SmallVectorStorage" }
+%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" }
+%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" }
+%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion" }
+%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8* }
+%"struct.llvm::AlignedCharArrayUnion" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::AlignedCharArray" = type { [8 x i8] }
+%"struct.llvm::SmallVectorStorage" = type { i8 }
+%"class.std::vector.105" = type { %"struct.std::_Vector_base.106" }
+%"struct.std::_Vector_base.106" = type { %"struct.std::_Vector_base<int, std::allocator<int> >::_Vector_impl" }
+%"struct.std::_Vector_base<int, std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
+%"class.llvm::DenseMap.110" = type { %"struct.std::pair.113"*, i32, i32, i32 }
+%"struct.std::pair.113" = type { %"class.llvm::MCSymbol"*, %"class.llvm::SmallVector.206" }
+%"class.llvm::SmallVector.206" = type { [28 x i8], %"struct.llvm::SmallVectorStorage.207" }
+%"struct.llvm::SmallVectorStorage.207" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.198"] }
+%"struct.llvm::AlignedCharArrayUnion.198" = type { %"struct.llvm::AlignedCharArray.199" }
+%"struct.llvm::AlignedCharArray.199" = type { [4 x i8] }
+%"class.llvm::DenseMap.114" = type { %"struct.std::pair.117"*, i32, i32, i32 }
+%"struct.std::pair.117" = type { %"class.llvm::MCSymbol"*, i32 }
+%"class.std::vector.118" = type { %"struct.std::_Vector_base.119" }
+%"struct.std::_Vector_base.119" = type { %"struct.std::_Vector_base<const llvm::GlobalVariable *, std::allocator<const llvm::GlobalVariable *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::GlobalVariable *, std::allocator<const llvm::GlobalVariable *> >::_Vector_impl" = type { %"class.llvm::GlobalVariable"**, %"class.llvm::GlobalVariable"**, %"class.llvm::GlobalVariable"** }
+%"class.std::vector.123" = type { %"struct.std::_Vector_base.124" }
+%"struct.std::_Vector_base.124" = type { %"struct.std::_Vector_base<unsigned int, std::allocator<unsigned int> >::_Vector_impl" }
+%"struct.std::_Vector_base<unsigned int, std::allocator<unsigned int> >::_Vector_impl" = type { i32*, i32*, i32* }
+%"class.std::vector.128" = type { %"struct.std::_Vector_base.129" }
+%"struct.std::_Vector_base.129" = type { %"struct.std::_Vector_base<const llvm::Function *, std::allocator<const llvm::Function *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::Function *, std::allocator<const llvm::Function *> >::_Vector_impl" = type { %"class.llvm::Function"**, %"class.llvm::Function"**, %"class.llvm::Function"** }
+%"class.llvm::SmallPtrSet" = type { %"class.llvm::SmallPtrSetImpl", [33 x i8*] }
+%"class.llvm::SmallPtrSetImpl" = type { i8**, i8**, i32, i32, i32 }
+%"class.llvm::MMIAddrLabelMap" = type opaque
+%"class.llvm::SmallVector.133" = type { %"class.llvm::SmallVectorImpl.134", %"struct.llvm::SmallVectorStorage.139" }
+%"class.llvm::SmallVectorImpl.134" = type { %"class.llvm::SmallVectorTemplateBase.135" }
+%"class.llvm::SmallVectorTemplateBase.135" = type { %"class.llvm::SmallVectorTemplateCommon.136" }
+%"class.llvm::SmallVectorTemplateCommon.136" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.137" }
+%"struct.llvm::AlignedCharArrayUnion.137" = type { %"struct.llvm::AlignedCharArray.138" }
+%"struct.llvm::AlignedCharArray.138" = type { [40 x i8] }
+%"struct.llvm::SmallVectorStorage.139" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.137"] }
+%"class.llvm::GCModuleInfo" = type opaque
+%"class.llvm::MachineRegisterInfo" = type { %"class.llvm::TargetRegisterInfo"*, i8, i8, %"class.llvm::IndexedMap", %"class.llvm::IndexedMap.146", %"class.llvm::MachineOperand"**, %"class.llvm::BitVector", %"class.llvm::BitVector", %"class.llvm::BitVector", %"class.std::vector.147", %"class.std::vector.123" }
+%"class.llvm::TargetRegisterInfo" = type { i32 (...)**, %"class.llvm::MCRegisterInfo", %"struct.llvm::TargetRegisterInfoDesc"*, i8**, i32*, %"class.llvm::TargetRegisterClass"**, %"class.llvm::TargetRegisterClass"** }
+%"struct.llvm::TargetRegisterInfoDesc" = type { i32, i8 }
+%"class.llvm::TargetRegisterClass" = type { %"class.llvm::MCRegisterClass"*, i32*, i32*, i16*, %"class.llvm::TargetRegisterClass"**, void (%"class.llvm::ArrayRef"*, %"class.llvm::MachineFunction"*)* }
+%"class.llvm::ArrayRef" = type { i16*, i64 }
+%"class.llvm::IndexedMap" = type { %"class.std::vector.140", %"struct.std::pair.145", %"struct.llvm::VirtReg2IndexFunctor" }
+%"class.std::vector.140" = type { %"struct.std::_Vector_base.141" }
+%"struct.std::_Vector_base.141" = type { %"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *>, std::allocator<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *> > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *>, std::allocator<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *> > >::_Vector_impl" = type { %"struct.std::pair.145"*, %"struct.std::pair.145"*, %"struct.std::pair.145"* }
+%"struct.std::pair.145" = type { %"class.llvm::TargetRegisterClass"*, %"class.llvm::MachineOperand"* }
+%"class.llvm::MachineOperand" = type { i8, [3 x i8], %union.anon, %"class.llvm::MachineInstr"*, %union.anon.188 }
+%union.anon = type { i32 }
+%union.anon.188 = type { %struct.anon }
+%struct.anon = type { %"class.llvm::MachineOperand"*, %"class.llvm::MachineOperand"* }
+%"struct.llvm::VirtReg2IndexFunctor" = type { i8 }
+%"class.llvm::IndexedMap.146" = type { %"class.std::vector.147", %"struct.std::pair.152", %"struct.llvm::VirtReg2IndexFunctor" }
+%"class.std::vector.147" = type { %"struct.std::_Vector_base.148" }
+%"struct.std::_Vector_base.148" = type { %"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>, std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>, std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" = type { %"struct.std::pair.152"*, %"struct.std::pair.152"*, %"struct.std::pair.152"* }
+%"struct.std::pair.152" = type { i32, i32 }
+%"class.llvm::BitVector" = type { i64*, i32, i32 }
+%"struct.llvm::MachineFunctionInfo" = type { i32 (...)** }
+%"class.llvm::MachineFrameInfo" = type opaque
+%"class.llvm::MachineConstantPool" = type { %"class.llvm::DataLayout"*, i32, %"class.std::vector.153", %"class.llvm::DenseSet" }
+%"class.llvm::DataLayout" = type opaque
+%"class.std::vector.153" = type { %"struct.std::_Vector_base.154" }
+%"struct.std::_Vector_base.154" = type { %"struct.std::_Vector_base<llvm::MachineConstantPoolEntry, std::allocator<llvm::MachineConstantPoolEntry> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MachineConstantPoolEntry, std::allocator<llvm::MachineConstantPoolEntry> >::_Vector_impl" = type { %"class.llvm::MachineConstantPoolEntry"*, %"class.llvm::MachineConstantPoolEntry"*, %"class.llvm::MachineConstantPoolEntry"* }
+%"class.llvm::MachineConstantPoolEntry" = type { %union.anon.158, i32 }
+%union.anon.158 = type { %"class.llvm::Constant"* }
+%"class.llvm::Constant" = type { %"class.llvm::User" }
+%"class.llvm::DenseSet" = type { %"class.llvm::DenseMap.159" }
+%"class.llvm::DenseMap.159" = type { %"struct.std::pair.162"*, i32, i32, i32 }
+%"struct.std::pair.162" = type { %"class.llvm::MachineConstantPoolValue"*, i8 }
+%"class.llvm::MachineConstantPoolValue" = type { i32 (...)**, %"class.llvm::Type"* }
+%"class.llvm::MachineJumpTableInfo" = type opaque
+%"class.std::vector.163" = type { %"struct.std::_Vector_base.164" }
+%"struct.std::_Vector_base.164" = type { %"struct.std::_Vector_base<llvm::MachineBasicBlock *, std::allocator<llvm::MachineBasicBlock *> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MachineBasicBlock *, std::allocator<llvm::MachineBasicBlock *> >::_Vector_impl" = type { %"class.llvm::MachineBasicBlock"**, %"class.llvm::MachineBasicBlock"**, %"class.llvm::MachineBasicBlock"** }
+%"class.llvm::Recycler" = type { %"class.llvm::iplist.168" }
+%"class.llvm::iplist.168" = type { %"struct.llvm::ilist_traits.169", %"struct.llvm::RecyclerStruct"* }
+%"struct.llvm::ilist_traits.169" = type { %"struct.llvm::RecyclerStruct" }
+%"struct.llvm::RecyclerStruct" = type { %"struct.llvm::RecyclerStruct"*, %"struct.llvm::RecyclerStruct"* }
+%"class.llvm::ArrayRecycler" = type { %"class.llvm::SmallVector.174" }
+%"class.llvm::SmallVector.174" = type { %"class.llvm::SmallVectorImpl.175", %"struct.llvm::SmallVectorStorage.179" }
+%"class.llvm::SmallVectorImpl.175" = type { %"class.llvm::SmallVectorTemplateBase.176" }
+%"class.llvm::SmallVectorTemplateBase.176" = type { %"class.llvm::SmallVectorTemplateCommon.177" }
+%"class.llvm::SmallVectorTemplateCommon.177" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.178" }
+%"struct.llvm::AlignedCharArrayUnion.178" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::SmallVectorStorage.179" = type { [7 x %"struct.llvm::AlignedCharArrayUnion.178"] }
+%"class.llvm::Recycler.180" = type { %"class.llvm::iplist.168" }
+%"struct.llvm::ilist.181" = type { %"class.llvm::iplist.182" }
+%"class.llvm::iplist.182" = type { %"struct.llvm::ilist_traits.183", %"class.llvm::MachineBasicBlock"* }
+%"struct.llvm::ilist_traits.183" = type { %"class.llvm::ilist_half_node.1" }
+%"class.llvm::ArrayRecycler<llvm::MachineOperand, 8>::Capacity" = type { i8 }
+%"class.llvm::ConstantInt" = type { %"class.llvm::Constant", %"class.llvm::APInt" }
+%"class.llvm::APInt" = type { i32, %union.anon.189 }
+%union.anon.189 = type { i64 }
+%"class.llvm::ConstantFP" = type { %"class.llvm::Constant", %"class.llvm::APFloat" }
+%"class.llvm::APFloat" = type { %"struct.llvm::fltSemantics"*, %"union.llvm::APFloat::Significand", i16, i8 }
+%"struct.llvm::fltSemantics" = type opaque
+%"union.llvm::APFloat::Significand" = type { i64 }
+%"class.llvm::BlockAddress" = type { %"class.llvm::Constant" }
+%"class.llvm::hash_code" = type { i64 }
+%"struct.llvm::hashing::detail::hash_combine_recursive_helper" = type { [64 x i8], %"struct.llvm::hashing::detail::hash_state", i64 }
+%"struct.llvm::hashing::detail::hash_state" = type { i64, i64, i64, i64, i64, i64, i64, i64 }
+%"class.llvm::PrintReg" = type { %"class.llvm::TargetRegisterInfo"*, i32, i32 }
+%"class.llvm::PseudoSourceValue" = type { %"class.llvm::Value" }
+%"class.llvm::FoldingSetNodeID" = type { %"class.llvm::SmallVector.194" }
+%"class.llvm::SmallVector.194" = type { [28 x i8], %"struct.llvm::SmallVectorStorage.200" }
+%"struct.llvm::SmallVectorStorage.200" = type { [31 x %"struct.llvm::AlignedCharArrayUnion.198"] }
+%"struct.llvm::ArrayRecycler<llvm::MachineOperand, 8>::FreeList" = type { %"struct.llvm::ArrayRecycler<llvm::MachineOperand, 8>::FreeList"* }
+%"class.llvm::ilist_iterator.202" = type { %"class.llvm::MachineInstr"* }
+%"class.llvm::TargetInstrInfo" = type { i32 (...)**, [28 x i8], i32, i32 }
+%"struct.std::pair.203" = type { i8, i8 }
+%"class.llvm::SmallVectorImpl.195" = type { %"class.llvm::SmallVectorTemplateBase.196" }
+%"class.llvm::SmallVectorTemplateBase.196" = type { %"class.llvm::SmallVectorTemplateCommon.197" }
+%"class.llvm::SmallVectorTemplateCommon.197" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.198" }
+%"class.llvm::AliasAnalysis" = type { i32 (...)**, %"class.llvm::DataLayout"*, %"class.llvm::TargetLibraryInfo"*, %"class.llvm::AliasAnalysis"* }
+%"class.llvm::TargetLibraryInfo" = type opaque
+%"struct.llvm::AliasAnalysis::Location" = type { %"class.llvm::Value"*, i64, %"class.llvm::MDNode"* }
+%"class.llvm::DIVariable" = type { %"class.llvm::DIDescriptor" }
+%"class.llvm::DIDescriptor" = type { %"class.llvm::MDNode"* }
+%"class.llvm::DIScope" = type { %"class.llvm::DIDescriptor" }
+%"class.llvm::ArrayRef.208" = type { i32*, i64 }
+%"class.llvm::SmallVector.209" = type { %"class.llvm::SmallVectorImpl.210", %"struct.llvm::SmallVectorStorage.214" }
+%"class.llvm::SmallVectorImpl.210" = type { %"class.llvm::SmallVectorTemplateBase.211" }
+%"class.llvm::SmallVectorTemplateBase.211" = type { %"class.llvm::SmallVectorTemplateCommon.212" }
+%"class.llvm::SmallVectorTemplateCommon.212" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.213" }
+%"struct.llvm::AlignedCharArrayUnion.213" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::SmallVectorStorage.214" = type { [7 x %"struct.llvm::AlignedCharArrayUnion.213"] }
+%"class.llvm::Twine" = type { %"union.llvm::Twine::Child", %"union.llvm::Twine::Child", i8, i8 }
+%"union.llvm::Twine::Child" = type { %"class.llvm::Twine"* }
+%"struct.std::random_access_iterator_tag" = type { i8 }
+
+declare void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"*, %"class.llvm::MachineOperand"*)
+
+declare void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"*, %"class.llvm::MachineOperand"*)
+
+declare zeroext i32 @_ZNK4llvm14MCRegisterInfo9getSubRegEjj(%"class.llvm::MCRegisterInfo"*, i32 zeroext, i32 zeroext)
+
+define void @_ZN4llvm14MachineOperand12substPhysRegEjRKNS_18TargetRegisterInfoE(%"class.llvm::MachineOperand"* %this, i32 zeroext %Reg, %"class.llvm::TargetRegisterInfo"* %TRI) align 2 {
+entry:
+  %SubReg_TargetFlags.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 1
+  %0 = bitcast [3 x i8]* %SubReg_TargetFlags.i to i24*
+  %bf.load.i = load i24* %0, align 1
+  %bf.lshr.i = lshr i24 %bf.load.i, 12
+  %tobool = icmp eq i24 %bf.lshr.i, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %bf.cast.i = zext i24 %bf.lshr.i to i32
+  %add.ptr = getelementptr inbounds %"class.llvm::TargetRegisterInfo"* %TRI, i64 0, i32 1
+  %call3 = tail call zeroext i32 @_ZNK4llvm14MCRegisterInfo9getSubRegEjj(%"class.llvm::MCRegisterInfo"* %add.ptr, i32 zeroext %Reg, i32 zeroext %bf.cast.i)
+  %bf.load.i10 = load i24* %0, align 1
+  %bf.clear.i = and i24 %bf.load.i10, 4095
+  store i24 %bf.clear.i, i24* %0, align 1
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %Reg.addr.0 = phi i32 [ %call3, %if.then ], [ %Reg, %entry ]
+  %RegNo.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 2, i32 0
+  %1 = load i32* %RegNo.i.i, align 4, !tbaa !0
+  %cmp.i = icmp eq i32 %1, %Reg.addr.0
+  br i1 %cmp.i, label %_ZN4llvm14MachineOperand6setRegEj.exit, label %if.end.i
+
+if.end.i:                                         ; preds = %if.end
+  %ParentMI.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 3
+  %2 = load %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8, !tbaa !3
+  %tobool.i = icmp eq %"class.llvm::MachineInstr"* %2, null
+  br i1 %tobool.i, label %if.end13.i, label %if.then3.i
+
+if.then3.i:                                       ; preds = %if.end.i
+  %Parent.i.i = getelementptr inbounds %"class.llvm::MachineInstr"* %2, i64 0, i32 2
+  %3 = load %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 8, !tbaa !3
+  %tobool5.i = icmp eq %"class.llvm::MachineBasicBlock"* %3, null
+  br i1 %tobool5.i, label %if.end13.i, label %if.then6.i
+
+if.then6.i:                                       ; preds = %if.then3.i
+  %xParent.i.i = getelementptr inbounds %"class.llvm::MachineBasicBlock"* %3, i64 0, i32 4
+  %4 = load %"class.llvm::MachineFunction"** %xParent.i.i, align 8, !tbaa !3
+  %tobool8.i = icmp eq %"class.llvm::MachineFunction"* %4, null
+  br i1 %tobool8.i, label %if.end13.i, label %if.then9.i
+
+if.then9.i:                                       ; preds = %if.then6.i
+  %RegInfo.i.i = getelementptr inbounds %"class.llvm::MachineFunction"* %4, i64 0, i32 5
+  %5 = load %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8, !tbaa !3
+  tail call void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
+  store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0
+  tail call void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
+  br label %_ZN4llvm14MachineOperand6setRegEj.exit
+
+if.end13.i:                                       ; preds = %if.then6.i, %if.then3.i, %if.end.i
+  store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0
+  br label %_ZN4llvm14MachineOperand6setRegEj.exit
+
+_ZN4llvm14MachineOperand6setRegEj.exit:           ; preds = %if.end, %if.then9.i, %if.end13.i
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
+!4 = metadata !{metadata !"vtable pointer", metadata !2}
+!5 = metadata !{metadata !"long", metadata !1}
+!6 = metadata !{i64 0, i64 8, metadata !3, i64 8, i64 8, metadata !5}
+!7 = metadata !{metadata !"short", metadata !1}
+!8 = metadata !{i64 0, i64 1, metadata !1, i64 1, i64 4, metadata !0, i64 2, i64 1, metadata !1, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 4, i64 4, metadata !0, i64 4, i64 4, metadata !0, i64 8, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !5, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 8, metadata !3, i64 16, i64 4, metadata !0, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 4, metadata !0}
+!9 = metadata !{metadata !"bool", metadata !1}
+!10 = metadata !{i8 0, i8 2}
+
+; CHECK-NOT: lbzu 3, 1(3)
diff --git a/test/CodeGen/PowerPC/pr15359.ll b/test/CodeGen/PowerPC/pr15359.ll
new file mode 100644
index 000000000000..12fa3e5ffbdd
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15359.ll
@@ -0,0 +1,20 @@
+; RUN: llc -O0 -mcpu=pwr7 -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+target datalayout = "E-p:64:64:64-S0-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@nextIdx = external thread_local global i32
+
+define fastcc void @func() nounwind {
+entry:
+  store i32 42, i32* @nextIdx
+  ret void
+}
+
+; Verify that nextIdx has symbol type TLS.
+;
+; CHECK:    '.symtab'
+; CHECK:    'nextIdx'
+; CHECK:    'st_type', 0x6
+
diff --git a/test/CodeGen/PowerPC/pr15630.ll b/test/CodeGen/PowerPC/pr15630.ll
new file mode 100644
index 000000000000..c5ba8a4d4f04
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15630.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define weak_odr void @_D4core6atomic49__T11atomicStoreVE4core6atomic11MemoryOrder3ThThZ11atomicStoreFNaNbKOhhZv(i8* %val_arg, i8 zeroext %newval_arg) {
+entry:
+  %newval = alloca i8
+  %ordering = alloca i32, align 4
+  store i8 %newval_arg, i8* %newval
+  %tmp = load i8* %newval
+  store atomic volatile i8 %tmp, i8* %val_arg seq_cst, align 1
+  ret void
+}
+
+; CHECK: stwcx.
diff --git a/test/CodeGen/PowerPC/pr15632.ll b/test/CodeGen/PowerPC/pr15632.ll
new file mode 100644
index 000000000000..3ea83468b6d7
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15632.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare void @other(ppc_fp128 %tmp70)
+
+define void @bug() {
+entry:
+  %tmp70 = frem ppc_fp128 0xM00000000000000000000000000000000, undef
+  call void @other(ppc_fp128 %tmp70)
+  unreachable
+}
+
+; CHECK: bl fmodl
diff --git a/test/CodeGen/PowerPC/pwr3-6x.ll b/test/CodeGen/PowerPC/pwr3-6x.ll
new file mode 100644
index 000000000000..a9cfe412fd84
--- /dev/null
+++ b/test/CodeGen/PowerPC/pwr3-6x.ll
@@ -0,0 +1,14 @@
+; Test basic support for some older processors.
+
+;RUN: llc < %s -march=ppc64 -mcpu=pwr3 | FileCheck %s
+;RUN: llc < %s -march=ppc64 -mcpu=pwr4 | FileCheck %s
+;RUN: llc < %s -march=ppc64 -mcpu=pwr5 | FileCheck %s
+;RUN: llc < %s -march=ppc64 -mcpu=pwr5x | FileCheck %s
+;RUN: llc < %s -march=ppc64 -mcpu=pwr6x | FileCheck %s
+
+define void @foo() {
+entry:
+  ret void
+}
+
+; CHECK: @foo
diff --git a/test/CodeGen/PowerPC/quadint-return.ll b/test/CodeGen/PowerPC/quadint-return.ll
new file mode 100644
index 000000000000..03499915e78e
--- /dev/null
+++ b/test/CodeGen/PowerPC/quadint-return.ll
@@ -0,0 +1,19 @@
+; REQUIRES: asserts
+; RUN: llc -O0 -debug -o - < %s 2>&1 | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i128 @foo() nounwind {
+entry:
+  %x = alloca i128, align 16
+  store i128 27, i128* %x, align 16
+  %0 = load i128* %x, align 16
+  ret i128 %0
+}
+
+; CHECK: ********** Function: foo
+; CHECK: ********** FAST REGISTER ALLOCATION **********
+; CHECK: %X3<def> = COPY %vreg
+; CHECK-NEXT: %X4<def> = COPY %vreg
+; CHECK-NEXT: BLR
diff --git a/test/CodeGen/PowerPC/r31.ll b/test/CodeGen/PowerPC/r31.ll
new file mode 100644
index 000000000000..7ce12f600b41
--- /dev/null
+++ b/test/CodeGen/PowerPC/r31.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define i64 @foo(i64 %a) nounwind {
+entry:
+  call void asm sideeffect "", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30}"() nounwind
+  br label %return
+
+; CHECK: @foo
+; CHECK: mr 31, 3
+
+return:                                           ; preds = %entry
+  ret i64 %a
+}
+
diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll
new file mode 100644
index 000000000000..89705faa46e9
--- /dev/null
+++ b/test/CodeGen/PowerPC/recipest.ll
@@ -0,0 +1,226 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck -check-prefix=CHECK-SAFE %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare double @llvm.sqrt.f64(double)
+declare float @llvm.sqrt.f32(float)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+
+define double @foo(double %a, double %b) nounwind {
+entry:
+  %x = call double @llvm.sqrt.f64(double %b)
+  %r = fdiv double %a, %x
+  ret double %r
+
+; CHECK: @foo
+; CHECK: frsqrte
+; CHECK: fnmsub
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: blr
+
+; CHECK-SAFE: @foo
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define double @foof(double %a, float %b) nounwind {
+entry:
+  %x = call float @llvm.sqrt.f32(float %b)
+  %y = fpext float %x to double
+  %r = fdiv double %a, %y
+  ret double %r
+
+; CHECK: @foof
+; CHECK: frsqrtes
+; CHECK: fnmsubs
+; CHECK: fmuls
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: fmul
+; CHECK: blr
+
+; CHECK-SAFE: @foof
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define float @food(float %a, double %b) nounwind {
+entry:
+  %x = call double @llvm.sqrt.f64(double %b)
+  %y = fptrunc double %x to float
+  %r = fdiv float %a, %y
+  ret float %r
+
+; CHECK: @foo
+; CHECK: frsqrte
+; CHECK: fnmsub
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: frsp
+; CHECK: fmuls
+; CHECK: blr
+
+; CHECK-SAFE: @foo
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define float @goo(float %a, float %b) nounwind {
+entry:
+  %x = call float @llvm.sqrt.f32(float %b)
+  %r = fdiv float %a, %x
+  ret float %r
+
+; CHECK: @goo
+; CHECK: frsqrtes
+; CHECK: fnmsubs
+; CHECK: fmuls
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: fmuls
+; CHECK: blr
+
+; CHECK-SAFE: @goo
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %r = fdiv <4 x float> %a, %x
+  ret <4 x float> %r
+
+; CHECK: @hoo
+; CHECK: vrsqrtefp
+
+; CHECK-SAFE: @hoo
+; CHECK-SAFE-NOT: vrsqrtefp
+; CHECK-SAFE: blr
+}
+
+define double @foo2(double %a, double %b) nounwind {
+entry:
+  %r = fdiv double %a, %b
+  ret double %r
+
+; CHECK: @foo2
+; CHECK: fre
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: blr
+
+; CHECK-SAFE: @foo2
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define float @goo2(float %a, float %b) nounwind {
+entry:
+  %r = fdiv float %a, %b
+  ret float %r
+
+; CHECK: @goo2
+; CHECK: fres
+; CHECK: fnmsubs
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: blr
+
+; CHECK-SAFE: @goo2
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %r = fdiv <4 x float> %a, %b
+  ret <4 x float> %r
+
+; CHECK: @hoo2
+; CHECK: vrefp
+
+; CHECK-SAFE: @hoo2
+; CHECK-SAFE-NOT: vrefp
+; CHECK-SAFE: blr
+}
+
+define double @foo3(double %a) nounwind {
+entry:
+  %r = call double @llvm.sqrt.f64(double %a)
+  ret double %r
+
+; CHECK: @foo3
+; CHECK: frsqrte
+; CHECK: fnmsub
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fre
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: blr
+
+; CHECK-SAFE: @foo3
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: blr
+}
+
+define float @goo3(float %a) nounwind {
+entry:
+  %r = call float @llvm.sqrt.f32(float %a)
+  ret float %r
+
+; CHECK: @goo3
+; CHECK: frsqrtes
+; CHECK: fnmsubs
+; CHECK: fmuls
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: fres
+; CHECK: fnmsubs
+; CHECK: fmadds
+; CHECK: blr
+
+; CHECK-SAFE: @goo3
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo3(<4 x float> %a) nounwind {
+entry:
+  %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+  ret <4 x float> %r
+
+; CHECK: @hoo3
+; CHECK: vrsqrtefp
+; CHECK: vrefp
+
+; CHECK-SAFE: @hoo3
+; CHECK-SAFE-NOT: vrsqrtefp
+; CHECK-SAFE: blr
+}
+
diff --git a/test/CodeGen/PowerPC/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll
index 7efdbe9634fe..31b6d4aa03bc 100644
--- a/test/CodeGen/PowerPC/rlwimi3.ll
+++ b/test/CodeGen/PowerPC/rlwimi3.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: llc < %s -march=ppc32 -stats 2>&1 | \
 ; RUN:   grep "Number of machine instrs printed" | grep 12
 
diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll
new file mode 100644
index 000000000000..b210a6bda8bf
--- /dev/null
+++ b/test/CodeGen/PowerPC/rounding-ops.ll
@@ -0,0 +1,145 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @test1(float %x) nounwind  {
+  %call = tail call float @floorf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK: test1:
+; CHECK: frim 1, 1
+
+; CHECK-FM: test1:
+; CHECK-FM: frim 1, 1
+}
+
+declare float @floorf(float) nounwind readnone
+
+define double @test2(double %x) nounwind  {
+  %call = tail call double @floor(double %x) nounwind readnone
+  ret double %call
+
+; CHECK: test2:
+; CHECK: frim 1, 1
+
+; CHECK-FM: test2:
+; CHECK-FM: frim 1, 1
+}
+
+declare double @floor(double) nounwind readnone
+
+define float @test3(float %x) nounwind  {
+  %call = tail call float @nearbyintf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK: test3:
+; CHECK-NOT: frin
+
+; CHECK-FM: test3:
+; CHECK-FM: frin 1, 1
+}
+
+declare float @nearbyintf(float) nounwind readnone
+
+define double @test4(double %x) nounwind  {
+  %call = tail call double @nearbyint(double %x) nounwind readnone
+  ret double %call
+
+; CHECK: test4:
+; CHECK-NOT: frin
+
+; CHECK-FM: test4:
+; CHECK-FM: frin 1, 1
+}
+
+declare double @nearbyint(double) nounwind readnone
+
+define float @test5(float %x) nounwind  {
+  %call = tail call float @ceilf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK: test5:
+; CHECK: frip 1, 1
+
+; CHECK-FM: test5:
+; CHECK-FM: frip 1, 1
+}
+
+declare float @ceilf(float) nounwind readnone
+
+define double @test6(double %x) nounwind  {
+  %call = tail call double @ceil(double %x) nounwind readnone
+  ret double %call
+
+; CHECK: test6:
+; CHECK: frip 1, 1
+
+; CHECK-FM: test6:
+; CHECK-FM: frip 1, 1
+}
+
+declare double @ceil(double) nounwind readnone
+
+define float @test9(float %x) nounwind  {
+  %call = tail call float @truncf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK: test9:
+; CHECK: friz 1, 1
+
+; CHECK-FM: test9:
+; CHECK-FM: friz 1, 1
+}
+
+declare float @truncf(float) nounwind readnone
+
+define double @test10(double %x) nounwind  {
+  %call = tail call double @trunc(double %x) nounwind readnone
+  ret double %call
+
+; CHECK: test10:
+; CHECK: friz 1, 1
+
+; CHECK-FM: test10:
+; CHECK-FM: friz 1, 1
+}
+
+declare double @trunc(double) nounwind readnone
+
+define float @test11(float %x) nounwind  {
+  %call = tail call float @rintf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK: test11:
+; CHECK-NOT: frin
+
+; CHECK-FM: test11:
+; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
+; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
+; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
+; CHECK-FM: mtfsb1 6
+; CHECK-FM: .LBB[[BB]]_2:
+; CHECK-FM: blr
+}
+
+declare float @rintf(float) nounwind readnone
+
+define double @test12(double %x) nounwind  {
+  %call = tail call double @rint(double %x) nounwind readnone
+  ret double %call
+
+; CHECK: test12:
+; CHECK-NOT: frin
+
+; CHECK-FM: test12:
+; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
+; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
+; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
+; CHECK-FM: mtfsb1 6
+; CHECK-FM: .LBB[[BB]]_2:
+; CHECK-FM: blr
+}
+
+declare double @rint(double) nounwind readnone
+
diff --git a/test/CodeGen/PowerPC/s000-alias-misched.ll b/test/CodeGen/PowerPC/s000-alias-misched.ll
new file mode 100644
index 000000000000..d03ee8738eea
--- /dev/null
+++ b/test/CodeGen/PowerPC/s000-alias-misched.ll
@@ -0,0 +1,101 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+; RUN: llc < %s -enable-misched -march=ppc64 -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -enable-misched -enable-aa-sched-mi -march=ppc64 -mcpu=a2 | FileCheck %s
+
+@aa = external global [256 x [256 x double]], align 32
+@bb = external global [256 x [256 x double]], align 32
+@cc = external global [256 x [256 x double]], align 32
+@.str1 = external hidden unnamed_addr constant [6 x i8], align 1
+@X = external global [16000 x double], align 32
+@Y = external global [16000 x double], align 32
+@Z = external global [16000 x double], align 32
+@U = external global [16000 x double], align 32
+@V = external global [16000 x double], align 32
+@.str137 = external hidden unnamed_addr constant [14 x i8], align 1
+
+declare void @check(i32 signext) nounwind
+
+declare signext i32 @printf(i8* nocapture, ...) nounwind
+
+declare signext i32 @init(i8*) nounwind
+
+define signext i32 @s000() nounwind {
+entry:
+  %call = tail call signext i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0))
+  %call1 = tail call i64 @clock() nounwind
+  br label %for.cond2.preheader
+
+; CHECK: @s000
+
+for.cond2.preheader:                              ; preds = %for.end, %entry
+  %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ]
+  br label %for.body4
+
+for.body4:                                        ; preds = %for.body4, %for.cond2.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond2.preheader ], [ %indvars.iv.next.15, %for.body4 ]
+  %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv
+  %arrayidx6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv
+  %0 = bitcast double* %arrayidx to <1 x double>*
+  %1 = load <1 x double>* %0, align 32, !tbaa !0
+  %add = fadd <1 x double> %1, <double 1.000000e+00>
+  %2 = bitcast double* %arrayidx6 to <1 x double>*
+  store <1 x double> %add, <1 x double>* %2, align 32, !tbaa !0
+  %indvars.iv.next.322 = or i64 %indvars.iv, 4
+  %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.322
+  %arrayidx6.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.322
+  %3 = bitcast double* %arrayidx.4 to <1 x double>*
+  %4 = load <1 x double>* %3, align 32, !tbaa !0
+  %add.4 = fadd <1 x double> %4, <double 1.000000e+00>
+  %5 = bitcast double* %arrayidx6.4 to <1 x double>*
+  store <1 x double> %add.4, <1 x double>* %5, align 32, !tbaa !0
+  %indvars.iv.next.726 = or i64 %indvars.iv, 8
+  %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.726
+  %arrayidx6.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.726
+  %6 = bitcast double* %arrayidx.8 to <1 x double>*
+  %7 = load <1 x double>* %6, align 32, !tbaa !0
+  %add.8 = fadd <1 x double> %7, <double 1.000000e+00>
+  %8 = bitcast double* %arrayidx6.8 to <1 x double>*
+  store <1 x double> %add.8, <1 x double>* %8, align 32, !tbaa !0
+  %indvars.iv.next.1130 = or i64 %indvars.iv, 12
+  %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1130
+  %arrayidx6.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1130
+  %9 = bitcast double* %arrayidx.12 to <1 x double>*
+  %10 = load <1 x double>* %9, align 32, !tbaa !0
+  %add.12 = fadd <1 x double> %10, <double 1.000000e+00>
+  %11 = bitcast double* %arrayidx6.12 to <1 x double>*
+  store <1 x double> %add.12, <1 x double>* %11, align 32, !tbaa !0
+  %indvars.iv.next.15 = add i64 %indvars.iv, 16
+  %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
+  %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000
+  br i1 %exitcond.15, label %for.end, label %for.body4
+
+; All of the loads should come before all of the stores.
+; CHECK: mtctr
+; CHECK: stfd
+; CHECK-NOT: lfd
+; CHECK: bdnz
+
+for.end:                                          ; preds = %for.body4
+  %call7 = tail call signext i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind
+  %inc9 = add nsw i32 %nl.018, 1
+  %exitcond = icmp eq i32 %inc9, 400000
+  br i1 %exitcond, label %for.end10, label %for.cond2.preheader
+
+for.end10:                                        ; preds = %for.end
+  %call11 = tail call i64 @clock() nounwind
+  %sub = sub nsw i64 %call11, %call1
+  %conv = sitofp i64 %sub to double
+  %div = fdiv double %conv, 1.000000e+06
+  %call12 = tail call signext i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str137, i64 0, i64 0), double %div) nounwind
+  tail call void @check(i32 signext 1)
+  ret i32 0
+}
+
+declare i64 @clock() nounwind
+
+declare signext i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double)
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/sdag-ppcf128.ll b/test/CodeGen/PowerPC/sdag-ppcf128.ll
new file mode 100644
index 000000000000..535ece6d3dfe
--- /dev/null
+++ b/test/CodeGen/PowerPC/sdag-ppcf128.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+;
+; PR14751: Unsupported type in SelectionDAG::getConstantFP()
+
+define fastcc void @_D3std4math4sqrtFNaNbNfcZc() {
+entry:
+  br i1 undef, label %if, label %else
+; CHECK: cmplwi 0, 3, 0
+if:                                               ; preds = %entry
+  store { ppc_fp128, ppc_fp128 } zeroinitializer, { ppc_fp128, ppc_fp128 }* undef
+  ret void
+
+else:                                             ; preds = %entry
+  unreachable
+}
diff --git a/test/CodeGen/PowerPC/sjlj.ll b/test/CodeGen/PowerPC/sjlj.ll
new file mode 100644
index 000000000000..7ea35dafc3fa
--- /dev/null
+++ b/test/CodeGen/PowerPC/sjlj.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-NOAV %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.__jmp_buf_tag = type { [64 x i64], i32, %struct.__sigset_t, [8 x i8] }
+%struct.__sigset_t = type { [16 x i64] }
+
+@env_sigill = internal global [1 x %struct.__jmp_buf_tag] zeroinitializer, align 16
+
+define void @foo() #0 {
+entry:
+  call void @llvm.eh.sjlj.longjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*))
+  unreachable
+
+; CHECK: @foo
+; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
+; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l
+; CHECK: ld 31, 0([[REG]])
+; CHECK: ld [[REG2:[0-9]+]], 8([[REG]])
+; CHECK: ld 1, 16([[REG]])
+; CHECK: mtctr [[REG2]]
+; CHECK: ld 2, 24([[REG]])
+; CHECK: bctr
+
+return:                                           ; No predecessors!
+  ret void
+}
+
+declare void @llvm.eh.sjlj.longjmp(i8*) #1
+
+define signext i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  store i8* %0, i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**)
+  %1 = call i8* @llvm.stacksave()
+  store i8* %1, i8** getelementptr (i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**), i32 2)
+  %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*))
+  %tobool = icmp ne i32 %2, 0
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+if.else:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.else
+  store i32 0, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %3 = load i32* %retval
+  ret i32 %3
+
+; FIXME: We should be saving VRSAVE on Darwin, but we're not!
+
+; CHECK: @main
+; CHECK: std
+; Make sure that we're not saving VRSAVE on non-Darwin:
+; CHECK-NOT: mfspr
+; CHECK: stfd
+; CHECK: stvx
+
+; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
+; CHECK: std 31, env_sigill@toc@l([[REG]])
+; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l
+; CHECK: std [[REG]], [[OFF:[0-9]+]](31)                  # 8-byte Folded Spill
+; CHECK: std 1, 16([[REG]])
+; CHECK: std 2, 24([[REG]])
+; CHECK: bcl 20, 31, .LBB1_1
+; CHECK: li 3, 1
+; CHECK: #EH_SjLj_Setup	.LBB1_1
+; CHECK: b .LBB1_2
+
+; CHECK: .LBB1_1:
+; CHECK: mflr [[REGL:[0-9]+]]
+; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31)                   # 8-byte Folded Reload
+; CHECK: std [[REGL]], 8([[REG2]])
+; CHECK: li 3, 0
+
+; CHECK: .LBB1_2:
+
+; CHECK: lfd
+; CHECK: lvx
+; CHECK: ld
+; CHECK: blr
+
+; CHECK-NOAV: @main
+; CHECK-NOAV-NOT: stvx
+; CHECK-NOAV: bcl
+; CHECK-NOAV: mflr
+; CHECK-NOAV: bl foo
+; CHECK-NOAV-NOT: lvx
+; CHECK-NOAV: blr
+}
+
+declare i8* @llvm.frameaddress(i32) #2
+
+declare i8* @llvm.stacksave() #3
+
+declare i32 @llvm.eh.sjlj.setjmp(i8*) #3
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/stdux-constuse.ll b/test/CodeGen/PowerPC/stdux-constuse.ll
new file mode 100644
index 000000000000..e62d438014ee
--- /dev/null
+++ b/test/CodeGen/PowerPC/stdux-constuse.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mcpu=a2 -disable-lsr < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test1(i64 %add, i64* %ptr) nounwind {
+entry:
+  %p1 = getelementptr i64* %ptr, i64 144115188075855
+  br label %for.cond2.preheader
+
+for.cond2.preheader:
+  %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ]
+  br label %for.body4
+
+for.body4:
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body4 ], [ 16000, %for.cond2.preheader ]
+  %i0 = phi i64* [ %p1, %for.cond2.preheader ], [ %i6, %for.body4 ]
+  %i6 = getelementptr i64* %i0, i64 400000
+  %i7 = getelementptr i64* %i6, i64 300000
+  %i8 = getelementptr i64* %i6, i64 200000
+  %i9 = getelementptr i64* %i6, i64 100000
+  store i64 %add, i64* %i6, align 32
+  store i64 %add, i64* %i7, align 32
+  store i64 %add, i64* %i8, align 32
+  store i64 %add, i64* %i9, align 32
+  %lsr.iv.next = add i32 %lsr.iv, -16
+  %exitcond.15 = icmp eq i32 %lsr.iv.next, 0
+  br i1 %exitcond.15, label %for.end, label %for.body4
+
+; Make sure that we generate the most compact form of this loop with no
+; unnecessary moves
+; CHECK: @test1
+; CHECK: mtctr
+; CHECK: stdux
+; CHECK-NEXT: stdx
+; CHECK-NEXT: stdx
+; CHECK-NEXT: stdx
+; CHECK-NEXT: bdnz
+
+for.end:
+  %inc9 = add nsw i32 %nl.018, 1
+  %exitcond = icmp eq i32 %inc9, 400000
+  br i1 %exitcond, label %for.end10, label %for.cond2.preheader
+
+for.end10:
+  ret i32 0
+}
+
diff --git a/test/CodeGen/PowerPC/stfiwx-2.ll b/test/CodeGen/PowerPC/stfiwx-2.ll
index c49b25cc2303..7786fc17eacb 100644
--- a/test/CodeGen/PowerPC/stfiwx-2.ll
+++ b/test/CodeGen/PowerPC/stfiwx-2.ll
@@ -1,11 +1,14 @@
-; This cannot be a stfiwx
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep stb
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep stfiwx
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=g5 | FileCheck %s
 
 define void @test(float %F, i8* %P) {
 	%I = fptosi float %F to i32
 	%X = trunc i32 %I to i8
 	store i8 %X, i8* %P
 	ret void
+; CHECK: fctiwz 0, 1
+; CHECK: stfiwx 0, 0, 4
+; CHECK: lwz 4, 12(1)
+; CHECK: stb 4, 0(3)
+; CHECK: blr
 }
 
diff --git a/test/CodeGen/PowerPC/store-update.ll b/test/CodeGen/PowerPC/store-update.ll
new file mode 100644
index 000000000000..538ed24fbc46
--- /dev/null
+++ b/test/CodeGen/PowerPC/store-update.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i8* @stbu(i8* %base, i8 zeroext %val) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i8* %base, i64 16
+  store i8 %val, i8* %arrayidx, align 1
+  ret i8* %arrayidx
+}
+; CHECK: @stbu
+; CHECK: %entry
+; CHECK-NEXT: stbu
+; CHECK-NEXT: blr
+
+define i8* @stbux(i8* %base, i8 zeroext %val, i64 %offset) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i8* %base, i64 %offset
+  store i8 %val, i8* %arrayidx, align 1
+  ret i8* %arrayidx
+}
+; CHECK: @stbux
+; CHECK: %entry
+; CHECK-NEXT: stbux
+; CHECK-NEXT: blr
+
+define i16* @sthu(i16* %base, i16 zeroext %val) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i16* %base, i64 16
+  store i16 %val, i16* %arrayidx, align 2
+  ret i16* %arrayidx
+}
+; CHECK: @sthu
+; CHECK: %entry
+; CHECK-NEXT: sthu
+; CHECK-NEXT: blr
+
+define i16* @sthux(i16* %base, i16 zeroext %val, i64 %offset) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i16* %base, i64 %offset
+  store i16 %val, i16* %arrayidx, align 2
+  ret i16* %arrayidx
+}
+; CHECK: @sthux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: sthux
+; CHECK-NEXT: blr
+
+define i32* @stwu(i32* %base, i32 zeroext %val) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i32* %base, i64 16
+  store i32 %val, i32* %arrayidx, align 4
+  ret i32* %arrayidx
+}
+; CHECK: @stwu
+; CHECK: %entry
+; CHECK-NEXT: stwu
+; CHECK-NEXT: blr
+
+define i32* @stwux(i32* %base, i32 zeroext %val, i64 %offset) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i32* %base, i64 %offset
+  store i32 %val, i32* %arrayidx, align 4
+  ret i32* %arrayidx
+}
+; CHECK: @stwux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stwux
+; CHECK-NEXT: blr
+
+define i8* @stbu8(i8* %base, i64 %val) nounwind {
+entry:
+  %conv = trunc i64 %val to i8
+  %arrayidx = getelementptr inbounds i8* %base, i64 16
+  store i8 %conv, i8* %arrayidx, align 1
+  ret i8* %arrayidx
+}
+; CHECK: @stbu
+; CHECK: %entry
+; CHECK-NEXT: stbu
+; CHECK-NEXT: blr
+
+define i8* @stbux8(i8* %base, i64 %val, i64 %offset) nounwind {
+entry:
+  %conv = trunc i64 %val to i8
+  %arrayidx = getelementptr inbounds i8* %base, i64 %offset
+  store i8 %conv, i8* %arrayidx, align 1
+  ret i8* %arrayidx
+}
+; CHECK: @stbux
+; CHECK: %entry
+; CHECK-NEXT: stbux
+; CHECK-NEXT: blr
+
+define i16* @sthu8(i16* %base, i64 %val) nounwind {
+entry:
+  %conv = trunc i64 %val to i16
+  %arrayidx = getelementptr inbounds i16* %base, i64 16
+  store i16 %conv, i16* %arrayidx, align 2
+  ret i16* %arrayidx
+}
+; CHECK: @sthu
+; CHECK: %entry
+; CHECK-NEXT: sthu
+; CHECK-NEXT: blr
+
+define i16* @sthux8(i16* %base, i64 %val, i64 %offset) nounwind {
+entry:
+  %conv = trunc i64 %val to i16
+  %arrayidx = getelementptr inbounds i16* %base, i64 %offset
+  store i16 %conv, i16* %arrayidx, align 2
+  ret i16* %arrayidx
+}
+; CHECK: @sthux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: sthux
+; CHECK-NEXT: blr
+
+define i32* @stwu8(i32* %base, i64 %val) nounwind {
+entry:
+  %conv = trunc i64 %val to i32
+  %arrayidx = getelementptr inbounds i32* %base, i64 16
+  store i32 %conv, i32* %arrayidx, align 4
+  ret i32* %arrayidx
+}
+; CHECK: @stwu
+; CHECK: %entry
+; CHECK-NEXT: stwu
+; CHECK-NEXT: blr
+
+define i32* @stwux8(i32* %base, i64 %val, i64 %offset) nounwind {
+entry:
+  %conv = trunc i64 %val to i32
+  %arrayidx = getelementptr inbounds i32* %base, i64 %offset
+  store i32 %conv, i32* %arrayidx, align 4
+  ret i32* %arrayidx
+}
+; CHECK: @stwux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stwux
+; CHECK-NEXT: blr
+
+define i64* @stdu(i64* %base, i64 %val) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i64* %base, i64 16
+  store i64 %val, i64* %arrayidx, align 8
+  ret i64* %arrayidx
+}
+; CHECK: @stdu
+; CHECK: %entry
+; CHECK-NEXT: stdu
+; CHECK-NEXT: blr
+
+define i64* @stdux(i64* %base, i64 %val, i64 %offset) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i64* %base, i64 %offset
+  store i64 %val, i64* %arrayidx, align 8
+  ret i64* %arrayidx
+}
+; CHECK: @stdux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stdux
+; CHECK-NEXT: blr
+
diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll
index 884d3a89d15a..2a17e740ea01 100644
--- a/test/CodeGen/PowerPC/structsinmem.ll
+++ b/test/CodeGen/PowerPC/structsinmem.ll
@@ -1,9 +1,5 @@
 ; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
 
-; FIXME: The code generation for packed structs is very poor because the
-; PowerPC target wrongly rejects all unaligned loads.  This test case will
-; need to be revised when that is fixed.
-
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -118,8 +114,8 @@ entry:
   ret i32 %add13
 
 ; CHECK: lha {{[0-9]+}}, 126(1)
-; CHECK: lbz {{[0-9]+}}, 119(1)
 ; CHECK: lha {{[0-9]+}}, 132(1)
+; CHECK: lbz {{[0-9]+}}, 119(1)
 ; CHECK: lwz {{[0-9]+}}, 140(1)
 ; CHECK: lwz {{[0-9]+}}, 144(1)
 ; CHECK: lwz {{[0-9]+}}, 152(1)
@@ -209,19 +205,11 @@ entry:
   %add13 = add nsw i32 %add11, %6
   ret i32 %add13
 
-; CHECK: lbz {{[0-9]+}}, 149(1)
-; CHECK: lbz {{[0-9]+}}, 150(1)
-; CHECK: lbz {{[0-9]+}}, 147(1)
-; CHECK: lbz {{[0-9]+}}, 148(1)
-; CHECK: lbz {{[0-9]+}}, 133(1)
-; CHECK: lbz {{[0-9]+}}, 134(1)
 ; CHECK: lha {{[0-9]+}}, 126(1)
+; CHECK: lha {{[0-9]+}}, 133(1)
 ; CHECK: lbz {{[0-9]+}}, 119(1)
 ; CHECK: lwz {{[0-9]+}}, 140(1)
-; CHECK: lhz {{[0-9]+}}, 154(1)
-; CHECK: lhz {{[0-9]+}}, 156(1)
-; CHECK: lbz {{[0-9]+}}, 163(1)
-; CHECK: lbz {{[0-9]+}}, 164(1)
-; CHECK: lbz {{[0-9]+}}, 161(1)
-; CHECK: lbz {{[0-9]+}}, 162(1)
+; CHECK: lwz {{[0-9]+}}, 147(1)
+; CHECK: lwz {{[0-9]+}}, 154(1)
+; CHECK: lwz {{[0-9]+}}, 161(1)
 }
diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll
index ef706af95d65..54de6060d0f0 100644
--- a/test/CodeGen/PowerPC/structsinregs.ll
+++ b/test/CodeGen/PowerPC/structsinregs.ll
@@ -1,9 +1,5 @@
 ; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
 
-; FIXME: The code generation for packed structs is very poor because the
-; PowerPC target wrongly rejects all unaligned loads.  This test case will
-; need to be revised when that is fixed.
-
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -63,13 +59,13 @@ entry:
   %call = call i32 @callee1(%struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7)
   ret i32 %call
 
-; CHECK: ld 9, 128(31)
-; CHECK: ld 8, 136(31)
-; CHECK: ld 7, 144(31)
-; CHECK: lwz 6, 152(31)
-; CHECK: lwz 5, 160(31)
-; CHECK: lhz 4, 168(31)
-; CHECK: lbz 3, 176(31)
+; CHECK: ld 9, 112(31)
+; CHECK: ld 8, 120(31)
+; CHECK: ld 7, 128(31)
+; CHECK: lwz 6, 136(31)
+; CHECK: lwz 5, 144(31)
+; CHECK: lhz 4, 152(31)
+; CHECK: lbz 3, 160(31)
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
@@ -109,8 +105,8 @@ entry:
 ; CHECK: sth 4, 62(1)
 ; CHECK: stb 3, 55(1)
 ; CHECK: lha {{[0-9]+}}, 62(1)
-; CHECK: lbz {{[0-9]+}}, 55(1)
 ; CHECK: lha {{[0-9]+}}, 68(1)
+; CHECK: lbz {{[0-9]+}}, 55(1)
 ; CHECK: lwz {{[0-9]+}}, 76(1)
 ; CHECK: lwz {{[0-9]+}}, 80(1)
 ; CHECK: lwz {{[0-9]+}}, 88(1)
@@ -155,10 +151,10 @@ entry:
 ; CHECK: ld 9, 96(1)
 ; CHECK: ld 8, 88(1)
 ; CHECK: ld 7, 80(1)
-; CHECK: lwz 6, 152(31)
+; CHECK: lwz 6, 136(31)
 ; CHECK: ld 5, 64(1)
-; CHECK: lhz 4, 168(31)
-; CHECK: lbz 3, 176(31)
+; CHECK: lhz 4, 152(31)
+; CHECK: lbz 3, 160(31)
 }
 
 define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind {
@@ -195,19 +191,11 @@ entry:
 ; CHECK: std 5, 64(1)
 ; CHECK: sth 4, 62(1)
 ; CHECK: stb 3, 55(1)
-; CHECK: lbz {{[0-9]+}}, 85(1)
-; CHECK: lbz {{[0-9]+}}, 86(1)
-; CHECK: lbz {{[0-9]+}}, 83(1)
-; CHECK: lbz {{[0-9]+}}, 84(1)
-; CHECK: lbz {{[0-9]+}}, 69(1)
-; CHECK: lbz {{[0-9]+}}, 70(1)
 ; CHECK: lha {{[0-9]+}}, 62(1)
+; CHECK: lha {{[0-9]+}}, 69(1)
 ; CHECK: lbz {{[0-9]+}}, 55(1)
 ; CHECK: lwz {{[0-9]+}}, 76(1)
-; CHECK: lhz {{[0-9]+}}, 90(1)
-; CHECK: lhz {{[0-9]+}}, 92(1)
-; CHECK: lbz {{[0-9]+}}, 99(1)
-; CHECK: lbz {{[0-9]+}}, 100(1)
-; CHECK: lbz {{[0-9]+}}, 97(1)
-; CHECK: lbz {{[0-9]+}}, 98(1)
+; CHECK: lwz {{[0-9]+}}, 83(1)
+; CHECK: lwz {{[0-9]+}}, 90(1)
+; CHECK: lwz {{[0-9]+}}, 97(1)
 }
diff --git a/test/CodeGen/PowerPC/stubs.ll b/test/CodeGen/PowerPC/stubs.ll
index 4889263b4c4e..cfcc50b7a876 100644
--- a/test/CodeGen/PowerPC/stubs.ll
+++ b/test/CodeGen/PowerPC/stubs.ll
@@ -10,8 +10,8 @@ entry:
 ; CHECK: 	.section	__TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
 ; CHECK: ___floatditf$stub:
 ; CHECK: 	.indirect_symbol ___floatditf
-; CHECK: 	lis r11,ha16(___floatditf$lazy_ptr)
-; CHECK: 	lwzu r12,lo16(___floatditf$lazy_ptr)(r11)
+; CHECK: 	lis r11, ha16(___floatditf$lazy_ptr)
+; CHECK: 	lwzu r12, lo16(___floatditf$lazy_ptr)(r11)
 ; CHECK: 	mtctr r12
 ; CHECK: 	bctr
 ; CHECK: 	.section	__DATA,__la_symbol_ptr,lazy_symbol_pointers
diff --git a/test/CodeGen/PowerPC/stwu8.ll b/test/CodeGen/PowerPC/stwu8.ll
index 897bfc6d6caa..e0bd04345439 100644
--- a/test/CodeGen/PowerPC/stwu8.ll
+++ b/test/CodeGen/PowerPC/stwu8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
diff --git a/test/CodeGen/PowerPC/svr4-redzone.ll b/test/CodeGen/PowerPC/svr4-redzone.ll
new file mode 100644
index 000000000000..91ff5797389b
--- /dev/null
+++ b/test/CodeGen/PowerPC/svr4-redzone.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple="powerpc-unknown-linux-gnu" < %s | FileCheck %s --check-prefix=PPC32
+; RUN: llc -mtriple="powerpc64-unknown-linux-gnu" < %s | FileCheck %s --check-prefix=PPC64
+; PR15332
+
+define void @regalloc() nounwind {
+entry:
+	%0 = add i32 1, 2
+	ret void
+}
+; PPC32: regalloc:
+; PPC32-NOT: stwu 1, -{{[0-9]+}}(1)
+; PPC32: blr
+
+; PPC64: regalloc:
+; PPC64-NOT: stdu 1, -{{[0-9]+}}(1)
+; PPC64: blr
+
+define void @smallstack() nounwind {
+entry:
+	%0 = alloca i8, i32 4
+	ret void
+}
+; PPC32: smallstack:
+; PPC32: stwu 1, -16(1)
+
+; PPC64: smallstack:
+; PPC64-NOT: stdu 1, -{{[0-9]+}}(1)
+; PPC64: blr
+
+define void @bigstack() nounwind {
+entry:
+	%0 = alloca i8, i32 230
+	ret void
+}
+; PPC32: bigstack:
+; PPC32: stwu 1, -240(1)
+
+; PPC64: bigstack:
+; PPC64: stdu 1, -352(1)
diff --git a/test/CodeGen/PowerPC/tls-2.ll b/test/CodeGen/PowerPC/tls-2.ll
new file mode 100644
index 000000000000..20d8fe46ea17
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-2.ll
@@ -0,0 +1,15 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd10.0"
+; RUN: llc -O1 < %s -march=ppc64 | FileCheck %s
+
+@a = thread_local global i32 0, align 4
+
+;CHECK:          localexec:
+define i32 @localexec() nounwind {
+entry:
+;CHECK:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;CHECK-NEXT:     li [[REG2:[0-9]+]], 42
+;CHECK-NEXT:     stw [[REG2]], a@tprel@l([[REG1]])
+  store i32 42, i32* @a, align 4
+  ret i32 0
+}
diff --git a/test/CodeGen/PowerPC/tls-gd-obj.ll b/test/CodeGen/PowerPC/tls-gd-obj.ll
new file mode 100644
index 000000000000..00b537d5325b
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-gd-obj.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; Test correct relocation generation for thread-local storage using
+; the general dynamic model and integrated assembly.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_GOT_TLSGD16_HA, R_PPC64_GOT_TLSGD16_LO,
+; and R_PPC64_TLSGD for accessing external variable a, and R_PPC64_REL24
+; for the call to __tls_get_addr.
+;
+; CHECK:       '.rela.text'
+; CHECK:       Relocation 0
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9a-f]+]]
+; CHECK-NEXT:  'r_type', 0x00000052
+; CHECK:       Relocation 1
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
+; CHECK-NEXT:  'r_type', 0x00000050
+; CHECK:       Relocation 2
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
+; CHECK-NEXT:  'r_type', 0x0000006b
+; CHECK:       Relocation 3
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x{{[0-9a-f]+}}
+; CHECK-NEXT:  'r_type', 0x0000000a
+
diff --git a/test/CodeGen/PowerPC/tls-gd.ll b/test/CodeGen/PowerPC/tls-gd.ll
new file mode 100644
index 000000000000..5f0ef9a050da
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-gd.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck %s
+
+; Test correct assembly code generation for thread-local storage using
+; the general dynamic model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
+; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsgd@ha
+; CHECK-NEXT: addi 3, [[REG]], a@got@tlsgd@l
+; CHECK:      bl __tls_get_addr(a@tlsgd)
+; CHECK-NEXT: nop
+
diff --git a/test/CodeGen/PowerPC/tls-ie-obj.ll b/test/CodeGen/PowerPC/tls-ie-obj.ll
new file mode 100644
index 000000000000..3600cc52ba54
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ie-obj.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mcpu=pwr7 -O0 -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; Test correct relocation generation for thread-local storage
+; using the initial-exec model and integrated assembly.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = external thread_local global i32
+
+define signext i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_GOT_TPREL16_DS and R_PPC64_TLS for
+; accessing external variable a.
+;
+; CHECK:       '.rela.text'
+; CHECK:       Relocation 0
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9a-f]+]]
+; CHECK-NEXT:  'r_type', 0x0000005a
+; CHECK:       Relocation 1
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
+; CHECK-NEXT:  'r_type', 0x00000058
+; CHECK:       Relocation 2
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
+; CHECK-NEXT:  'r_type', 0x00000043
+
diff --git a/test/CodeGen/PowerPC/tls-ie.ll b/test/CodeGen/PowerPC/tls-ie.ll
new file mode 100644
index 000000000000..c5cfba7b3f7a
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ie.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mcpu=pwr7 -O0 <%s | FileCheck %s
+
+; Test correct assembly code generation for thread-local storage
+; using the initial-exec model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = external thread_local global i32
+
+define signext i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
+; CHECK: addis [[REG1:[0-9]+]], 2, a@got@tprel@ha
+; CHECK: ld [[REG2:[0-9]+]], a@got@tprel@l([[REG1]])
+; CHECK: add {{[0-9]+}}, [[REG2]], a@tls
+
diff --git a/test/CodeGen/PowerPC/tls-ld-2.ll b/test/CodeGen/PowerPC/tls-ld-2.ll
new file mode 100644
index 000000000000..4399b330ea47
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ld-2.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck %s
+
+; Test peephole optimization for thread-local storage using the
+; local dynamic model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = hidden thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
+; CHECK:      addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
+; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l
+; CHECK:      bl __tls_get_addr(a@tlsld)
+; CHECK-NEXT: nop
+; CHECK:      addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; CHECK-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]])
diff --git a/test/CodeGen/PowerPC/tls-ld-obj.ll b/test/CodeGen/PowerPC/tls-ld-obj.ll
new file mode 100644
index 000000000000..c521ae405f46
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ld-obj.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; Test correct relocation generation for thread-local storage using
+; the local dynamic model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = hidden thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_GOT_TLSLD16_HA, R_PPC64_GOT_TLSLD16_LO,
+; R_PPC64_TLSLD, R_PPC64_DTPREL16_HA, and R_PPC64_DTPREL16_LO for
+; accessing external variable a, and R_PPC64_REL24 for the call to
+; __tls_get_addr.
+;
+; CHECK:       '.rela.text'
+; CHECK:       Relocation 0
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9a-f]+]]
+; CHECK-NEXT:  'r_type', 0x00000056
+; CHECK:       Relocation 1
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
+; CHECK-NEXT:  'r_type', 0x00000054
+; CHECK:       Relocation 2
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
+; CHECK-NEXT:  'r_type', 0x0000006c
+; CHECK:       Relocation 3
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x{{[0-9a-f]+}}
+; CHECK-NEXT:  'r_type', 0x0000000a
+; CHECK:       Relocation 4
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
+; CHECK-NEXT:  'r_type', 0x0000004d
+; CHECK:       Relocation 5
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
+; CHECK-NEXT:  'r_type', 0x0000004b
+
diff --git a/test/CodeGen/PowerPC/tls-ld.ll b/test/CodeGen/PowerPC/tls-ld.ll
new file mode 100644
index 000000000000..db02a56f6a22
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ld.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck %s
+
+; Test correct assembly code generation for thread-local storage using
+; the local dynamic model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = hidden thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
+; CHECK:      addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
+; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l
+; CHECK:      bl __tls_get_addr(a@tlsld)
+; CHECK-NEXT: nop
+; CHECK:      addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; CHECK-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l
diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll
index 713893bf5862..2daa60ab37f2 100644
--- a/test/CodeGen/PowerPC/tls.ll
+++ b/test/CodeGen/PowerPC/tls.ll
@@ -1,16 +1,21 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-freebsd10.0"
-; RUN: llc < %s -march=ppc64 | FileCheck %s
+; RUN: llc -O0 < %s -march=ppc64 | FileCheck -check-prefix=OPT0 %s
+; RUN: llc -O1 < %s -march=ppc64 | FileCheck -check-prefix=OPT1 %s
 
 @a = thread_local global i32 0, align 4
 
-;CHECK:          localexec:
+;OPT0:          localexec:
+;OPT1:          localexec:
 define i32 @localexec() nounwind {
 entry:
-;CHECK:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
-;CHECK-NEXT:     li [[REG2:[0-9]+]], 42
-;CHECK-NEXT:     addi [[REG1]], [[REG1]], a@tprel@l
-;CHECK-NEXT:     stw [[REG2]], 0([[REG1]])
+;OPT0:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;OPT0-NEXT:     li [[REG2:[0-9]+]], 42
+;OPT0-NEXT:     addi [[REG1]], [[REG1]], a@tprel@l
+;OPT0:          stw [[REG2]], 0([[REG1]])
+;OPT1:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;OPT1-NEXT:     li [[REG2:[0-9]+]], 42
+;OPT1-NEXT:     stw [[REG2]], a@tprel@l([[REG1]])
   store i32 42, i32* @a, align 4
   ret i32 0
 }
diff --git a/test/CodeGen/PowerPC/unal4-std.ll b/test/CodeGen/PowerPC/unal4-std.ll
new file mode 100644
index 000000000000..169bd787c0c1
--- /dev/null
+++ b/test/CodeGen/PowerPC/unal4-std.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define fastcc void @copy_to_conceal() #0 {
+entry:
+  br i1 undef, label %if.then, label %if.end210
+
+if.then:                                          ; preds = %entry
+  br label %vector.body.i
+
+vector.body.i:                                    ; preds = %vector.body.i, %if.then
+  %index.i = phi i64 [ 0, %vector.body.i ], [ 0, %if.then ]
+  store <8 x i16> zeroinitializer, <8 x i16>* undef, align 2
+  br label %vector.body.i
+
+if.end210:                                        ; preds = %entry
+  ret void
+
+; This will generate two align-1 i64 stores. Make sure that they are
+; indexed stores and not in r+i form (which require the offset to be
+; a multiple of 4).
+; CHECK: @copy_to_conceal
+; CHECK: stdx {{[0-9]+}}, 0,
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/unaligned.ll b/test/CodeGen/PowerPC/unaligned.ll
new file mode 100644
index 000000000000..d05080338f33
--- /dev/null
+++ b/test/CodeGen/PowerPC/unaligned.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define void @foo1(i16* %p, i16* %r) nounwind {
+entry:
+  %v = load i16* %p, align 1
+  store i16 %v, i16* %r, align 1
+  ret void
+
+; CHECK: @foo1
+; CHECK: lhz
+; CHECK: sth
+}
+
+define void @foo2(i32* %p, i32* %r) nounwind {
+entry:
+  %v = load i32* %p, align 1
+  store i32 %v, i32* %r, align 1
+  ret void
+
+; CHECK: @foo2
+; CHECK: lwz
+; CHECK: stw
+}
+
+define void @foo3(i64* %p, i64* %r) nounwind {
+entry:
+  %v = load i64* %p, align 1
+  store i64 %v, i64* %r, align 1
+  ret void
+
+; CHECK: @foo3
+; CHECK: ld
+; CHECK: std
+}
+
+define void @foo4(float* %p, float* %r) nounwind {
+entry:
+  %v = load float* %p, align 1
+  store float %v, float* %r, align 1
+  ret void
+
+; CHECK: @foo4
+; CHECK: lfs
+; CHECK: stfs
+}
+
+define void @foo5(double* %p, double* %r) nounwind {
+entry:
+  %v = load double* %p, align 1
+  store double %v, double* %r, align 1
+  ret void
+
+; CHECK: @foo5
+; CHECK: lfd
+; CHECK: stfd
+}
+
+define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
+entry:
+  %v = load <4 x float>* %p, align 1
+  store <4 x float> %v, <4 x float>* %r, align 1
+  ret void
+
+; These loads and stores are legalized into aligned loads and stores
+; using aligned stack slots.
+; CHECK: @foo6
+; CHECK: ld
+; CHECK: ld
+; CHECK: std
+; CHECK: std
+}
+
diff --git a/test/CodeGen/PowerPC/vaddsplat.ll b/test/CodeGen/PowerPC/vaddsplat.ll
new file mode 100644
index 000000000000..e65148aff03a
--- /dev/null
+++ b/test/CodeGen/PowerPC/vaddsplat.ll
@@ -0,0 +1,149 @@
+; RUN: llc -O0 -mcpu=pwr7 <%s | FileCheck %s
+
+; Test optimizations of build_vector for 6-bit immediates.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%v4i32 = type <4 x i32>
+%v8i16 = type <8 x i16>
+%v16i8 = type <16 x i8>
+
+define void @test_v4i32_pos_even(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 18, i32 18, i32 18, i32 18 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_pos_even:
+; CHECK: vspltisw [[REG1:[0-9]+]], 9
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v4i32_neg_even(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 -28, i32 -28, i32 -28, i32 -28 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_neg_even:
+; CHECK: vspltisw [[REG1:[0-9]+]], -14
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v8i16_pos_even(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_pos_even:
+; CHECK: vspltish [[REG1:[0-9]+]], 15
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v8i16_neg_even(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_neg_even:
+; CHECK: vspltish [[REG1:[0-9]+]], -16
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v16i8_pos_even(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_pos_even:
+; CHECK: vspltisb [[REG1:[0-9]+]], 8
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v16i8_neg_even(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_neg_even:
+; CHECK: vspltisb [[REG1:[0-9]+]], -9
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v4i32_pos_odd(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 27, i32 27, i32 27, i32 27 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_pos_odd:
+; CHECK: vspltisw [[REG2:[0-9]+]], -16
+; CHECK: vspltisw [[REG1:[0-9]+]], 11
+; CHECK: vsubuwm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v4i32_neg_odd(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 -27, i32 -27, i32 -27, i32 -27 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_neg_odd:
+; CHECK: vspltisw [[REG2:[0-9]+]], -16
+; CHECK: vspltisw [[REG1:[0-9]+]], -11
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v8i16_pos_odd(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_pos_odd:
+; CHECK: vspltish [[REG2:[0-9]+]], -16
+; CHECK: vspltish [[REG1:[0-9]+]], 15
+; CHECK: vsubuhm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v8i16_neg_odd(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_neg_odd:
+; CHECK: vspltish [[REG2:[0-9]+]], -16
+; CHECK: vspltish [[REG1:[0-9]+]], -15
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v16i8_pos_odd(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_pos_odd:
+; CHECK: vspltisb [[REG2:[0-9]+]], -16
+; CHECK: vspltisb [[REG1:[0-9]+]], 1
+; CHECK: vsububm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v16i8_neg_odd(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_neg_odd:
+; CHECK: vspltisb [[REG2:[0-9]+]], -16
+; CHECK: vspltisb [[REG1:[0-9]+]], -1
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG2]]
+
diff --git a/test/CodeGen/PowerPC/varargs.ll b/test/CodeGen/PowerPC/varargs.ll
index 1769be957ac4..90f0480d6ad2 100644
--- a/test/CodeGen/PowerPC/varargs.ll
+++ b/test/CodeGen/PowerPC/varargs.ll
@@ -8,15 +8,16 @@ define i8* @test1(i8** %foo) nounwind {
 }
 
 ; P32: test1:
-; P32: 	lwz r4, 0(r3)
-; P32:	addi r5, r4, 4
-; P32:	stw r5, 0(r3)
-; P32:	lwz r3, 0(r4)
-; P32:	blr 
+; P32: lwz r2, 0(r3)
+; P32: addi r4, r2, 4
+; P32: stw r4, 0(r3)
+; P32: lwz r3, 0(r2)
+; P32: blr 
 
 ; P64: test1:
-; P64: ld r4, 0(r3)
-; P64: addi r5, r4, 8
-; P64: std r5, 0(r3)
-; P64: ld r3, 0(r4)
-; P64: blr
+; P64: ld r2, 0(r3)
+; P64: addi r4, r2, 8
+; P64: std r4, 0(r3)
+; P64: ld r3, 0(r2)
+; P64: blr 
+
diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll
index 3180f464d125..eb41667610cd 100644
--- a/test/CodeGen/PowerPC/vec_cmp.ll
+++ b/test/CodeGen/PowerPC/vec_cmp.ll
@@ -54,7 +54,7 @@ entry:
 }
 ; CHECK:     v16si8_cmp_ne:
 ; CHECK:     vcmpequb [[RET:[0-9]+]], 2, 3
-; CHECK-NOR: vnor     2, [[RET]], [[RET]]
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <16 x i8> @v16si8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
 entry:
diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll
index 399f19f8d2e2..e4799e50e6ad 100644
--- a/test/CodeGen/PowerPC/vec_constants.ll
+++ b/test/CodeGen/PowerPC/vec_constants.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI
+; RUN: llc -O0 -mcpu=pwr7 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
 
 define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
 	%tmp = load <4 x i32>* %P1		; <<4 x i32>> [#uses=1]
@@ -13,32 +16,71 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
 	%tmp13 = bitcast <4 x i32> %tmp12 to <4 x float>		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp13, <4 x float>* %P3
 	ret void
+
+; CHECK: test1:
+; CHECK-NOT: CPI
 }
 
 define <4 x i32> @test_30() nounwind {
 	ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 >
+
+; CHECK: test_30:
+; CHECK: vspltisw
+; CHECK-NEXT: vadduwm
+; CHECK-NEXT: blr
 }
 
 define <4 x i32> @test_29() nounwind {
 	ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 >
+
+; CHECK: test_29:
+; CHECK: vspltisw
+; CHECK-NEXT: vspltisw
+; CHECK-NEXT: vsubuwm
+; CHECK-NEXT: blr
 }
 
 define <8 x i16> @test_n30() nounwind {
 	ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 >
+
+; CHECK: test_n30:
+; CHECK: vspltish
+; CHECK-NEXT: vadduhm
+; CHECK-NEXT: blr
 }
 
 define <16 x i8> @test_n104() nounwind {
 	ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 >
+
+; CHECK: test_n104:
+; CHECK: vspltisb
+; CHECK-NEXT: vslb
+; CHECK-NEXT: blr
 }
 
 define <4 x i32> @test_vsldoi() nounwind {
 	ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 >
+
+; CHECK: test_vsldoi:
+; CHECK: vspltisw
+; CHECK-NEXT: vsldoi
+; CHECK-NEXT: blr
 }
 
 define <8 x i16> @test_vsldoi_65023() nounwind {
 	ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 >
+
+; CHECK: test_vsldoi_65023:
+; CHECK: vspltish
+; CHECK-NEXT: vsldoi
+; CHECK-NEXT: blr
 }
 
 define <4 x i32> @test_rol() nounwind {
 	ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 >
+
+; CHECK: test_rol:
+; CHECK: vspltisw
+; CHECK-NEXT: vrlw
+; CHECK-NEXT: blr
 }
diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll
index 201c15b9c735..998645d90da6 100644
--- a/test/CodeGen/PowerPC/vec_extload.ll
+++ b/test/CodeGen/PowerPC/vec_extload.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+; RUN: llc -mcpu=pwr6 -mattr=+altivec -code-model=small < %s | FileCheck %s
 
 ; Check vector extend load expansion with altivec enabled.
 
@@ -15,55 +15,9 @@ define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) {
   ret <16 x i8> %c
 }
 ; CHECK: v16si8_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vslb
+; CHECK: vsrab
+; CHECK: blr 
 
 ; The zero extend uses a more clever logic: a vector splat
 ; and a logic and to set higher bits to 0.
@@ -83,31 +37,9 @@ define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) {
   ret <8 x i16> %c
 }
 ; CHECK: v8si16_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vslh
+; CHECK: vsrah
+; CHECK: blr 
 
 ; Same as v8si16_sext_in_reg, but instead of creating the mask
 ; with a splat, loads it from memory.
@@ -129,19 +61,9 @@ define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) {
   ret <4 x i32> %c
 }
 ; CHECK: v4si32_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vslw
+; CHECK: vsraw
+; CHECK: blr 
 
 ; Same as v8si16_sext_in_reg.
 define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
index 80f4de4a1728..53bc75dd1078 100644
--- a/test/CodeGen/PowerPC/vec_mul.ll
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep mullw
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vmsumuhm
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec | FileCheck %s
 
 define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
 	%tmp = load <4 x i32>* %X		; <<4 x i32>> [#uses=1]
@@ -7,6 +6,9 @@ define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
 	%tmp3 = mul <4 x i32> %tmp, %tmp2		; <<4 x i32>> [#uses=1]
 	ret <4 x i32> %tmp3
 }
+; CHECK: test_v4i32:
+; CHECK: vmsumuhm
+; CHECK-NOT: mullw
 
 define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
 	%tmp = load <8 x i16>* %X		; <<8 x i16>> [#uses=1]
@@ -14,6 +16,9 @@ define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
 	%tmp3 = mul <8 x i16> %tmp, %tmp2		; <<8 x i16>> [#uses=1]
 	ret <8 x i16> %tmp3
 }
+; CHECK: test_v8i16:
+; CHECK: vmladduhm
+; CHECK-NOT: mullw
 
 define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
 	%tmp = load <16 x i8>* %X		; <<16 x i8>> [#uses=1]
@@ -21,3 +26,21 @@ define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
 	%tmp3 = mul <16 x i8> %tmp, %tmp2		; <<16 x i8>> [#uses=1]
 	ret <16 x i8> %tmp3
 }
+; CHECK: test_v16i8:
+; CHECK: vmuloub
+; CHECK: vmuleub
+; CHECK-NOT: mullw
+
+define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
+	%tmp = load <4 x float>* %X
+	%tmp2 = load <4 x float>* %Y
+	%tmp3 = fmul <4 x float> %tmp, %tmp2
+	ret <4 x float> %tmp3
+}
+; Check the creation of a negative zero float vector by creating a vector of
+; all bits set and shifting it 31 bits to left, resulting a an vector of 
+; 4 x 0x80000000 (-0.0 as float).
+; CHECK: test_float:
+; CHECK: vspltisw [[ZNEG:[0-9]+]], -1
+; CHECK: vslw     {{[0-9]+}}, [[ZNEG]], [[ZNEG]]
+; CHECK: vmaddfp
diff --git a/test/CodeGen/PowerPC/vec_rounding.ll b/test/CodeGen/PowerPC/vec_rounding.ll
new file mode 100644
index 000000000000..7c55638620a9
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_rounding.ll
@@ -0,0 +1,172 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+
+; Check vector round to single-precision toward -infinity (vrfim)
+; instruction generation using Altivec.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
+define <2 x double> @floor_v2f64(<2 x double> %p)
+{
+  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+; CHECK: floor_v2f64:
+; CHECK: frim
+; CHECK: frim
+
+declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
+define <4 x double> @floor_v4f64(<4 x double> %p)
+{
+  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+; CHECK: floor_v4f64:
+; CHECK: frim
+; CHECK: frim
+; CHECK: frim
+; CHECK: frim
+
+declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+define <2 x double> @ceil_v2f64(<2 x double> %p)
+{
+  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+; CHECK: ceil_v2f64:
+; CHECK: frip
+; CHECK: frip
+
+declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+define <4 x double> @ceil_v4f64(<4 x double> %p)
+{
+  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+; CHECK: ceil_v4f64:
+; CHECK: frip
+; CHECK: frip
+; CHECK: frip
+; CHECK: frip
+
+declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+define <2 x double> @trunc_v2f64(<2 x double> %p)
+{
+  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+; CHECK: trunc_v2f64:
+; CHECK: friz
+; CHECK: friz
+
+declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+define <4 x double> @trunc_v4f64(<4 x double> %p)
+{
+  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+; CHECK: trunc_v4f64:
+; CHECK: friz
+; CHECK: friz
+; CHECK: friz
+; CHECK: friz
+
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+define <2 x double> @nearbyint_v2f64(<2 x double> %p)
+{
+  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+; CHECK: nearbyint_v2f64:
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+define <4 x double> @nearbyint_v4f64(<4 x double> %p)
+{
+  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+; CHECK: nearbyint_v4f64:
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+
+
+declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
+define <4 x float> @floor_v4f32(<4 x float> %p)
+{
+  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+; CHECK: floor_v4f32:
+; CHECK: vrfim
+
+declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+define <8 x float> @floor_v8f32(<8 x float> %p)
+{
+  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+; CHECK: floor_v8f32:
+; CHECK: vrfim
+; CHECK: vrfim
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+define <4 x float> @ceil_v4f32(<4 x float> %p)
+{
+  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+; CHECK: ceil_v4f32:
+; CHECK: vrfip
+
+declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+define <8 x float> @ceil_v8f32(<8 x float> %p)
+{
+  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+; CHECK: ceil_v8f32:
+; CHECK: vrfip
+; CHECK: vrfip
+
+declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+define <4 x float> @trunc_v4f32(<4 x float> %p)
+{
+  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+; CHECK: trunc_v4f32:
+; CHECK: vrfiz
+
+declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+define <8 x float> @trunc_v8f32(<8 x float> %p)
+{
+  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+; CHECK: trunc_v8f32:
+; CHECK: vrfiz
+; CHECK: vrfiz
+
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+define <4 x float> @nearbyint_v4f32(<4 x float> %p)
+{
+  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+; CHECK: nearbyint_v4f32:
+; CHECK: vrfin
+
+declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+define <8 x float> @nearbyint_v8f32(<8 x float> %p)
+{
+  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+; CHECK: nearbyint_v8f32:
+; CHECK: vrfin
+; CHECK: vrfin
diff --git a/test/CodeGen/PowerPC/vec_select.ll b/test/CodeGen/PowerPC/vec_select.ll
new file mode 100644
index 000000000000..4ad0acca0067
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_select.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -mtriple=powerpc64-linux-gnu -mattr=+altivec | FileCheck %s
+
+; CHECK: vsel_float
+define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
+  ret <4 x float> %vsel
+}
diff --git a/test/CodeGen/PowerPC/vrsave-spill.ll b/test/CodeGen/PowerPC/vrsave-spill.ll
new file mode 100644
index 000000000000..c73206d8fc86
--- /dev/null
+++ b/test/CodeGen/PowerPC/vrsave-spill.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin -mcpu=g5 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-apple-darwin"
+
+define <4 x float> @foo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %c = fadd <4 x float> %a, %b
+  %d = fmul <4 x float> %c, %a
+  call void asm sideeffect "", "~{VRsave}"() nounwind
+  br label %return
+
+; CHECK: @foo
+; CHECK: mfspr r{{[0-9]+}}, 256
+; CHECK: mtspr 256, r{{[0-9]+}}
+
+return:                                           ; preds = %entry
+  ret <4 x float> %d
+}
+
diff --git a/test/CodeGen/PowerPC/vrspill.ll b/test/CodeGen/PowerPC/vrspill.ll
index 7641017c434e..9fb3d03477c9 100644
--- a/test/CodeGen/PowerPC/vrspill.ll
+++ b/test/CodeGen/PowerPC/vrspill.ll
@@ -13,7 +13,7 @@ entry:
   ret void
 }
 
-; CHECK: stvx 2, 0, 0
-; CHECK: lvx 2, 0, 0
+; CHECK: stvx 2, 1,
+; CHECK: lvx 2, 1,
 
 declare void @foo(i32*)