vendor/llvm/llvm-trunk-r178860

author: Dimitry Andric <dim@FreeBSD.org> 2013-04-08 18:41:23 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2013-04-08 18:41:23 +0000
commit: 4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 (patch)
tree: 06099edc18d30894081a822b756f117cbe0b8207 /test/Transforms
parent: 482e7bddf617ae804dc47133cb07eb4aa81e45de (diff)
375 files changed, 11571 insertions, 1365 deletions
diff --git a/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
index e740b29f9288..1226b98a998e 100644
--- a/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -1,15 +1,19 @@
-; RUN: opt < %s -argpromotion -S | grep nounwind | count 2
+; RUN: opt < %s -argpromotion -S | FileCheck %s
 
+; CHECK: define internal i32 @deref(i32 %x.val) #0 {
 define internal i32 @deref(i32* %x) nounwind {
 entry:
-	%tmp2 = load i32* %x, align 4		; <i32> [#uses=1]
-	ret i32 %tmp2
+  %tmp2 = load i32* %x, align 4
+  ret i32 %tmp2
 }
 
 define i32 @f(i32 %x) {
 entry:
-	%x_addr = alloca i32		; <i32*> [#uses=2]
-	store i32 %x, i32* %x_addr, align 4
-	%tmp1 = call i32 @deref( i32* %x_addr ) nounwind 		; <i32> [#uses=1]
-	ret i32 %tmp1
+  %x_addr = alloca i32
+  store i32 %x, i32* %x_addr, align 4
+; CHECK: %tmp1 = call i32 @deref(i32 %x_addr.val) [[NUW:#[0-9]+]]
+  %tmp1 = call i32 @deref( i32* %x_addr ) nounwind
+  ret i32 %tmp1
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ArgumentPromotion/crash.ll b/test/Transforms/ArgumentPromotion/crash.ll
index fed002aa98a9..5e1a0370dbb1 100644
--- a/test/Transforms/ArgumentPromotion/crash.ll
+++ b/test/Transforms/ArgumentPromotion/crash.ll
@@ -1,7 +1,5 @@
+; RUN: opt -inline -argpromotion < %s
 ; rdar://7879828
-; RUN: opt -inline -argpromotion %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
 
 define void @foo() {
   invoke void @foo2()
@@ -11,6 +9,8 @@ if.end432:
   unreachable
 
 for.end520: 
+  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+           cleanup
   unreachable
 }
 
@@ -57,3 +57,5 @@ init:
   %4 = call i32 @"clay_assign(Chain, Chain)"(%0* %3, %0* %1)
   ret i32 0
 }
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/BBVectorize/X86/pr15289.ll b/test/Transforms/BBVectorize/X86/pr15289.ll
new file mode 100644
index 000000000000..07cc5d8b96b7
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/pr15289.ll
@@ -0,0 +1,98 @@
+; RUN: opt < %s -basicaa -bb-vectorize -disable-output
+; This is a bugpoint-reduced test case. It did not always assert, but does reproduce the bug
+; and running under valgrind (or some similar tool) will catch the error.
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin12.2.0"
+
+%0 = type { [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }] }
+%1 = type { [10 x [8 x i8]] }
+%2 = type { i64, i64 }
+%3 = type { [10 x i64], i64, i64, i64, i64, i64 }
+%4 = type { i64, i64, i64, i64, i64, i64 }
+%5 = type { [10 x i64] }
+%6 = type { [10 x float], [10 x float], [10 x float], [10 x float] }
+%struct.__st_parameter_dt.1.3.5.7 = type { %struct.__st_parameter_common.0.2.4.6, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
+%struct.__st_parameter_common.0.2.4.6 = type { i32, i32, i8*, i32, i32, i8*, i32* }
+
+@cctenso_ = external unnamed_addr global %0, align 32
+@ctenso_ = external unnamed_addr global %1, align 32
+@i_dim_ = external unnamed_addr global %2, align 16
+@itenso1_ = external unnamed_addr global %3, align 32
+@itenso2_ = external unnamed_addr global %4, align 32
+@ltenso_ = external unnamed_addr global %5, align 32
+@rtenso_ = external unnamed_addr global %6, align 32
+@.cst = external unnamed_addr constant [8 x i8], align 8
+@.cst1 = external unnamed_addr constant [3 x i8], align 8
+@.cst2 = external unnamed_addr constant [29 x i8], align 8
+@.cst3 = external unnamed_addr constant [32 x i8], align 64
+
+define void @cart_to_dc2y_(double* noalias nocapture %xx, double* noalias nocapture %yy, double* noalias nocapture %zz, [5 x { double, double }]* noalias nocapture %c2ten) nounwind uwtable {
+entry:
+  %0 = fmul double undef, undef
+  %1 = fmul double undef, undef
+  %2 = fadd double undef, undef
+  %3 = fmul double undef, 0x3FE8B8B76E3E9919
+  %4 = fsub double %0, %1
+  %5 = fsub double -0.000000e+00, undef
+  %6 = fmul double undef, undef
+  %7 = fmul double %4, %6
+  %8 = fmul double undef, 2.000000e+00
+  %9 = fmul double %8, undef
+  %10 = fmul double undef, %9
+  %11 = fmul double %10, undef
+  %12 = fsub double undef, %7
+  %13 = fmul double %3, %12
+  %14 = fmul double %3, undef
+  %15 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 0
+  store double %13, double* %15, align 8, !tbaa !0
+  %16 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 1
+  %17 = fmul double undef, %8
+  %18 = fmul double %17, undef
+  %19 = fmul double undef, %18
+  %20 = fadd double undef, undef
+  %21 = fmul double %3, %19
+  %22 = fsub double -0.000000e+00, %21
+  %23 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 0
+  store double %22, double* %23, align 8, !tbaa !0
+  %24 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 1
+  %25 = fmul double undef, 0x3FE42F601A8C6794
+  %26 = fmul double undef, 2.000000e+00
+  %27 = fsub double %26, %0
+  %28 = fmul double %6, undef
+  %29 = fsub double undef, %28
+  %30 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 0
+  store double undef, double* %30, align 8, !tbaa !0
+  %31 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 1
+  %32 = fmul double undef, %17
+  %33 = fmul double undef, %17
+  %34 = fmul double undef, %32
+  %35 = fmul double undef, %33
+  %36 = fsub double undef, %35
+  %37 = fmul double %3, %34
+  %38 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 0
+  store double %37, double* %38, align 8, !tbaa !0
+  %39 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 1
+  %40 = fmul double undef, %8
+  %41 = fmul double undef, %40
+  %42 = fmul double undef, %41
+  %43 = fsub double undef, %42
+  %44 = fmul double %3, %43
+  %45 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 0
+  store double %13, double* %45, align 8, !tbaa !0
+  %46 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 1
+  %47 = fsub double -0.000000e+00, %14
+  store double %47, double* %16, align 8, !tbaa !0
+  store double undef, double* %24, align 8, !tbaa !0
+  store double -0.000000e+00, double* %31, align 8, !tbaa !0
+  store double undef, double* %39, align 8, !tbaa !0
+  store double undef, double* %46, align 8, !tbaa !0
+  ret void
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!0 = metadata !{metadata !"alias set 17: real(kind=8)", metadata !1}
+!1 = metadata !{metadata !1}
diff --git a/test/Transforms/BBVectorize/X86/simple-int.ll b/test/Transforms/BBVectorize/X86/simple-int.ll
new file mode 100644
index 000000000000..f5dbe46b1480
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/simple-int.ll
@@ -0,0 +1,79 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.fmuladd.f64(double, double, double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.powi.f64(double, i32)
+
+; Basic depth-3 chain with fma
+define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
+	%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with fmuladd
+define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
+	%Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1a
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with cos
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.cos.f64(double %X1)
+	%Y2 = call double @llvm.cos.f64(double %X2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi
+define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test3
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi (different powers: should not vectorize)
+define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+        %P2 = add i32 %P, 1
+	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test4
+; CHECK: ret double %R
+}
+
diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll
index e8e82ce02479..bdcb30da887f 100644
--- a/test/Transforms/BBVectorize/cycle.ll
+++ b/test/Transforms/BBVectorize/cycle.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
 
 ; This test checks the non-trivial pairing-induced cycle avoidance. Without this cycle avoidance, the algorithm would otherwise
 ; want to select the pairs:
diff --git a/test/Transforms/BBVectorize/ld1.ll b/test/Transforms/BBVectorize/ld1.ll
index cea225d076e1..ea5cb5dd93f7 100644
--- a/test/Transforms/BBVectorize/ld1.ll
+++ b/test/Transforms/BBVectorize/ld1.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
 
 define double @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
 entry:
diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll
index c22ea5852a1b..e592edb44a02 100644
--- a/test/Transforms/BBVectorize/loop1.ll
+++ b/test/Transforms/BBVectorize/loop1.ll
@@ -1,7 +1,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
 ; The second check covers the use of alias analysis (with loop unrolling).
 
 define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
diff --git a/test/Transforms/BBVectorize/req-depth.ll b/test/Transforms/BBVectorize/req-depth.ll
index 8c9cc3c188e3..e0120059b952 100644
--- a/test/Transforms/BBVectorize/req-depth.ll
+++ b/test/Transforms/BBVectorize/req-depth.ll
@@ -1,6 +1,6 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -S | FileCheck %s -check-prefix=CHECK-RD3
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -S | FileCheck %s -check-prefix=CHECK-RD2
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD3
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD2
 
 define double @test1(double %A1, double %A2, double %B1, double %B2) {
 	%X1 = fsub double %A1, %B1
diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll
index aeaf98865bc9..a694e45bc181 100644
--- a/test/Transforms/BBVectorize/search-limit.ll
+++ b/test/Transforms/BBVectorize/search-limit.ll
@@ -1,6 +1,6 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
 
 define double @test1(double %A1, double %A2, double %B1, double %B2) {
 ; CHECK: @test1
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
index ae1d63bfd852..e4d51526ca11 100644
--- a/test/Transforms/BBVectorize/simple-int.ll
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -1,7 +1,8 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
 
 declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.fmuladd.f64(double, double, double)
 declare double @llvm.cos.f64(double)
 declare double @llvm.powi.f64(double, i32)
 
@@ -31,6 +32,32 @@ define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1,
 ; CHECK: ret double %R
 }
 
+; Basic depth-3 chain with fmuladd
+define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
+	%Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1a
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
+; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
+; CHECK: %Y1 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
 ; Basic depth-3 chain with cos
 define double @test2(double %A1, double %A2, double %B1, double %B2) {
 	%X1 = fsub double %A1, %B1
@@ -97,7 +124,10 @@ define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
 ; CHECK: ret double %R
 }
 
-; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) nounwind readonly
-; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) nounwind readonly
+; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #1
+; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #1
 
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll
index 7dd77c933f6d..8e51d297e8ec 100644
--- a/test/Transforms/BBVectorize/simple-ldstr.ll
+++ b/test/Transforms/BBVectorize/simple-ldstr.ll
@@ -1,6 +1,6 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
 
 ; Simple 3-pair chain with loads and stores
 define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll
index 15ecb597025a..8caccfd32c34 100644
--- a/test/Transforms/BBVectorize/simple-sel.ll
+++ b/test/Transforms/BBVectorize/simple-sel.ll
@@ -1,6 +1,6 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-no-bools -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-NB
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-no-bools -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-NB
 
 ; Basic depth-3 chain with select
 define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) {
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
index 3527ae75b457..a447908d16cc 100644
--- a/test/Transforms/BBVectorize/simple.ll
+++ b/test/Transforms/BBVectorize/simple.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
 
 ; Basic depth-3 chain
 define double @test1(double %A1, double %A2, double %B1, double %B2) {
diff --git a/test/Transforms/BBVectorize/simple3.ll b/test/Transforms/BBVectorize/simple3.ll
index 153be73f832f..78bcc9f83080 100644
--- a/test/Transforms/BBVectorize/simple3.ll
+++ b/test/Transforms/BBVectorize/simple3.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
 
 ; Basic depth-3 chain
 define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) {
diff --git a/test/Transforms/CodeGenPrepare/basic.ll b/test/Transforms/CodeGenPrepare/basic.ll
index c68e77eb555a..d617e43be865 100644
--- a/test/Transforms/CodeGenPrepare/basic.ll
+++ b/test/Transforms/CodeGenPrepare/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -codegenprepare %s -S -o - | FileCheck %s
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/ConstProp/2007-11-23-cttz.ll b/test/Transforms/ConstProp/2007-11-23-cttz.ll
index 6d34cb17fffd..c5ee70c2ff12 100644
--- a/test/Transforms/ConstProp/2007-11-23-cttz.ll
+++ b/test/Transforms/ConstProp/2007-11-23-cttz.ll
@@ -3,6 +3,6 @@
 declare i13 @llvm.cttz.i13(i13, i1)
 
 define i13 @test() {
-	%X = call i13 @llvm.cttz.i13(i13 0, i1 true)
+	%X = call i13 @llvm.cttz.i13(i13 0, i1 false)
 	ret i13 %X
 }
diff --git a/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll b/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
index ce79e3b2964a..a415995070e5 100644
--- a/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
+++ b/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -constmerge %s | FileCheck %s
+; RUN: opt -S -constmerge < %s | FileCheck %s
 
 ; CHECK: @foo = constant i32 6
 ; CHECK: @bar = constant i32 6
diff --git a/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll b/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
index f561daf66781..5aafcfe3d4fb 100644
--- a/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
+++ b/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
@@ -1,4 +1,4 @@
-; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; RUN: opt -constmerge -S < %s | FileCheck %s
 ; PR8978
 
 declare i32 @zed(%struct.foobar*, %struct.foobar*)
diff --git a/test/Transforms/ConstantMerge/merge-both.ll b/test/Transforms/ConstantMerge/merge-both.ll
index b71eb437dbc3..b00345557c83 100644
--- a/test/Transforms/ConstantMerge/merge-both.ll
+++ b/test/Transforms/ConstantMerge/merge-both.ll
@@ -1,4 +1,4 @@
-; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; RUN: opt -constmerge -S < %s | FileCheck %s
 ; Test that in one run var3 is merged into var2 and var1 into var4.
 ; Test that we merge @var5 and @var6 into one with the higher alignment, and
 ; don't merge var7/var8 into var5/var6.
diff --git a/test/Transforms/ConstantMerge/unnamed-addr.ll b/test/Transforms/ConstantMerge/unnamed-addr.ll
index 24100837aabd..aff8540f2cb1 100644
--- a/test/Transforms/ConstantMerge/unnamed-addr.ll
+++ b/test/Transforms/ConstantMerge/unnamed-addr.ll
@@ -1,4 +1,4 @@
-; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; RUN: opt -constmerge -S < %s | FileCheck %s
 ; Test which corresponding x and y are merged and that unnamed_addr
 ; is correctly set.
 
diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll
index 475cd8d772e6..39c437ccfae9 100644
--- a/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -81,6 +81,26 @@ LessThanOrEqualToTwo:
   ret i32 0
 }
 
+declare i32* @f(i32*)
+define void @test5(i32* %x, i32* %y) {
+; CHECK: @test5
+entry:
+  %pre = icmp eq i32* %x, null
+  br i1 %pre, label %return, label %loop
+
+loop:
+  %phi = phi i32* [ %sel, %loop ], [ %x, %entry ]
+; CHECK: %phi = phi i32* [ %f, %loop ], [ %x, %entry ]
+  %f = tail call i32* @f(i32* %phi)
+  %cmp1 = icmp ne i32* %f, %y
+  %sel = select i1 %cmp1, i32* %f, i32* null
+  %cmp2 = icmp eq i32* %sel, null
+  br i1 %cmp2, label %return, label %loop
+
+return:
+  ret void
+}
+
 define i32 @switch1(i32 %s) {
 ; CHECK: @switch1
 entry:
@@ -105,7 +125,7 @@ negative:
   ]
 
 out:
-  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
   ret i32 %p
 
 next:
diff --git a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
index 7c6c575ea80f..f049265ce4ea 100644
--- a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
+++ b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
@@ -1,20 +1,20 @@
-; RUN: opt < %s -deadargelim -S > %t
-; RUN: cat %t | grep nounwind | count 2
-; RUN: cat %t | grep signext | count 2
-; RUN: cat %t | not grep inreg
-; RUN: cat %t | not grep zeroext
-; RUN: cat %t | not grep byval
+; RUN: opt < %s -deadargelim -S | FileCheck %s
 
-	%struct = type { }
+%struct = type { }
 
 @g = global i8 0
 
+; CHECK: define internal void @foo(i8 signext %y) [[NUW:#[0-9]+]]
+
 define internal zeroext i8 @foo(i8* inreg %p, i8 signext %y, ... )  nounwind {
-	store i8 %y, i8* @g
-	ret i8 0
+  store i8 %y, i8* @g
+  ret i8 0
 }
 
 define i32 @bar() {
-	%A = call zeroext i8(i8*, i8, ...)* @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
-	ret i32 0
+; CHECK: call void @foo(i8 signext 1) [[NUW]]
+  %A = call zeroext i8(i8*, i8, ...)* @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
+  ret i32 0
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index 2f820bad8474..f5d2588dd059 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -8,14 +8,14 @@ entry:
   call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !10)
   call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !11)
   call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !12)
-; CHECK:  call fastcc i8* @add_name_internal(i8* %name, i32 %hash) nounwind, !dbg !13
+; CHECK:  call fastcc i8* @add_name_internal(i8* %name, i32 %hash) [[NUW:#[0-9]+]], !dbg !13
   %0 = call fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext 0, i32 %flags) nounwind, !dbg !13 ; <i8*> [#uses=1]
   ret i8* %0, !dbg !13
 }
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) nounwind noinline ssp {
+define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) noinline nounwind ssp {
 entry:
   call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !15)
   call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !20)
@@ -38,6 +38,11 @@ bb2:                                              ; preds = %bb1, %bb
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+; CHECK: attributes #0 = { nounwind ssp }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { noinline nounwind ssp }
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{i32 524545, metadata !1, metadata !"name", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"vfs_addname", metadata !"vfs_addname", metadata !"vfs_addname", metadata !2, i32 12, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 524329, metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/", metadata !3} ; [ DW_TAG_file_type ]
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index dcbfaaa3d77b..24448b7009ed 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -deadargelim -S | FileCheck %s
+; RUN: opt -deadargelim -S < %s | FileCheck %s
 ; PR14016
 
 ; Check that debug info metadata for subprograms stores pointers to
@@ -36,19 +36,17 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", metadata !"clang version 3.2 (trunk 165305)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !8, metadata !9}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
-!6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !8, metadata !9}
+!5 = metadata !{i32 786478, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
+!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 786478, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
 
 ; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz
 
-!9 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", metadata !6, i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
+!9 = metadata !{i32 786478, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", metadata !6, i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
 
 ; CHECK: metadata !"dead_arg"{{.*}}void ()* @_ZN12_GLOBAL__N_18dead_argEPv
 
@@ -62,3 +60,4 @@ entry:
 !17 = metadata !{i32 5, i32 25, metadata !18, null}
 !18 = metadata !{i32 786443, metadata !8, i32 5, i32 23, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
 !19 = metadata !{i32 5, i32 30, metadata !18, null}
+!20 = metadata !{metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di"}
diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll
index e3fe1bbb548b..cca58721e532 100644
--- a/test/Transforms/DeadArgElim/deadexternal.ll
+++ b/test/Transforms/DeadArgElim/deadexternal.ll
@@ -1,4 +1,4 @@
-; RUN: opt -deadargelim -S %s | FileCheck %s
+; RUN: opt -deadargelim -S < %s | FileCheck %s
 
 define void @test(i32) {
   ret void
diff --git a/test/Transforms/DeadArgElim/keepalive.ll b/test/Transforms/DeadArgElim/keepalive.ll
index dc92dc9f171a..e41110c96ef4 100644
--- a/test/Transforms/DeadArgElim/keepalive.ll
+++ b/test/Transforms/DeadArgElim/keepalive.ll
@@ -1,6 +1,4 @@
-; RUN: opt < %s -deadargelim -S > %t
-; RUN: grep "define internal zeroext i32 @test1() nounwind" %t
-; RUN: grep "define internal <{ i32, i32 }> @test2" %t
+; RUN: opt < %s -deadargelim -S | FileCheck %s
 
 %Ty = type <{ i32, i32 }>
 
@@ -9,11 +7,13 @@
 ; the function and then changing too much.
 
 ; This checks if the return value attributes are not removed
+; CHECK: define internal zeroext i32 @test1() #0
 define internal zeroext i32 @test1(i32 %DEADARG1) nounwind {
         ret i32 1
 }
 
 ; This checks if the struct doesn't get non-packed
+; CHECK: define internal <{ i32, i32 }> @test2
 define internal <{ i32, i32 }> @test2(i32 %DEADARG1) {
         ret <{ i32, i32 }> <{ i32 1, i32 2 }>
 }
@@ -28,3 +28,4 @@ define void @caller() {
         ret void
 }
 
+; CHECK: attributes #0 = { nounwind }
diff --git a/test/Transforms/DeadStoreElimination/const-pointers.ll b/test/Transforms/DeadStoreElimination/const-pointers.ll
index 7d57804631da..15976f9f10d4 100644
--- a/test/Transforms/DeadStoreElimination/const-pointers.ll
+++ b/test/Transforms/DeadStoreElimination/const-pointers.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -dse -S | FileCheck %s
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
 
 %t = type { i32 }
 
diff --git a/test/Transforms/DeadStoreElimination/dominate.ll b/test/Transforms/DeadStoreElimination/dominate.ll
index 284fea4234fc..38cf1a066dae 100644
--- a/test/Transforms/DeadStoreElimination/dominate.ll
+++ b/test/Transforms/DeadStoreElimination/dominate.ll
@@ -1,4 +1,4 @@
-; RUN: opt  %s -dse -disable-output
+; RUN: opt -dse -disable-output < %s
 ; test that we don't crash
 declare void @bar()
 
diff --git a/test/Transforms/DeadStoreElimination/no-targetdata.ll b/test/Transforms/DeadStoreElimination/no-targetdata.ll
index 6c7f940316a0..4022d76dcb52 100644
--- a/test/Transforms/DeadStoreElimination/no-targetdata.ll
+++ b/test/Transforms/DeadStoreElimination/no-targetdata.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -dse -S | FileCheck %s
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
 
 declare void @test1f()
 
diff --git a/test/Transforms/DeadStoreElimination/pr11390.ll b/test/Transforms/DeadStoreElimination/pr11390.ll
index 2ce6eea365aa..f63aa1eb8aae 100644
--- a/test/Transforms/DeadStoreElimination/pr11390.ll
+++ b/test/Transforms/DeadStoreElimination/pr11390.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -dse -S -o - %s | FileCheck %s
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
 ; PR11390
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/EarlyCSE/commute.ll b/test/Transforms/EarlyCSE/commute.ll
index f84a7dd1aae9..8cf04d1765b9 100644
--- a/test/Transforms/EarlyCSE/commute.ll
+++ b/test/Transforms/EarlyCSE/commute.ll
@@ -19,9 +19,9 @@ define void @test2(float %A, float %B, i1* %PA, i1* %PB) {
   ; CHECK-NEXT: store
   ; CHECK-NEXT: store
   ; CHECK-NEXT: ret
-  %C = fcmp eq float %A, %B
+  %C = fcmp oeq float %A, %B
   store i1 %C, i1* %PA
-  %D = fcmp eq float %B, %A
+  %D = fcmp oeq float %B, %A
   store i1 %D, i1* %PB
   ret void
 }
diff --git a/test/Transforms/EarlyCSE/floatingpoint.ll b/test/Transforms/EarlyCSE/floatingpoint.ll
new file mode 100644
index 000000000000..2abecd74b63a
--- /dev/null
+++ b/test/Transforms/EarlyCSE/floatingpoint.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -S -early-cse | FileCheck %s
+
+; Ensure we don't simplify away additions vectors of +0.0's (same as scalars).
+define <4 x float> @fV( <4 x float> %a) {
+       ; CHECK: %b = fadd <4 x float> %a, zeroinitializer
+       %b = fadd  <4 x float> %a, <float 0.0,float 0.0,float 0.0,float 0.0>
+       ret <4 x float> %b
+}
+
+define <4 x float> @fW( <4 x float> %a) {
+       ; CHECK: ret <4 x float> %a
+       %b = fadd  <4 x float> %a, <float -0.0,float -0.0,float -0.0,float -0.0>
+       ret <4 x float> %b
+}
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
index 946453f586ed..36a765873487 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
@@ -1,18 +1,24 @@
-; RUN: opt < %s -basicaa -functionattrs -S | grep readnone | count 4
+; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
 @x = global i32 0
 
+; CHECK: declare i32 @e() #0
 declare i32 @e() readnone
 
+; CHECK: define i32 @f() #0
 define i32 @f() {
 	%tmp = call i32 @e( )		; <i32> [#uses=1]
 	ret i32 %tmp
 }
 
+; CHECK: define i32 @g() #0
 define i32 @g() readonly {
 	ret i32 0
 }
 
+; CHECK: define i32 @h() #0
 define i32 @h() readnone {
 	%tmp = load i32* @x		; <i32> [#uses=1]
 	ret i32 %tmp
 }
+
+; CHECK: attributes #0 = { readnone }
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
index 22eca1320415..d8256ae8e647 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
@@ -1,9 +1,13 @@
-; RUN: opt < %s -basicaa -functionattrs -S | grep readonly | count 2
+; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
 
+; CHECK: define i32 @f() #0
 define i32 @f() {
 entry:
-	%tmp = call i32 @e( )		; <i32> [#uses=1]
-	ret i32 %tmp
+  %tmp = call i32 @e( )
+  ret i32 %tmp
 }
 
+; CHECK: declare i32 @e() #0
 declare i32 @e() readonly
+
+; CHECK: attributes #0 = { readonly }
diff --git a/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
new file mode 100644
index 000000000000..d414b73524fd
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+
+; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) #0
+declare i8* @fopen(i8*, i8*)
+
+; CHECK: declare i8 @strlen(i8* nocapture) #1
+declare i8 @strlen(i8*)
+
+; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) #0
+declare i32* @realloc(i32*, i32)
+
+; Test deliberately wrong declaration
+declare i32 @strcpy(...)
+
+; CHECK-NOT: strcpy{{.*}}noalias
+; CHECK-NOT: strcpy{{.*}}nocapture
+; CHECK-NOT: strcpy{{.*}}nounwind
+; CHECK-NOT: strcpy{{.*}}readonly
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/FunctionAttrs/annotate-1.ll b/test/Transforms/FunctionAttrs/annotate-1.ll
new file mode 100644
index 000000000000..ae77380acc4a
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/annotate-1.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+
+declare i8* @fopen(i8*, i8*)
+; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) [[G0:#[0-9]]] 
+
+declare i8 @strlen(i8*)
+; CHECK: declare i8 @strlen(i8* nocapture) [[G1:#[0-9]]]
+
+declare i32* @realloc(i32*, i32)
+; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) [[G0]]
+
+; Test deliberately wrong declaration
+
+declare i32 @strcpy(...)
+; CHECK: declare i32 @strcpy(...)
+
+; CHECK: attributes [[G0]] = { nounwind }
+; CHECK: attributes [[G1]] = { nounwind readonly }
diff --git a/test/Transforms/FunctionAttrs/atomic.ll b/test/Transforms/FunctionAttrs/atomic.ll
index 7c2bff7a05f7..027ee0fd06a2 100644
--- a/test/Transforms/FunctionAttrs/atomic.ll
+++ b/test/Transforms/FunctionAttrs/atomic.ll
@@ -3,7 +3,7 @@
 ; Atomic load/store to local doesn't affect whether a function is
 ; readnone/readonly.
 define i32 @test1(i32 %x) uwtable ssp {
-; CHECK: define i32 @test1(i32 %x) uwtable readnone ssp {
+; CHECK: define i32 @test1(i32 %x) #0 {
 entry:
   %x.addr = alloca i32, align 4
   store atomic i32 %x, i32* %x.addr seq_cst, align 4
@@ -13,9 +13,11 @@ entry:
 
 ; A function with an Acquire load is not readonly.
 define i32 @test2(i32* %x) uwtable ssp {
-; CHECK: define i32 @test2(i32* nocapture %x) uwtable ssp {
+; CHECK: define i32 @test2(i32* nocapture %x) #1 {
 entry:
   %r = load atomic i32* %x seq_cst, align 4
   ret i32 %r
 }
 
+; CHECK: attributes #0 = { readnone ssp uwtable }
+; CHECK: attributes #1 = { ssp uwtable }
diff --git a/test/Transforms/FunctionAttrs/noreturn.ll b/test/Transforms/FunctionAttrs/noreturn.ll
new file mode 100644
index 000000000000..470ebcb1d3cd
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/noreturn.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -functionattrs -instcombine -S | FileCheck %s
+
+define void @endless_loop() noreturn nounwind readnone ssp uwtable {
+entry:
+  br label %while.body
+
+while.body:
+  br label %while.body
+}
+;CHECK: @main
+;CHECK: endless_loop
+;CHECK: ret
+define i32 @main() noreturn nounwind ssp uwtable {
+entry:
+  tail call void @endless_loop()
+  unreachable
+}
+
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
new file mode 100644
index 000000000000..d1bce728e08c
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -0,0 +1,27 @@
+; RUN: echo '!9 = metadata !{metadata !"%T/linkagename.ll", metadata !0}' > %t1
+; RUN: cat %s %t1 > %t2
+; RUN: opt -insert-gcov-profiling -disable-output < %t2
+; RUN: grep _Z3foov %T/linkagename.gcno
+; RUN: rm %T/linkagename.gcno
+
+; REQUIRES: shell
+
+define void @_Z3foov() {
+entry:
+  ret void, !dbg !8
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.gcov = !{!9}
+
+!0 = metadata !{i32 786449, i32 4, metadata !1, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 786473, metadata !2}          ; [ DW_TAG_file_type ] [/home/nlewycky/hello.cc]
+!2 = metadata !{metadata !"hello.cc", metadata !"/home/nlewycky"}
+!3 = metadata !{i32 0}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!6 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 1, i32 0, metadata !5, null}
+
+
diff --git a/test/Transforms/GCOVProfiling/lit.local.cfg b/test/Transforms/GCOVProfiling/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll
new file mode 100644
index 000000000000..d6d0f3314c26
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/version.ll
@@ -0,0 +1,29 @@
+; RUN: echo '!9 = metadata !{metadata !"%T/version.ll", metadata !0}' > %t1
+; RUN: cat %s %t1 > %t2
+; RUN: opt -insert-gcov-profiling -disable-output < %t2
+; RUN: head -c12 %T/version.gcno | grep '^oncg\*204MVLL$'
+; RUN: rm %T/version.gcno
+; RUN: not opt -insert-gcov-profiling -default-gcov-version=asdfasdf -disable-output < %t2
+; RUN: opt -insert-gcov-profiling -default-gcov-version=407* -disable-output < %t2
+; RUN: head -c12 %T/version.gcno | grep '^oncg\*704MVLL$'
+; RUN: rm %T/version.gcno
+
+define void @test() {
+  ret void, !dbg !8
+}
+
+; REQUIRES: shell
+
+!llvm.gcov = !{!9}
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{metadata !"./version", metadata !1}
+!1 = metadata !{i32 786449, i32 0, i32 4, metadata !2, metadata !"clang version 3.3 (trunk 176994)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [./version] [DW_LANG_C_plus_plus]
+!2 = metadata !{i32 786473, metadata !"version", metadata !"/usr/local/google/home/nlewycky"} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 0}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test", metadata !"test", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @test, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
+!6 = metadata !{i32 786473, metadata !"<stdin>", metadata !"."} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 1, i32 0, metadata !5, null}
+;; !9 is added through the echo line at the top.
diff --git a/test/Transforms/GVN/2011-04-27-phioperands.ll b/test/Transforms/GVN/2011-04-27-phioperands.ll
index 6e5075db7c8e..42c46500c483 100644
--- a/test/Transforms/GVN/2011-04-27-phioperands.ll
+++ b/test/Transforms/GVN/2011-04-27-phioperands.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -disable-output
+; RUN: opt -gvn -disable-output < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
 
diff --git a/test/Transforms/GVN/MemdepMiscompile.ll b/test/Transforms/GVN/MemdepMiscompile.ll
new file mode 100644
index 000000000000..d42016961575
--- /dev/null
+++ b/test/Transforms/GVN/MemdepMiscompile.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; rdar://12801584
+; Value of %shouldExit can be changed by RunInMode.
+; Make sure we do not replace load %shouldExit in while.cond.backedge
+; with a phi node where the value from while.body is 0.
+define i32 @test() nounwind ssp {
+entry:
+; CHECK: test()
+; CHECK: while.body:
+; CHECK: call void @RunInMode
+; CHECK: br i1 %tobool, label %while.cond.backedge, label %if.then
+; CHECK: while.cond.backedge:
+; CHECK: load i32* %shouldExit
+; CHECK: br i1 %cmp, label %while.body
+  %shouldExit = alloca i32, align 4
+  %tasksIdle = alloca i32, align 4
+  store i32 0, i32* %shouldExit, align 4
+  store i32 0, i32* %tasksIdle, align 4
+  call void @CTestInitialize(i32* %tasksIdle) nounwind
+  %0 = load i32* %shouldExit, align 4
+  %cmp1 = icmp eq i32 %0, 0
+  br i1 %cmp1, label %while.body.lr.ph, label %while.end
+
+while.body.lr.ph:
+  br label %while.body
+
+while.body:
+  call void @RunInMode(i32 100) nounwind
+  %1 = load i32* %tasksIdle, align 4
+  %tobool = icmp eq i32 %1, 0
+  br i1 %tobool, label %while.cond.backedge, label %if.then
+
+if.then:
+  store i32 0, i32* %tasksIdle, align 4
+  call void @TimerCreate(i32* %shouldExit) nounwind
+  br label %while.cond.backedge
+
+while.cond.backedge:
+  %2 = load i32* %shouldExit, align 4
+  %cmp = icmp eq i32 %2, 0
+  br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
+
+while.cond.while.end_crit_edge:
+  br label %while.end
+
+while.end:
+  ret i32 0
+}
+declare void @CTestInitialize(i32*)
+declare void @RunInMode(i32)
+declare void @TimerCreate(i32*)
diff --git a/test/Transforms/GVN/crash-no-aa.ll b/test/Transforms/GVN/crash-no-aa.ll
index dae65ddb2fe7..9ad63a7350c2 100644
--- a/test/Transforms/GVN/crash-no-aa.ll
+++ b/test/Transforms/GVN/crash-no-aa.ll
@@ -1,7 +1,6 @@
-; RUN: opt -no-aa -gvn -S %s
+; RUN: opt -no-aa -gvn -S < %s
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v1
-28:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-freebsd8.0"
 
 ; PR5744
diff --git a/test/Transforms/GVN/crash.ll b/test/Transforms/GVN/crash.ll
index 4a8c8e4589c8..9fb612fcae13 100644
--- a/test/Transforms/GVN/crash.ll
+++ b/test/Transforms/GVN/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -gvn %s -disable-output
+; RUN: opt -gvn -disable-output < %s
 
 ; PR5631
 
diff --git a/test/Transforms/GVN/edge.ll b/test/Transforms/GVN/edge.ll
index 32392f3ab0c8..3a102b6c3539 100644
--- a/test/Transforms/GVN/edge.ll
+++ b/test/Transforms/GVN/edge.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -S -o - | FileCheck %s
+; RUN: opt -gvn -S < %s | FileCheck %s
 
 define i32 @f1(i32 %x) {
   ; CHECK: define i32 @f1(
diff --git a/test/Transforms/GVN/fpmath.ll b/test/Transforms/GVN/fpmath.ll
index 8ab285448fbb..403df5c9008a 100644
--- a/test/Transforms/GVN/fpmath.ll
+++ b/test/Transforms/GVN/fpmath.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -S -o - | FileCheck %s
+; RUN: opt -gvn -S < %s | FileCheck %s
 
 define double @test1(double %x, double %y) {
 ; CHECK: @test1(double %x, double %y)
diff --git a/test/Transforms/GVN/lpre-call-wrap-2.ll b/test/Transforms/GVN/lpre-call-wrap-2.ll
index e39f3ed87d1c..35e3534a9c89 100644
--- a/test/Transforms/GVN/lpre-call-wrap-2.ll
+++ b/test/Transforms/GVN/lpre-call-wrap-2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -gvn -enable-load-pre %s | FileCheck %s
+; RUN: opt -S -basicaa -gvn -enable-load-pre < %s | FileCheck %s
 ;
 ; The partially redundant load in bb1 should be hoisted to "bb".  This comes
 ; from this C code (GCC PR 23455):
diff --git a/test/Transforms/GVN/lpre-call-wrap.ll b/test/Transforms/GVN/lpre-call-wrap.ll
index 40462798b534..0646f3fe0aad 100644
--- a/test/Transforms/GVN/lpre-call-wrap.ll
+++ b/test/Transforms/GVN/lpre-call-wrap.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -gvn -enable-load-pre %s | FileCheck %s
+; RUN: opt -S -gvn -enable-load-pre < %s | FileCheck %s
 ;
 ; Make sure the load in bb3.backedge is removed and moved into bb1 after the 
 ; call.  This makes the non-call case faster. 
diff --git a/test/Transforms/GVN/nonescaping-malloc.ll b/test/Transforms/GVN/nonescaping-malloc.ll
index afcb7fe3bb0f..c2eeed56ffc1 100644
--- a/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/test/Transforms/GVN/nonescaping-malloc.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt < %s -basicaa -gvn -stats -disable-output 2>&1 | grep "Number of loads deleted"
 ; rdar://7363102
 
diff --git a/test/Transforms/GVN/null-aliases-nothing.ll b/test/Transforms/GVN/null-aliases-nothing.ll
index 9e4ae18c710c..37bf09d7f3ff 100644
--- a/test/Transforms/GVN/null-aliases-nothing.ll
+++ b/test/Transforms/GVN/null-aliases-nothing.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
 %t = type { i32 }
 declare void @test1f(i8*)
diff --git a/test/Transforms/GVN/pr12979.ll b/test/Transforms/GVN/pr12979.ll
index 669da9127d0b..0198a56513ea 100644
--- a/test/Transforms/GVN/pr12979.ll
+++ b/test/Transforms/GVN/pr12979.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -S -o - | FileCheck %s
+; RUN: opt -gvn -S < %s | FileCheck %s
 
 define i32 @test1(i32 %x, i32 %y) {
 ; CHECK: @test1(i32 %x, i32 %y)
diff --git a/test/Transforms/GVN/range.ll b/test/Transforms/GVN/range.ll
index 3759c415dabc..2115fe85661e 100644
--- a/test/Transforms/GVN/range.ll
+++ b/test/Transforms/GVN/range.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -gvn -S -o - | FileCheck %s
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
 
 define i32 @test1(i32* %p) {
 ; CHECK: @test1(i32* %p)
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 72fa819d1c73..f470ed88bb9c 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -254,14 +254,11 @@ Cont:
   %A = load i8* %P3
   ret i8 %A
 
-;; FIXME: This is disabled because this caused a miscompile in the llvm-gcc
-;; bootstrap, see r82411
-;
-; HECK: @coerce_mustalias_nonlocal1
-; HECK: Cont:
-; HECK:   %A = phi i8 [
-; HECK-NOT: load
-; HECK: ret i8 %A
+; CHECK: @coerce_mustalias_nonlocal1
+; CHECK: Cont:
+; CHECK:   %A = phi i8 [
+; CHECK-NOT: load
+; CHECK: ret i8 %A
 }
 
 
diff --git a/test/Transforms/GVN/tbaa.ll b/test/Transforms/GVN/tbaa.ll
index 90661c62507b..85fe39a93b01 100644
--- a/test/Transforms/GVN/tbaa.ll
+++ b/test/Transforms/GVN/tbaa.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -gvn -S -o - | FileCheck %s
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
 
 define i32 @test1(i8* %p, i8* %q) {
 ; CHECK: @test1(i8* %p, i8* %q)
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index 0f3efa09a1da..e71aed9e05ff 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt < %s -globalopt -stats -disable-output 2>&1 | grep "1 globalopt - Number of global vars shrunk to booleans"
 
 @Stop = internal global i32 0                     ; <i32*> [#uses=3]
diff --git a/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll b/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
index 27352fa29066..629d57c88424 100644
--- a/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
+++ b/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
@@ -1,5 +1,5 @@
 ; PR6422
-; RUN: opt -globalopt -S %s
+; RUN: opt -globalopt -S < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll b/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
index 6f1996a867e3..ab7721fd9720 100644
--- a/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
+++ b/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt -S %s
+; RUN: opt -globalopt -S < %s
 ; PR6435
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/GlobalOpt/crash-2.ll b/test/Transforms/GlobalOpt/crash-2.ll
new file mode 100644
index 000000000000..684f6cee180b
--- /dev/null
+++ b/test/Transforms/GlobalOpt/crash-2.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | opt -globalopt -disable-output
+; NOTE: This needs to run through 'llvm-as' first to reproduce the error!
+; PR15440
+
+%union.U5.0.6.12 = type { i32 }
+%struct.S0.1.7.13 = type { i8, i8, i8, i8, i16, [2 x i8] }
+%struct.S1.2.8.14 = type { i32, i16, i8, i8 }
+
+@.str = external unnamed_addr constant [2 x i8], align 1
+@g_25 = external global i8, align 1
+@g_71 = internal global %struct.S0.1.7.13 { i8 1, i8 -93, i8 58, i8 -1, i16 -5, [2 x i8] undef }, align 4
+@g_114 = external global i8, align 1
+@g_30 = external global { i32, i8, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }, align 4
+@g_271 = internal global [7 x [6 x [5 x i8*]]] [[6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* null, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* @g_114, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* @g_25, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* null, i8* @g_25, i8* @g_25, i8* @g_25, i8* null], [5 x i8*] [i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* null, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* @g_25], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25], [5 x i8*] [i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_114, i8* @g_25, i8* @g_25, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_114], [5 x i8*] [i8* @g_25, i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* null], [5 x i8*] [i8* @g_114, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_25]], [6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_114, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_25, i8* @g_25, i8* @g_114], [5 x i8*] [i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25]]], align 4
+
+define i32 @func() {
+  %tmp = load i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), align 1
+  ret i32 0
+}
diff --git a/test/Transforms/GlobalOpt/crash.ll b/test/Transforms/GlobalOpt/crash.ll
index 366a874f7352..80c777ccabc1 100644
--- a/test/Transforms/GlobalOpt/crash.ll
+++ b/test/Transforms/GlobalOpt/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt -disable-output %s
+; RUN: opt -globalopt -disable-output < %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin9.8"
 
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
index e3bc473f52ad..c9076109443d 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt %s -S | FileCheck %s
+; RUN: opt -globalopt -S < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
diff --git a/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll b/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
new file mode 100644
index 000000000000..9295c2025a2a
--- /dev/null
+++ b/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; rdar://12580965.
+; ObjC++ test case.
+
+%struct.ButtonInitData = type { i8* }
+
+@_ZL14buttonInitData = internal global [1 x %struct.ButtonInitData] zeroinitializer, align 4
+
+@"\01L_OBJC_METH_VAR_NAME_40" = internal global [7 x i8] c"print:\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_41" = internal externally_initialized  global i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_METH_VAR_NAME_40", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+@llvm.used = appending global [2 x i8*] [i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_METH_VAR_NAME_40", i32 0, i32 0),  i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_41" to i8*)]
+
+define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" {
+  %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_41", !invariant.load !2009
+  store i8* %1, i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+  ret void
+}
+
+define internal void @_GLOBAL__I_a() section "__TEXT,__StaticInit,regular,pure_instructions" {
+  call void @__cxx_global_var_init()
+  ret void
+}
+
+declare void @test(i8*)
+
+define void @print() {
+; CHECK: %1 = load i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+  %1 = load i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+  call void @test(i8* %1)
+  ret void
+}
+
+!2009 = metadata !{}
diff --git a/test/Transforms/GlobalOpt/integer-bool.ll b/test/Transforms/GlobalOpt/integer-bool.ll
index 5a34a9c4dabd..51858069ac5b 100644
--- a/test/Transforms/GlobalOpt/integer-bool.ll
+++ b/test/Transforms/GlobalOpt/integer-bool.ll
@@ -1,23 +1,28 @@
-; RUN: opt < %s -globalopt -instcombine | \
-; RUN:    llvm-dis | grep "ret i1 true"
-
+; RUN: opt < %s -S -globalopt -instcombine | FileCheck %s
 ;; check that global opt turns integers that only hold 0 or 1 into bools.
 
-@G = internal global i32 0              ; <i32*> [#uses=3]
+@G = internal addrspace(1) global i32 0
+; CHECK: @G
+; CHECK: addrspace(1)
+; CHECK: global i1 false
 
 define void @set1() {
-        store i32 0, i32* @G
-        ret void
+  store i32 0, i32 addrspace(1)* @G
+; CHECK: store i1 false
+  ret void
 }
 
 define void @set2() {
-        store i32 1, i32* @G
-        ret void
+  store i32 1, i32 addrspace(1)* @G
+; CHECK: store i1 true
+  ret void
 }
 
 define i1 @get() {
-        %A = load i32* @G               ; <i32> [#uses=1]
-        %C = icmp slt i32 %A, 2         ; <i1> [#uses=1]
-        ret i1 %C
+; CHECK: @get
+  %A = load i32 addrspace(1) * @G
+  %C = icmp slt i32 %A, 2
+  ret i1 %C
+; CHECK: ret i1 true
 }
 
diff --git a/test/Transforms/GlobalOpt/memset-null.ll b/test/Transforms/GlobalOpt/memset-null.ll
index 01534025faa3..53ec7551130e 100644
--- a/test/Transforms/GlobalOpt/memset-null.ll
+++ b/test/Transforms/GlobalOpt/memset-null.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt %s -S -o - | FileCheck %s
+; RUN: opt -globalopt -S < %s | FileCheck %s
 ; PR10047
 
 %0 = type { i32, void ()* }
diff --git a/test/Transforms/GlobalOpt/unnamed-addr.ll b/test/Transforms/GlobalOpt/unnamed-addr.ll
index ee7505873126..2ca91e50da2a 100644
--- a/test/Transforms/GlobalOpt/unnamed-addr.ll
+++ b/test/Transforms/GlobalOpt/unnamed-addr.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -globalopt -S | FileCheck %s
+; RUN: opt -globalopt -S < %s | FileCheck %s
 
 @a = internal global i32 0, align 4
 @b = internal global i32 0, align 4
diff --git a/test/Transforms/IPConstantProp/user-with-multiple-uses.ll b/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
index 402ea41167ce..968718084e42 100644
--- a/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
+++ b/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
@@ -4,9 +4,9 @@
 ; IPSCCP should propagate the 0 argument, eliminate the switch, and propagate
 ; the result.
 
-; CHECK: define i32 @main() noreturn nounwind {
+; CHECK: define i32 @main() #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: %call2 = tail call i32 @wwrite(i64 0) nounwind
+; CHECK-NEXT: %call2 = tail call i32 @wwrite(i64 0) [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret i32 123
 
 define i32 @main() noreturn nounwind {
@@ -28,3 +28,7 @@ sw.default:
 return:
   ret i32 0
 }
+
+; CHECK: attributes #0 = { noreturn nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
index 150ae70a8262..e3de75e36fd8 100644
--- a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
+++ b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -indvars %s | FileCheck %s
+; RUN: opt -S -indvars < %s | FileCheck %s
 
 ; The indvar simplification code should ensure that the first PHI in the block 
 ; is the canonical one!
diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll
index 1b702a3b1a3c..aa6a2ee16521 100644
--- a/test/Transforms/IndVarSimplify/crash.ll
+++ b/test/Transforms/IndVarSimplify/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -indvars %s -disable-output
+; RUN: opt -indvars -disable-output < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 declare i32 @putchar(i8) nounwind
diff --git a/test/Transforms/IndVarSimplify/dont-recompute.ll b/test/Transforms/IndVarSimplify/dont-recompute.ll
new file mode 100644
index 000000000000..d37b0e21f826
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/dont-recompute.ll
@@ -0,0 +1,69 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+; This tests that the IV is not recomputed outside of the loop when it is known
+; to be computed by the loop and used in the loop any way. In the example below
+; although a's value can be computed outside of the loop, there is no benefit
+; in doing so as it has to be computed by the loop anyway.
+;
+; extern void func(unsigned val);
+;
+; void test(unsigned m)
+; {
+;   unsigned a = 0;
+;
+;   for (int i=0; i<186; i++) {
+;     a += m;
+;     func(a);
+;   }
+;
+;   func(a);
+; }
+
+declare void @func(i32)
+
+; CHECK: @test
+define void @test(i32 %m) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %add = add i32 %a.05, %m
+; CHECK: tail call void @func(i32 %add)
+  tail call void @func(i32 %add)
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 186
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+; CHECK: for.end:
+; CHECK-NOT: mul i32 %m, 186
+; CHECK:%add.lcssa = phi i32 [ %add, %for.body ]
+; CHECK-NEXT: tail call void @func(i32 %add.lcssa)
+  tail call void @func(i32 %add)
+  ret void
+}
+
+; CHECK: @test2
+define i32 @test2(i32 %m) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %add = add i32 %a.05, %m
+; CHECK: tail call void @func(i32 %add)
+  tail call void @func(i32 %add)
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 186
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+; CHECK: for.end:
+; CHECK-NOT: mul i32 %m, 186
+; CHECK:%add.lcssa = phi i32 [ %add, %for.body ]
+; CHECK-NEXT: ret i32 %add.lcssa
+  ret i32 %add
+}
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 2e0f70ce461a..ed0514b08e33 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -2,7 +2,7 @@
 ; CHECK-NOT: and
 ; CHECK-NOT: zext
 
-target datalayout = "-p:64:64:64-n32:64"
+target datalayout = "p:64:64:64-n32:64"
 
 define void @foo(double* %d, i64 %n) nounwind {
 entry:
diff --git a/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
index 52c9e5c3ffc9..dc36b9948254 100644
--- a/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
+++ b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt < %s -indvars -disable-output -stats -info-output-file - | FileCheck %s
 ; Check that IndVarSimplify is not creating unnecessary canonical IVs
 ; that will never be used.
diff --git a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
index 5ced3b8e8da9..b8ca56050dca 100644
--- a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
+++ b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
@@ -3,10 +3,15 @@
 define i32 @main() {
 entry:
         invoke void @__main( )
-                        to label %LongJmpBlkPre unwind label %LongJmpBlkPre
+                        to label %LongJmpBlkPost unwind label %LongJmpBlkPre
 
-LongJmpBlkPre:          ; preds = %entry, %entry
+LongJmpBlkPost:
+        ret i32 0
+
+LongJmpBlkPre:
         %i.3 = phi i32 [ 0, %entry ], [ 0, %entry ]             ; <i32> [#uses=0]
+        %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+                 cleanup
         ret i32 0
 }
 
@@ -14,3 +19,4 @@ define void @__main() {
         ret void
 }
 
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll b/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
index 1bd55299a901..43bdd309c987 100644
--- a/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
+++ b/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
@@ -13,6 +13,8 @@ LJDecisionBB:           ; preds = %else
         br label %else
 
 RethrowExcept:          ; preds = %entry
+        %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+                 cleanup
         ret i32 0
 }
 
@@ -20,4 +22,4 @@ define void @__main() {
         ret void
 }
 
-
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll b/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
index b4380d01e483..ee5a378b1876 100644
--- a/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
+++ b/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
@@ -2,7 +2,6 @@
 ; PR993
 target datalayout = "e-p:32:32"
 target triple = "i386-unknown-openbsd3.9"
-deplibs = [ "stdc++", "c", "crtend" ]
 	%"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" = type { i8* }
 	%"struct.__gnu_cxx::char_producer<char>" = type { i32 (...)** }
 	%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll b/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
index b754d9f9f5d9..fb5a4b512b9c 100644
--- a/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
+++ b/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
@@ -2,7 +2,6 @@
 ; PR992
 target datalayout = "e-p:32:32"
 target triple = "i686-pc-linux-gnu"
-deplibs = [ "stdc++", "c", "crtend" ]
 	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
 	%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
 	%"struct.__cxxabiv1::__array_type_info" = type { %"struct.std::type_info" }
diff --git a/test/Transforms/Inline/2010-05-12-ValueMap.ll b/test/Transforms/Inline/2010-05-12-ValueMap.ll
index f9cc13f499b3..f452907efd07 100644
--- a/test/Transforms/Inline/2010-05-12-ValueMap.ll
+++ b/test/Transforms/Inline/2010-05-12-ValueMap.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -inline -mergefunc -disable-output
+; RUN: opt -inline -mergefunc -disable-output < %s
 
 ; This tests for a bug where the inliner kept the functions in a ValueMap after
 ; it had completed and a ModulePass started to run. LLVM would crash deleting
diff --git a/test/Transforms/Inline/alloca_test.ll b/test/Transforms/Inline/alloca_test.ll
index e5791d5d2553..8464259ce1f8 100644
--- a/test/Transforms/Inline/alloca_test.ll
+++ b/test/Transforms/Inline/alloca_test.ll
@@ -1,7 +1,7 @@
 ; This test ensures that alloca instructions in the entry block for an inlined
 ; function are moved to the top of the function they are inlined into.
 ;
-; RUN: opt -S -inline %s | FileCheck %s
+; RUN: opt -S -inline < %s | FileCheck %s
 
 define i32 @func(i32 %i) {
         %X = alloca i32         ; <i32*> [#uses=1]
diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll
index 609a3d4e153e..39e25cb5d627 100644
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@@ -45,3 +45,48 @@ define i32 @test2(i1 %cond) {
 ; CHECK-NOT: = alloca
 ; CHECK: ret i32
 }
+
+declare void @barrier() noduplicate
+
+define internal i32 @f() {
+  call void @barrier() noduplicate
+  ret i32 1
+}
+
+define i32 @g() {
+  call void @barrier() noduplicate
+  ret i32 2
+}
+
+define internal i32 @h() {
+  call void @barrier() noduplicate
+  ret i32 3
+}
+
+define i32 @test3() {
+  %b = call i32 @f()
+  ret i32 %b
+}
+
+; The call to @f cannot be inlined as there is another callsite
+; calling @f, and @f contains a noduplicate call.
+;
+; The call to @g cannot be inlined as it has external linkage.
+;
+; The call to @h *can* be inlined.
+
+; CHECK: @test
+define i32 @test() {
+; CHECK: call i32 @f()
+  %a = call i32 @f()
+; CHECK: call i32 @g()
+  %b = call i32 @g()
+; CHECK-NOT: call i32 @h()
+  %c = call i32 @h()
+
+  %d = add i32 %a, %b
+  %e = add i32 %d, %c
+
+  ret i32 %e
+; CHECK: }
+}
diff --git a/test/Transforms/Inline/crash2.ll b/test/Transforms/Inline/crash2.ll
index cb1f44d5cca7..be634f625633 100644
--- a/test/Transforms/Inline/crash2.ll
+++ b/test/Transforms/Inline/crash2.ll
@@ -1,4 +1,4 @@
-; RUN: opt  -inline -scalarrepl -max-cg-scc-iterations=1  %s -disable-output
+; RUN: opt  -inline -scalarrepl -max-cg-scc-iterations=1 -disable-output < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.3"
 
diff --git a/test/Transforms/Inline/delete-call.ll b/test/Transforms/Inline/delete-call.ll
index 7716d6a47bec..97c52af9e0d5 100644
--- a/test/Transforms/Inline/delete-call.ll
+++ b/test/Transforms/Inline/delete-call.ll
@@ -1,5 +1,9 @@
-; RUN: opt %s -S  -inline -functionattrs -stats 2>&1 | grep "Number of call sites deleted, not inlined"
-; RUN: opt %s -S  -inline -stats 2>&1 | grep "Number of functions inlined"
+; REQUIRES: asserts
+; RUN: opt -S -inline -stats < %s 2>&1 | FileCheck %s
+; CHECK: Number of functions inlined
+
+; RUN: opt -S -inline -functionattrs -stats < %s 2>&1 | FileCheck -check-prefix=FUNCTIONATTRS %s
+; CHECK-FUNCTIONATTRS: Number of call sites deleted, not inlined
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin9.8"
diff --git a/test/Transforms/Inline/devirtualize-3.ll b/test/Transforms/Inline/devirtualize-3.ll
index c32be4e024a3..3f019676e4a4 100644
--- a/test/Transforms/Inline/devirtualize-3.ll
+++ b/test/Transforms/Inline/devirtualize-3.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -inline -S -scalarrepl -gvn -instcombine %s | FileCheck %s
+; RUN: opt -basicaa -inline -S -scalarrepl -gvn -instcombine < %s | FileCheck %s
 ; PR5009
 
 ; CHECK: define i32 @main() 
diff --git a/test/Transforms/Inline/devirtualize.ll b/test/Transforms/Inline/devirtualize.ll
index 51ea4baa3866..d46154ef6a98 100644
--- a/test/Transforms/Inline/devirtualize.ll
+++ b/test/Transforms/Inline/devirtualize.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -inline -scalarrepl -instcombine -simplifycfg -instcombine -gvn -globaldce %s | FileCheck %s
+; RUN: opt -S -Os < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
diff --git a/test/Transforms/Inline/gvn-inline-iteration.ll b/test/Transforms/Inline/gvn-inline-iteration.ll
index e502fd5777d5..526ed79e7b48 100644
--- a/test/Transforms/Inline/gvn-inline-iteration.ll
+++ b/test/Transforms/Inline/gvn-inline-iteration.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -inline -gvn %s -S -max-cg-scc-iterations=1 | FileCheck %s
+; RUN: opt -basicaa -inline -gvn -S -max-cg-scc-iterations=1 < %s | FileCheck %s
 ; rdar://6295824 and PR6724
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/Inline/inline-optsize.ll b/test/Transforms/Inline/inline-optsize.ll
index 20d7426abd13..3ad573a04e42 100644
--- a/test/Transforms/Inline/inline-optsize.ll
+++ b/test/Transforms/Inline/inline-optsize.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -Oz %s | FileCheck %s -check-prefix=OZ
-; RUN: opt -S -O2 %s | FileCheck %s -check-prefix=O2
+; RUN: opt -S -Oz < %s | FileCheck %s -check-prefix=OZ
+; RUN: opt -S -O2 < %s | FileCheck %s -check-prefix=O2
 
 ; The inline threshold for a function with the optsize attribute is currently
 ; the same as the global inline threshold for -Os. Check that the optsize
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index 0b48a7282f45..77bc3784acb4 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -111,6 +111,82 @@ bb.false:
   ret i32 %sub
 }
 
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
+
+define i8 @caller4(i8 %z) {
+; Check that we can constant fold through intrinsics such as the
+; overflow-detecting arithmetic instrinsics. These are particularly important
+; as they are used heavily in standard library code and generic C++ code where
+; the arguments are oftent constant but complete generality is required.
+;
+; CHECK: @caller4
+; CHECK-NOT: call
+; CHECK: ret i8 -1
+
+entry:
+  %x = call i8 @callee4(i8 254, i8 14, i8 %z)
+  ret i8 %x
+}
+
+define i8 @callee4(i8 %x, i8 %y, i8 %z) {
+  %uadd = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
+  %o = extractvalue {i8, i1} %uadd, 1
+  br i1 %o, label %bb.true, label %bb.false
+
+bb.true:
+  ret i8 -1
+
+bb.false:
+  ; This block musn't be counted in the inline cost.
+  %z1 = add i8 %z, 1
+  %z2 = add i8 %z1, 1
+  %z3 = add i8 %z2, 1
+  %z4 = add i8 %z3, 1
+  %z5 = add i8 %z4, 1
+  %z6 = add i8 %z5, 1
+  %z7 = add i8 %z6, 1
+  %z8 = add i8 %z7, 1
+  ret i8 %z8
+}
+
+define i64 @caller5(i64 %y) {
+; Check that we can round trip constants through various kinds of casts etc w/o
+; losing track of the constant prop in the inline cost analysis.
+;
+; CHECK: @caller5
+; CHECK-NOT: call
+; CHECK: ret i64 -1
+
+entry:
+  %x = call i64 @callee5(i64 42, i64 %y)
+  ret i64 %x
+}
+
+define i64 @callee5(i64 %x, i64 %y) {
+  %inttoptr = inttoptr i64 %x to i8*
+  %bitcast = bitcast i8* %inttoptr to i32*
+  %ptrtoint = ptrtoint i32* %bitcast to i64
+  %trunc = trunc i64 %ptrtoint to i32
+  %zext = zext i32 %trunc to i64
+  %cmp = icmp eq i64 %zext, 42
+  br i1 %cmp, label %bb.true, label %bb.false
+
+bb.true:
+  ret i64 -1
+
+bb.false:
+  ; This block musn't be counted in the inline cost.
+  %y1 = add i64 %y, 1
+  %y2 = add i64 %y1, 1
+  %y3 = add i64 %y2, 1
+  %y4 = add i64 %y3, 1
+  %y5 = add i64 %y4, 1
+  %y6 = add i64 %y5, 1
+  %y7 = add i64 %y6, 1
+  %y8 = add i64 %y7, 1
+  ret i64 %y8
+}
+
 
 define i32 @PR13412.main() {
 ; This is a somewhat complicated three layer subprogram that was reported to
diff --git a/test/Transforms/Inline/inline_invoke.ll b/test/Transforms/Inline/inline_invoke.ll
index 9f5f670b859b..c3941388f937 100644
--- a/test/Transforms/Inline/inline_invoke.ll
+++ b/test/Transforms/Inline/inline_invoke.ll
@@ -96,6 +96,7 @@ eh.resume:
 ; CHECK:      landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A]])
 ; CHECK-NEXT:   to label %[[LBL:[^\s]+]] unwind
 ; CHECK: [[LBL]]:
@@ -166,6 +167,7 @@ eh.resume:
 ; CHECK-NEXT: [[LPADVAL1:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A1]])
 ; CHECK-NEXT:   to label %[[RESUME1:[^\s]+]] unwind
 ; CHECK: [[RESUME1]]:
@@ -185,6 +187,7 @@ eh.resume:
 ; CHECK-NEXT: [[LPADVAL2:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:   cleanup
 ; CHECK-NEXT:   catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT:   catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A2]])
 ; CHECK-NEXT:   to label %[[RESUME2:[^\s]+]] unwind
 ; CHECK: [[RESUME2]]:
@@ -272,6 +275,7 @@ lpad.cont:
 ; CHECK:      landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(
 ; CHECK-NEXT:   to label %[[L:[^\s]+]] unwind
 ; CHECK:    [[L]]:
@@ -318,6 +322,7 @@ terminate:
 ; CHECK:      landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT:    catch i8* bitcast (i8** @_ZTIi to i8*)
 ; CHECK-NEXT: invoke void @_ZN1AD1Ev(
 ; CHECK-NEXT:   to label %[[L:[^\s]+]] unwind
 ; CHECK:    [[L]]:
@@ -330,7 +335,7 @@ terminate:
 ; CHECK-NEXT: br label %[[JOIN]]
 ; CHECK:    [[JOIN]]:
 ; CHECK-NEXT: phi { i8*, i32 }
-; CHECK-NEXT: call void @opaque() nounwind
+; CHECK-NEXT: call void @opaque() [[NUW:#[0-9]+]]
 ; CHECK-NEXT: br label %[[FIX:[^\s]+]]
 ; CHECK:    lpad:
 ; CHECK-NEXT: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
@@ -340,3 +345,8 @@ terminate:
 ; CHECK-NEXT: [[T1:%.*]] = phi i32 [ 0, %[[JOIN]] ], [ 1, %lpad ]
 ; CHECK-NEXT: call void @use(i32 [[T1]])
 ; CHECK-NEXT: call void @_ZSt9terminatev()
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { ssp uwtable }
+; CHECK: attributes #3 = { noreturn nounwind }
diff --git a/test/Transforms/Inline/inline_minisize.ll b/test/Transforms/Inline/inline_minisize.ll
new file mode 100644
index 000000000000..3dddbcf3303d
--- /dev/null
+++ b/test/Transforms/Inline/inline_minisize.ll
@@ -0,0 +1,232 @@
+; RUN: opt -O2 -S < %s | FileCheck %s
+
+@data = common global i32* null, align 8
+
+define i32 @fct1(i32 %a) nounwind uwtable ssp {
+entry:
+  %a.addr = alloca i32, align 4
+  %res = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  %tmp = load i32* %a.addr, align 4
+  %idxprom = sext i32 %tmp to i64
+  %tmp1 = load i32** @data, align 8
+  %arrayidx = getelementptr inbounds i32* %tmp1, i64 %idxprom
+  %tmp2 = load i32* %arrayidx, align 4
+  %tmp3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %tmp3, 1
+  %idxprom1 = sext i32 %add to i64
+  %tmp4 = load i32** @data, align 8
+  %arrayidx2 = getelementptr inbounds i32* %tmp4, i64 %idxprom1
+  %tmp5 = load i32* %arrayidx2, align 4
+  %mul = mul nsw i32 %tmp2, %tmp5
+  store i32 %mul, i32* %res, align 4
+  store i32 0, i32* %i, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %tmp6 = load i32* %i, align 4
+  %tmp7 = load i32* %res, align 4
+  %cmp = icmp slt i32 %tmp6, %tmp7
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp8 = load i32* %i, align 4
+  %idxprom3 = sext i32 %tmp8 to i64
+  %tmp9 = load i32** @data, align 8
+  %arrayidx4 = getelementptr inbounds i32* %tmp9, i64 %idxprom3
+  call void @fct0(i32* %arrayidx4)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %tmp10 = load i32* %i, align 4
+  %inc = add nsw i32 %tmp10, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  store i32 0, i32* %i, align 4
+  br label %for.cond5
+
+for.cond5:                                        ; preds = %for.inc10, %for.end
+  %tmp11 = load i32* %i, align 4
+  %tmp12 = load i32* %res, align 4
+  %cmp6 = icmp slt i32 %tmp11, %tmp12
+  br i1 %cmp6, label %for.body7, label %for.end12
+
+for.body7:                                        ; preds = %for.cond5
+  %tmp13 = load i32* %i, align 4
+  %idxprom8 = sext i32 %tmp13 to i64
+  %tmp14 = load i32** @data, align 8
+  %arrayidx9 = getelementptr inbounds i32* %tmp14, i64 %idxprom8
+  call void @fct0(i32* %arrayidx9)
+  br label %for.inc10
+
+for.inc10:                                        ; preds = %for.body7
+  %tmp15 = load i32* %i, align 4
+  %inc11 = add nsw i32 %tmp15, 1
+  store i32 %inc11, i32* %i, align 4
+  br label %for.cond5
+
+for.end12:                                        ; preds = %for.cond5
+  store i32 0, i32* %i, align 4
+  br label %for.cond13
+
+for.cond13:                                       ; preds = %for.inc18, %for.end12
+  %tmp16 = load i32* %i, align 4
+  %tmp17 = load i32* %res, align 4
+  %cmp14 = icmp slt i32 %tmp16, %tmp17
+  br i1 %cmp14, label %for.body15, label %for.end20
+
+for.body15:                                       ; preds = %for.cond13
+  %tmp18 = load i32* %i, align 4
+  %idxprom16 = sext i32 %tmp18 to i64
+  %tmp19 = load i32** @data, align 8
+  %arrayidx17 = getelementptr inbounds i32* %tmp19, i64 %idxprom16
+  call void @fct0(i32* %arrayidx17)
+  br label %for.inc18
+
+for.inc18:                                        ; preds = %for.body15
+  %tmp20 = load i32* %i, align 4
+  %inc19 = add nsw i32 %tmp20, 1
+  store i32 %inc19, i32* %i, align 4
+  br label %for.cond13
+
+for.end20:                                        ; preds = %for.cond13
+  %tmp21 = load i32* %res, align 4
+  ret i32 %tmp21
+}
+
+declare void @fct0(i32*)
+
+define i32 @fct2(i32 %a) nounwind uwtable inlinehint ssp {
+entry:
+  %a.addr = alloca i32, align 4
+  %res = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  %tmp = load i32* %a.addr, align 4
+  %shl = shl i32 %tmp, 1
+  %idxprom = sext i32 %shl to i64
+  %tmp1 = load i32** @data, align 8
+  %arrayidx = getelementptr inbounds i32* %tmp1, i64 %idxprom
+  %tmp2 = load i32* %arrayidx, align 4
+  %tmp3 = load i32* %a.addr, align 4
+  %shl1 = shl i32 %tmp3, 1
+  %add = add nsw i32 %shl1, 13
+  %idxprom2 = sext i32 %add to i64
+  %tmp4 = load i32** @data, align 8
+  %arrayidx3 = getelementptr inbounds i32* %tmp4, i64 %idxprom2
+  %tmp5 = load i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %tmp2, %tmp5
+  store i32 %mul, i32* %res, align 4
+  store i32 0, i32* %i, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %tmp6 = load i32* %i, align 4
+  %tmp7 = load i32* %res, align 4
+  %cmp = icmp slt i32 %tmp6, %tmp7
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp8 = load i32* %i, align 4
+  %idxprom4 = sext i32 %tmp8 to i64
+  %tmp9 = load i32** @data, align 8
+  %arrayidx5 = getelementptr inbounds i32* %tmp9, i64 %idxprom4
+  call void @fct0(i32* %arrayidx5)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %tmp10 = load i32* %i, align 4
+  %inc = add nsw i32 %tmp10, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  store i32 0, i32* %i, align 4
+  br label %for.cond6
+
+for.cond6:                                        ; preds = %for.inc11, %for.end
+  %tmp11 = load i32* %i, align 4
+  %tmp12 = load i32* %res, align 4
+  %cmp7 = icmp slt i32 %tmp11, %tmp12
+  br i1 %cmp7, label %for.body8, label %for.end13
+
+for.body8:                                        ; preds = %for.cond6
+  %tmp13 = load i32* %i, align 4
+  %idxprom9 = sext i32 %tmp13 to i64
+  %tmp14 = load i32** @data, align 8
+  %arrayidx10 = getelementptr inbounds i32* %tmp14, i64 %idxprom9
+  call void @fct0(i32* %arrayidx10)
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.body8
+  %tmp15 = load i32* %i, align 4
+  %inc12 = add nsw i32 %tmp15, 1
+  store i32 %inc12, i32* %i, align 4
+  br label %for.cond6
+
+for.end13:                                        ; preds = %for.cond6
+  store i32 0, i32* %i, align 4
+  br label %for.cond14
+
+for.cond14:                                       ; preds = %for.inc19, %for.end13
+  %tmp16 = load i32* %i, align 4
+  %tmp17 = load i32* %res, align 4
+  %cmp15 = icmp slt i32 %tmp16, %tmp17
+  br i1 %cmp15, label %for.body16, label %for.end21
+
+for.body16:                                       ; preds = %for.cond14
+  %tmp18 = load i32* %i, align 4
+  %idxprom17 = sext i32 %tmp18 to i64
+  %tmp19 = load i32** @data, align 8
+  %arrayidx18 = getelementptr inbounds i32* %tmp19, i64 %idxprom17
+  call void @fct0(i32* %arrayidx18)
+  br label %for.inc19
+
+for.inc19:                                        ; preds = %for.body16
+  %tmp20 = load i32* %i, align 4
+  %inc20 = add nsw i32 %tmp20, 1
+  store i32 %inc20, i32* %i, align 4
+  br label %for.cond14
+
+for.end21:                                        ; preds = %for.cond14
+  %tmp21 = load i32* %res, align 4
+  ret i32 %tmp21
+}
+
+define i32 @fct3(i32 %c) nounwind uwtable ssp {
+entry:
+  ;CHECK: @fct3
+  ;CHECK: call i32 @fct1
+  ; The inline keyword gives a sufficient benefits to inline fct2
+  ;CHECK-NOT: call i32 @fct2
+  %c.addr = alloca i32, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %tmp = load i32* %c.addr, align 4
+  %call = call i32 @fct1(i32 %tmp)
+  %tmp1 = load i32* %c.addr, align 4
+  %call1 = call i32 @fct2(i32 %tmp1)
+  %add = add nsw i32 %call, %call1
+  ret i32 %add
+}
+
+define i32 @fct4(i32 %c) minsize nounwind uwtable ssp {
+entry:
+  ;CHECK: @fct4
+  ;CHECK: call i32 @fct1
+  ; With Oz (minsize attribute), the benefit of inlining fct2
+  ; is the same as fct1, thus no inlining for fct2
+  ;CHECK: call i32 @fct2
+  %c.addr = alloca i32, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %tmp = load i32* %c.addr, align 4
+  %call = call i32 @fct1(i32 %tmp)
+  %tmp1 = load i32* %c.addr, align 4
+  %call1 = call i32 @fct2(i32 %tmp1)
+  %add = add nsw i32 %call, %call1
+  ret i32 %add
+}
diff --git a/test/Transforms/Inline/inline_ssp.ll b/test/Transforms/Inline/inline_ssp.ll
new file mode 100644
index 000000000000..a4b43a77bad2
--- /dev/null
+++ b/test/Transforms/Inline/inline_ssp.ll
@@ -0,0 +1,160 @@
+; RUN: opt -inline %s -S | FileCheck %s
+; Ensure SSP attributes are propagated correctly when inlining.
+
+@.str = private unnamed_addr constant [11 x i8] c"fun_nossp\0A\00", align 1
+@.str1 = private unnamed_addr constant [9 x i8] c"fun_ssp\0A\00", align 1
+@.str2 = private unnamed_addr constant [15 x i8] c"fun_sspstrong\0A\00", align 1
+@.str3 = private unnamed_addr constant [12 x i8] c"fun_sspreq\0A\00", align 1
+
+; These first four functions (@fun_sspreq, @fun_sspstrong, @fun_ssp, @fun_nossp)
+; are used by the remaining functions to ensure that the SSP attributes are
+; propagated correctly.  The caller should have its SSP attribute set as:
+; strictest(caller-ssp-attr, callee-ssp-attr), where strictness is ordered as:
+;  sspreq > sspstrong > ssp > [no ssp]
+define internal void @fun_sspreq() nounwind sspreq uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str3, i32 0, i32 0))
+  ret void
+}
+
+define internal void @fun_sspstrong() nounwind sspstrong uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str2, i32 0, i32 0))
+  ret void
+}
+
+define internal void @fun_ssp() nounwind ssp uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str1, i32 0, i32 0))
+  ret void
+}
+
+define internal void @fun_nossp() nounwind uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0))
+  ret void
+}
+
+; Tests start below 
+
+define void @inline_req_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_req_req() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_req_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_req_strong() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_req_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_req_ssp() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_req_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_req_nossp() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_strong_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_strong_req() #0
+  call void @fun_sspstrong()
+  ret void
+}
+
+
+define void @inline_strong_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_strong_strong() #1
+  call void @fun_sspstrong()
+  ret void
+}
+
+define void @inline_strong_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_strong_ssp() #1
+  call void @fun_sspstrong()
+  ret void
+}
+
+define void @inline_strong_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_strong_nossp() #1
+  call void @fun_sspstrong()
+  ret void
+}
+
+define void @inline_ssp_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_ssp_req() #0
+  call void @fun_ssp()
+  ret void
+}
+
+
+define void @inline_ssp_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_ssp_strong() #1
+  call void @fun_ssp()
+  ret void
+}
+
+define void @inline_ssp_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_ssp_ssp() #2
+  call void @fun_ssp()
+  ret void
+}
+
+define void @inline_ssp_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_ssp_nossp() #2
+  call void @fun_ssp()
+  ret void
+}
+
+define void @inline_nossp_req() nounwind uwtable sspreq {
+entry:
+; CHECK: @inline_nossp_req() #0
+  call void @fun_nossp()
+  ret void
+}
+
+
+define void @inline_nossp_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_nossp_strong() #1
+  call void @fun_nossp()
+  ret void
+}
+
+define void @inline_nossp_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_nossp_ssp() #2
+  call void @fun_nossp()
+  ret void
+}
+
+define void @inline_nossp_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_nossp_nossp() #3
+  call void @fun_nossp()
+  ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+; CHECK: attributes #0 = { nounwind sspreq uwtable }
+; CHECK: attributes #1 = { nounwind sspstrong uwtable }
+; CHECK: attributes #2 = { nounwind ssp uwtable }
+; CHECK: attributes #3 = { nounwind uwtable }
diff --git a/test/Transforms/Inline/lifetime-no-datalayout.ll b/test/Transforms/Inline/lifetime-no-datalayout.ll
new file mode 100644
index 000000000000..f4ffef3850f1
--- /dev/null
+++ b/test/Transforms/Inline/lifetime-no-datalayout.ll
@@ -0,0 +1,23 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+
+declare void @use(i8* %a)
+
+define void @helper() {
+  %a = alloca i8
+  call void @use(i8* %a)
+  ret void
+}
+
+; Size in llvm.lifetime.X should be -1 (unknown).
+define void @test() {
+; CHECK: @test
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.start(i64 -1
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.end(i64 -1
+  call void @helper()
+; CHECK-NOT: lifetime
+; CHECK: ret void
+  ret void
+}
+
diff --git a/test/Transforms/Inline/lifetime.ll b/test/Transforms/Inline/lifetime.ll
index a95c836b77de..fc73385295ed 100644
--- a/test/Transforms/Inline/lifetime.ll
+++ b/test/Transforms/Inline/lifetime.ll
@@ -1,22 +1,25 @@
-; RUN: opt -inline %s -S -o - | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 declare void @llvm.lifetime.start(i64, i8*)
 declare void @llvm.lifetime.end(i64, i8*)
 
 define void @helper_both_markers() {
   %a = alloca i8
-  call void @llvm.lifetime.start(i64 1, i8* %a)
-  call void @llvm.lifetime.end(i64 1, i8* %a)
+  ; Size in llvm.lifetime.start / llvm.lifetime.end differs from
+  ; allocation size. We should use the former.
+  call void @llvm.lifetime.start(i64 2, i8* %a)
+  call void @llvm.lifetime.end(i64 2, i8* %a)
   ret void
 }
 
 define void @test_both_markers() {
 ; CHECK: @test_both_markers
-; CHECK: llvm.lifetime.start(i64 1
-; CHECK-NEXT: llvm.lifetime.end(i64 1
+; CHECK: llvm.lifetime.start(i64 2
+; CHECK-NEXT: llvm.lifetime.end(i64 2
   call void @helper_both_markers()
-; CHECK-NEXT: llvm.lifetime.start(i64 1
-; CHECK-NEXT: llvm.lifetime.end(i64 1
+; CHECK-NEXT: llvm.lifetime.start(i64 2
+; CHECK-NEXT: llvm.lifetime.end(i64 2
   call void @helper_both_markers()
 ; CHECK-NEXT: ret void
   ret void
@@ -27,7 +30,7 @@ define void @test_both_markers() {
 declare void @use(i8* %a)
 
 define void @helper_no_markers() {
-  %a = alloca i8
+  %a = alloca i8 ; Allocation size is 1 byte.
   call void @use(i8* %a)
   ret void
 }
@@ -37,14 +40,14 @@ define void @helper_no_markers() {
 define void @test_no_marker() {
 ; CHECK: @test_no_marker
 ; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 -1
+; CHECK: llvm.lifetime.start(i64 1
 ; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 -1
+; CHECK: llvm.lifetime.end(i64 1
   call void @helper_no_markers()
 ; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 -1
+; CHECK: llvm.lifetime.start(i64 1
 ; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 -1
+; CHECK: llvm.lifetime.end(i64 1
   call void @helper_no_markers()
 ; CHECK-NOT: lifetime
 ; CHECK: ret void
@@ -76,3 +79,22 @@ define void @test_two_casts() {
 ; CHECK: ret void
   ret void
 }
+
+define void @helper_arrays_alloca() {
+  %a = alloca [10 x i32], align 16
+  %1 = bitcast [10 x i32]* %a to i8*
+  call void @use(i8* %1)
+  ret void
+}
+
+define void @test_arrays_alloca() {
+; CHECK: @test_arrays_alloca
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.start(i64 40,
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.end(i64 40,
+  call void @helper_arrays_alloca()
+; CHECK-NOT: lifetime
+; CHECK: ret void
+  ret void
+}
diff --git a/test/Transforms/Inline/noinline-recursive-fn.ll b/test/Transforms/Inline/noinline-recursive-fn.ll
index 6cde0e27fd1e..5520093ee457 100644
--- a/test/Transforms/Inline/noinline-recursive-fn.ll
+++ b/test/Transforms/Inline/noinline-recursive-fn.ll
@@ -2,7 +2,7 @@
 ; This effectively is just peeling off the first iteration of a loop, and the
 ; inliner heuristics are not set up for this.
 
-; RUN: opt -inline %s -S | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.3"
diff --git a/test/Transforms/Inline/noinline.ll b/test/Transforms/Inline/noinline.ll
index dc3f6e003094..7667114b68e6 100644
--- a/test/Transforms/Inline/noinline.ll
+++ b/test/Transforms/Inline/noinline.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -inline -S | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
 ; PR6682
 declare void @foo() nounwind
 
diff --git a/test/Transforms/Inline/recursive.ll b/test/Transforms/Inline/recursive.ll
index 5fe8d1639ca3..fe1c041af9a8 100644
--- a/test/Transforms/Inline/recursive.ll
+++ b/test/Transforms/Inline/recursive.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -inline -S | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin10.0"
diff --git a/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll b/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
index 1da28562aae4..d266164fd870 100644
--- a/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
+++ b/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -instcombine %s | FileCheck %s
+; RUN: opt -S -instcombine < %s | FileCheck %s
 ; PR2297
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll b/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll
new file mode 100644
index 000000000000..b66495d9cbaa
--- /dev/null
+++ b/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -inline -instcombine -functionattrs | llvm-dis
+;
+; Check that nocapture attributes are added when run after an SCC pass.
+; PR3520
+
+define i32 @use(i8* %x) nounwind readonly {
+; CHECK: @use(i8* nocapture %x)
+  %1 = tail call i64 @strlen(i8* %x) nounwind readonly
+  %2 = trunc i64 %1 to i32
+  ret i32 %2
+}
+
+declare i64 @strlen(i8*) nounwind readonly
+; CHECK: declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
index 2df12d670adb..bb3159e1e6fa 100644
--- a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
+++ b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -S %s | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 ; PR6486
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
diff --git a/test/Transforms/SimplifyLibCalls/2010-05-30-memcpy-Struct.ll b/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
index f67bae74f503..09a96749f260 100644
--- a/test/Transforms/SimplifyLibCalls/2010-05-30-memcpy-Struct.ll
+++ b/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
@@ -1,4 +1,4 @@
-; RUN: opt -simplify-libcalls %s -S -o - | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 ; PR7265
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -6,11 +6,11 @@ target triple = "x86_64-unknown-linux-gnu"
 
 %union.anon = type { i32, [4 x i8] }
 
-@.str = private constant [3 x i8] c"%s\00"        ; <[3 x i8]*> [#uses=2]
+@.str = private constant [3 x i8] c"%s\00"
 
 define void @CopyEventArg(%union.anon* %ev) nounwind {
 entry:
-  %call = call i32 (i8*, i8*, ...)* @sprintf(i8* undef, i8* getelementptr inbounds ([3 x i8]* @.str, i64 0, i64 0), %union.anon* %ev) nounwind ; <i32> [#uses=0]
+  %call = call i32 (i8*, i8*, ...)* @sprintf(i8* undef, i8* getelementptr inbounds ([3 x i8]* @.str, i64 0, i64 0), %union.anon* %ev) nounwind
 ; CHECK: bitcast %union.anon* %ev to i8*
 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
   ret void
diff --git a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
index eb2899475695..800162197919 100644
--- a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
+++ b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@@ -5,8 +5,8 @@
 define i32 @main(i32 %argc) nounwind ssp {
 entry:
   %tmp3151 = trunc i32 %argc to i8
-; CHECK: %tmp3163 = shl i8 %tmp3162, 6
-; CHECK: and i8 %tmp3163, 64
+; CHECK: %0 = shl i8 %tmp3151, 5
+; CHECK: and i8 %0, 64
 ; CHECK-NOT: shl
 ; CHECK-NOT: shr
   %tmp3161 = or i8 %tmp3151, -17
diff --git a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
index 0907c490bb35..2dedd44e2be1 100644
--- a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
+++ b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
@@ -50,7 +50,7 @@ entry:
   %b = add <4 x i32> zeroinitializer, %a
   ret <4 x i32> %b
 ; CHECK: entry:
-; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret <4 x i32> %a
 }
 
@@ -66,3 +66,7 @@ entry:
 
 declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+
+; CHECK: attributes #0 = { nounwind readnone ssp }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll b/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll
new file mode 100644
index 000000000000..fc29b095e5ce
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+define <4 x i32> @foo(<4 x i32*>* %in) {
+  %t17 = load <4 x i32*>* %in, align 8
+  %t18 = icmp eq <4 x i32*> %t17, zeroinitializer
+  %t19 = zext <4 x i1> %t18 to <4 x i32>
+  ret <4 x i32> %t19
+}
diff --git a/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll b/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
new file mode 100644
index 000000000000..b20c3a07c0ac
--- /dev/null
+++ b/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct._my_struct = type <{ [12 x i8], [4 x i8] }>
+
+@initval = common global %struct._my_struct zeroinitializer, align 1
+
+; InstCombine will try to change the %struct._my_struct alloca into an
+; allocation of an i96 because of the bitcast to create %2. That's not valid,
+; as the other 32 bits of the structure still feed into the return value
+define { i64, i64 } @function(i32 %x, i32 %y, i32 %z) nounwind {
+; CHECK: @function
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %retval = alloca %struct._my_struct, align 8
+; CHECK-NOT: bitcast i96* %retval to %struct._my_struct*
+entry:
+  %retval = alloca %struct._my_struct, align 8
+  %k.sroa.0.0.copyload = load i96* bitcast (%struct._my_struct* @initval to i96*), align 1
+  %k.sroa.1.12.copyload = load i32* bitcast ([4 x i8]* getelementptr inbounds (%struct._my_struct* @initval, i64 0, i32 1) to i32*), align 1
+  %0 = zext i32 %x to i96
+  %bf.value = shl nuw nsw i96 %0, 6
+  %bf.clear = and i96 %k.sroa.0.0.copyload, -288230376151711744
+  %1 = zext i32 %y to i96
+  %bf.value2 = shl nuw nsw i96 %1, 32
+  %bf.shl3 = and i96 %bf.value2, 288230371856744448
+  %bf.value.masked = and i96 %bf.value, 4294967232
+  %2 = zext i32 %z to i96
+  %bf.value8 = and i96 %2, 63
+  %bf.clear4 = or i96 %bf.shl3, %bf.value.masked
+  %bf.set5 = or i96 %bf.clear4, %bf.value8
+  %bf.set10 = or i96 %bf.set5, %bf.clear
+  %retval.0.cast7 = bitcast %struct._my_struct* %retval to i96*
+  store i96 %bf.set10, i96* %retval.0.cast7, align 8
+  %retval.12.idx8 = getelementptr inbounds %struct._my_struct* %retval, i64 0, i32 1
+  %retval.12.cast9 = bitcast [4 x i8]* %retval.12.idx8 to i32*
+  store i32 %k.sroa.1.12.copyload, i32* %retval.12.cast9, align 4
+  %trunc = trunc i96 %bf.set10 to i64
+  %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %trunc, 0
+  %retval.8.idx12 = getelementptr inbounds %struct._my_struct* %retval, i64 0, i32 0, i64 8
+  %retval.8.cast13 = bitcast i8* %retval.8.idx12 to i64*
+  %retval.8.load14 = load i64* %retval.8.cast13, align 8
+  %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.8.load14, 1
+  ret { i64, i64 } %.fca.1.insert
+}
diff --git a/test/Transforms/InstCombine/abs-1.ll b/test/Transforms/InstCombine/abs-1.ll
new file mode 100644
index 000000000000..807f238755b5
--- /dev/null
+++ b/test/Transforms/InstCombine/abs-1.ll
@@ -0,0 +1,41 @@
+; Test that the abs library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @abs(i32)
+declare i64 @labs(i64)
+declare i64 @llabs(i64)
+
+; Check abs(x) -> x >s -1 ? x : -x.
+
+define i32 @test_simplify1(i32 %x) {
+; CHECK: @test_simplify1
+  %ret = call i32 @abs(i32 %x)
+; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i32 0, %x
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[ISPOS]], i32 %x, i32 [[NEG]]
+  ret i32 %ret
+; CHECK-NEXT: ret i32 [[RET]]
+}
+
+define i64 @test_simplify2(i64 %x) {
+; CHECK: @test_simplify2
+  %ret = call i64 @labs(i64 %x)
+; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i64 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i64 0, %x
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[ISPOS]], i64 %x, i64 [[NEG]]
+  ret i64 %ret
+; CHECK-NEXT: ret i64 [[RET]]
+}
+
+define i64 @test_simplify3(i64 %x) {
+; CHECK: @test_simplify3
+  %ret = call i64 @llabs(i64 %x)
+; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i64 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i64 0, %x
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[ISPOS]], i64 %x, i64 [[NEG]]
+  ret i64 %ret
+; CHECK-NEXT: ret i64 [[RET]]
+}
diff --git a/test/Transforms/InstCombine/align-external.ll b/test/Transforms/InstCombine/align-external.ll
index d4a5d429912b..c3ef2dbb70f0 100644
--- a/test/Transforms/InstCombine/align-external.ll
+++ b/test/Transforms/InstCombine/align-external.ll
@@ -8,7 +8,7 @@
 ; CHECK: %q = add i64 %r, 1
 ; CHECK: ret i64 %q
 
-target datalayout = "-i32:8:32"
+target datalayout = "i32:8:32"
 
 @A = external global i32
 @B = weak_odr global i32 0
diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll
new file mode 100644
index 000000000000..4ded581a14c6
--- /dev/null
+++ b/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; These tests are extracted from bitcast.ll.
+; Verify that they also work correctly on big-endian targets.
+
+define float @test2(<2 x float> %A, <2 x i32> %B) {
+  %tmp28 = bitcast <2 x float> %A to i64  ; <i64> [#uses=2]
+  %tmp23 = trunc i64 %tmp28 to i32                ; <i32> [#uses=1]
+  %tmp24 = bitcast i32 %tmp23 to float            ; <float> [#uses=1]
+
+  %tmp = bitcast <2 x i32> %B to i64
+  %tmp2 = trunc i64 %tmp to i32                ; <i32> [#uses=1]
+  %tmp4 = bitcast i32 %tmp2 to float            ; <float> [#uses=1]
+
+  %add = fadd float %tmp24, %tmp4
+  ret float %add
+
+; CHECK: @test2
+; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 1
+; CHECK-NEXT:  bitcast <2 x i32> %B to <2 x float>
+; CHECK-NEXT:  %tmp4 = extractelement <2 x float> {{.*}}, i32 1
+; CHECK-NEXT:  %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT:  ret float %add
+}
+
+define float @test3(<2 x float> %A, <2 x i64> %B) {
+  %tmp28 = bitcast <2 x float> %A to i64
+  %tmp29 = lshr i64 %tmp28, 32
+  %tmp23 = trunc i64 %tmp29 to i32
+  %tmp24 = bitcast i32 %tmp23 to float
+
+  %tmp = bitcast <2 x i64> %B to i128
+  %tmp1 = lshr i128 %tmp, 64
+  %tmp2 = trunc i128 %tmp1 to i32
+  %tmp4 = bitcast i32 %tmp2 to float
+
+  %add = fadd float %tmp24, %tmp4
+  ret float %add
+
+; CHECK: @test3
+; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 0
+; CHECK-NEXT:  bitcast <2 x i64> %B to <4 x float>
+; CHECK-NEXT:  %tmp4 = extractelement <4 x float> {{.*}}, i32 1
+; CHECK-NEXT:  %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT:  ret float %add
+}
+
diff --git a/test/Transforms/InstCombine/bitcast-vector-fold.ll b/test/Transforms/InstCombine/bitcast-vector-fold.ll
index 8feec229171a..8fd7f35b7bb7 100644
--- a/test/Transforms/InstCombine/bitcast-vector-fold.ll
+++ b/test/Transforms/InstCombine/bitcast-vector-fold.ll
@@ -31,3 +31,8 @@ define <4 x i32> @test6() {
 	%tmp3 = bitcast <2 x double> <double 0.5, double 1.0> to <4 x i32>
 	ret <4 x i32> %tmp3
 }
+
+define i32 @test7() {
+       %tmp3 = bitcast <2 x half> <half 0xH1100, half 0xH0011> to i32
+       ret i32 %tmp3
+}
+\ No newline at end of file
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 8f6ae7d83527..1e6113256bf3 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -11,7 +11,7 @@ define i32 @test1(i64 %a) {
         %t3 = xor <2 x i32> %t1, %t2
         %t4 = extractelement <2 x i32> %t3, i32 0
         ret i32 %t4
-        
+
 ; CHECK: @test1
 ; CHECK: ret i32 0
 }
@@ -30,7 +30,7 @@ define float @test2(<2 x float> %A, <2 x i32> %B) {
 
   %add = fadd float %tmp24, %tmp4
   ret float %add
-  
+
 ; CHECK: @test2
 ; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 0
 ; CHECK-NEXT:  bitcast <2 x i32> %B to <2 x float>
@@ -55,7 +55,7 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
 
   %add = fadd float %tmp24, %tmp4
   ret float %add
-  
+
 ; CHECK: @test3
 ; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 1
 ; CHECK-NEXT:  bitcast <2 x i64> %B to <4 x float>
@@ -75,7 +75,7 @@ define <2 x i32> @test4(i32 %A, i32 %B){
   ; CHECK: @test4
   ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %A, i32 0
   ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %B, i32 1
-  ; CHECK-NEXT: ret <2 x i32> 
+  ; CHECK-NEXT: ret <2 x i32>
 
 }
 
@@ -92,7 +92,7 @@ define <2 x float> @test5(float %A, float %B) {
   ; CHECK: @test5
   ; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0
   ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1
-  ; CHECK-NEXT: ret <2 x float> 
+  ; CHECK-NEXT: ret <2 x float>
 }
 
 define <2 x float> @test6(float %A){
@@ -123,7 +123,7 @@ define i64 @Vec2(i64 %in) {
 }
 
 define i64 @All11(i64 %in) {
-  %out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1) 
+  %out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1)
   ret i64 %out
 ; CHECK: @All11
 ; CHECK: ret i64 0
@@ -131,9 +131,16 @@ define i64 @All11(i64 %in) {
 
 
 define i32 @All111(i32 %in) {
-  %out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1) 
+  %out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1)
   ret i32 %out
 ; CHECK: @All111
 ; CHECK: ret i32 0
 }
 
+define <2 x i16> @BitcastInsert(i32 %a) {
+  %v = insertelement <1 x i32> undef, i32 %a, i32 0
+  %r = bitcast <1 x i32> %v to <2 x i16>
+  ret <2 x i16> %r
+; CHECK: @BitcastInsert
+; CHECK: bitcast i32 %a to <2 x i16>
+}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index b4eb69d4363d..de738bb7c06d 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -473,14 +473,12 @@ define i64 @test51(i64 %A, i1 %cond) {
   %F = sext i32 %E to i64
   ret i64 %F
 ; CHECK: @test51
-
-; FIXME: disabled, see PR5997
-; HECK-NEXT: %C = and i64 %A, 4294967294
-; HECK-NEXT: %D = or i64 %A, 1
-; HECK-NEXT: %E = select i1 %cond, i64 %C, i64 %D
-; HECK-NEXT: %sext = shl i64 %E, 32
-; HECK-NEXT: %F = ashr i64 %sext, 32
-; HECK-NEXT: ret i64 %F
+; CHECK-NEXT: %C = and i64 %A, 4294967294
+; CHECK-NEXT: %D = or i64 %A, 1
+; CHECK-NEXT: %E = select i1 %cond, i64 %C, i64 %D
+; CHECK-NEXT: %sext = shl i64 %E, 32
+; CHECK-NEXT: %F = ashr exact i64 %sext, 32
+; CHECK-NEXT: ret i64 %F
 }
 
 define i32 @test52(i64 %A) {
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll
index f8e49110610a..72db66e3ab0f 100644
--- a/test/Transforms/InstCombine/compare-signs.ll
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -instcombine -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 ; PR5438
 
 ; TODO: This should also optimize down.
diff --git a/test/Transforms/InstCombine/constant-expr-datalayout.ll b/test/Transforms/InstCombine/constant-expr-datalayout.ll
new file mode 100644
index 000000000000..9a72c77afdb0
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-expr-datalayout.ll
@@ -0,0 +1,12 @@
+; RUN: opt -instcombine %s -S -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%test1.struct = type { i32, i32 }
+@test1.aligned_glbl = global %test1.struct zeroinitializer, align 4
+define void @test1(i64 *%ptr) {
+  store i64 and (i64 ptrtoint (i32* getelementptr (%test1.struct* @test1.aligned_glbl, i32 0, i32 1) to i64), i64 3), i64* %ptr
+; CHECK: store i64 0, i64* %ptr
+  ret void
+}
diff --git a/test/Transforms/InstCombine/cos-1.ll b/test/Transforms/InstCombine/cos-1.ll
new file mode 100644
index 000000000000..b92e448abd9f
--- /dev/null
+++ b/test/Transforms/InstCombine/cos-1.ll
@@ -0,0 +1,38 @@
+; Test that the cos library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=NO-FLOAT-SHRINK
+; RUN: opt < %s -instcombine -enable-double-float-shrink -S | FileCheck %s -check-prefix=DO-FLOAT-SHRINK
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare double @cos(double)
+
+; Check cos(-x) -> cos(x);
+
+define double @test_simplify1(double %d) {
+; NO-FLOAT-SHRINK: @test_simplify1
+  %neg = fsub double -0.000000e+00, %d
+  %cos = call double @cos(double %neg)
+; NO-FLOAT-SHRINK: call double @cos(double %d)
+  ret double %cos
+}
+
+define float @test_simplify2(float %f) {
+; DO-FLOAT-SHRINK: @test_simplify2
+  %conv1 = fpext float %f to double
+  %neg = fsub double -0.000000e+00, %conv1
+  %cos = call double @cos(double %neg)
+  %conv2 = fptrunc double %cos to float
+; DO-FLOAT-SHRINK: call float @cosf(float %f)
+  ret float %conv2
+}
+
+define float @test_simplify3(float %f) {
+; NO-FLOAT-SHRINK: @test_simplify3
+  %conv1 = fpext float %f to double
+  %neg = fsub double -0.000000e+00, %conv1
+  %cos = call double @cos(double %neg)
+; NO-FLOAT-SHRINK: call double @cos(double %conv1)
+  %conv2 = fptrunc double %cos to float
+  ret float %conv2
+}
diff --git a/test/Transforms/InstCombine/cos-2.ll b/test/Transforms/InstCombine/cos-2.ll
new file mode 100644
index 000000000000..2f2dfafe484d
--- /dev/null
+++ b/test/Transforms/InstCombine/cos-2.ll
@@ -0,0 +1,17 @@
+; Test that the cos library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare float @cos(double)
+
+; Check that cos functions with the wrong prototype aren't simplified.
+
+define float @test_no_simplify1(double %d) {
+; CHECK: @test_no_simplify1
+  %neg = fsub double -0.000000e+00, %d
+  %cos = call float @cos(double %neg)
+; CHECK: call float @cos(double %neg)
+  ret float %cos
+}
diff --git a/test/Transforms/SimplifyLibCalls/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index b668e4b9d342..084efdc989f9 100644
--- a/test/Transforms/SimplifyLibCalls/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -1,4 +1,4 @@
-; RUN: opt -simplify-libcalls -S < %s | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 
 
 @.str = private constant [3 x i8] c"%c\00"
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index f6892fc3e1f9..cdbcd865117c 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -28,22 +28,21 @@ entry:
   ret i8* %call, !dbg !21
 }
 
-!llvm.dbg.lv.foobar = !{!0, !7, !9}
-!llvm.dbg.sp = !{!1}
+!llvm.dbg.cu = !{!3}
 
-!0 = metadata !{i32 590081, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"string.h", metadata !"Game", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589841, i32 0, i32 12, metadata !"bits.c", metadata !"Game", metadata !"clang version 3.0 (trunk 127710)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 0, i32 12, metadata !26, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6}
-!6 = metadata !{i32 589839, metadata !3, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 590081, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 589860, metadata !3, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 590081, metadata !1, metadata !"__len", metadata !2, i32 50331726, metadata !10, i32 0} ; [ DW_TAG_arg_variable ]
+!6 = metadata !{i32 786447, metadata !3, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 786689, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 786468, metadata !3, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786689, metadata !1, metadata !"__len", metadata !2, i32 50331726, metadata !10, i32 0, null} ; [ DW_TAG_arg_variable ]
 !10 = metadata !{i32 589846, metadata !3, metadata !"size_t", metadata !2, i32 80, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_typedef ]
 !11 = metadata !{i32 589846, metadata !3, metadata !"__darwin_size_t", metadata !2, i32 90, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 589860, metadata !3, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 786468, metadata !3, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !13 = metadata !{metadata !"any pointer", metadata !14}
 !14 = metadata !{metadata !"omnipotent char", metadata !15}
 !15 = metadata !{metadata !"Simple C/C++ TBAA", null}
@@ -53,5 +52,10 @@ entry:
 !19 = metadata !{metadata !"long", metadata !14}
 !20 = metadata !{i32 78, i32 54, metadata !1, null}
 !21 = metadata !{i32 80, i32 3, metadata !22, null}
-!22 = metadata !{i32 589835, metadata !23, i32 80, i32 3, metadata !2, i32 7} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 589835, metadata !1, i32 79, i32 1, metadata !2, i32 6} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !23, i32 80, i32 3, metadata !2, i32 7} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !1, i32 79, i32 1, metadata !2, i32 6} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{metadata !1}
+!25 = metadata !{metadata !0, metadata !7, metadata !9}
+!26 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
+!27 = metadata !{metadata !"string.h", metadata !"Game"}
+!28 = metadata !{metadata !"bits.c", metadata !"Game"}
diff --git a/test/Transforms/InstCombine/devirt.ll b/test/Transforms/InstCombine/devirt.ll
index 6189dc2af4f9..9c7cf5d697e8 100644
--- a/test/Transforms/InstCombine/devirt.ll
+++ b/test/Transforms/InstCombine/devirt.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -S -o - %s | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 
 ; CHECK-NOT: getelementptr
 ; CHECK-NOT: ptrtoint
diff --git a/test/Transforms/InstCombine/disable-simplify-libcalls.ll b/test/Transforms/InstCombine/disable-simplify-libcalls.ll
index d81e9ae5bd73..c2c29368b1a8 100644
--- a/test/Transforms/InstCombine/disable-simplify-libcalls.ll
+++ b/test/Transforms/InstCombine/disable-simplify-libcalls.ll
@@ -37,6 +37,18 @@ declare i64 @strtoll(i8*, i8**, i32)
 declare i64 @strtoul(i8*, i8**, i32)
 declare i64 @strtoull(i8*, i8**, i32)
 declare i64 @strcspn(i8*, i8*)
+declare i32 @abs(i32)
+declare i32 @ffs(i32)
+declare i32 @ffsl(i64)
+declare i32 @ffsll(i64)
+declare i32 @fprintf(i8*, i8*)
+declare i32 @isascii(i32)
+declare i32 @isdigit(i32)
+declare i32 @toascii(i32)
+declare i64 @labs(i64)
+declare i64 @llabs(i64)
+declare i32 @printf(i8*)
+declare i32 @sprintf(i8*, i8*)
 
 define double @t1(double %x) {
 ; CHECK: @t1
@@ -234,3 +246,90 @@ define i64 @t25(i8* %y) {
   ret i64 %ret
 ; CHECK: call i64 @strcspn
 }
+
+define i32 @t26(i32 %y) {
+; CHECK: @t26
+  %ret = call i32 @abs(i32 %y)
+  ret i32 %ret
+; CHECK: call i32 @abs
+}
+
+define i32 @t27(i32 %y) {
+; CHECK: @t27
+  %ret = call i32 @ffs(i32 %y)
+  ret i32 %ret
+; CHECK: call i32 @ffs
+}
+
+define i32 @t28(i64 %y) {
+; CHECK: @t28
+  %ret = call i32 @ffsl(i64 %y)
+  ret i32 %ret
+; CHECK: call i32 @ffsl
+}
+
+define i32 @t29(i64 %y) {
+; CHECK: @t29
+  %ret = call i32 @ffsll(i64 %y)
+  ret i32 %ret
+; CHECK: call i32 @ffsll
+}
+
+define void @t30() {
+; CHECK: @t30
+  %x = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
+  call i32 @fprintf(i8* null, i8* %x)
+  ret void
+; CHECK: call i32 @fprintf
+}
+
+define i32 @t31(i32 %y) {
+; CHECK: @t31
+  %ret = call i32 @isascii(i32 %y)
+  ret i32 %ret
+; CHECK: call i32 @isascii
+}
+
+define i32 @t32(i32 %y) {
+; CHECK: @t32
+  %ret = call i32 @isdigit(i32 %y)
+  ret i32 %ret
+; CHECK: call i32 @isdigit
+}
+
+define i32 @t33(i32 %y) {
+; CHECK: @t33
+  %ret = call i32 @toascii(i32 %y)
+  ret i32 %ret
+; CHECK: call i32 @toascii
+}
+
+define i64 @t34(i64 %y) {
+; CHECK: @t34
+  %ret = call i64 @labs(i64 %y)
+  ret i64 %ret
+; CHECK: call i64 @labs
+}
+
+define i64 @t35(i64 %y) {
+; CHECK: @t35
+  %ret = call i64 @llabs(i64 %y)
+  ret i64 %ret
+; CHECK: call i64 @llabs
+}
+
+define void @t36() {
+; CHECK: @t36
+  %x = getelementptr inbounds [1 x i8]* @empty, i32 0, i32 0
+  call i32 @printf(i8* %x)
+  ret void
+; CHECK: call i32 @printf
+}
+
+define void @t37(i8* %x) {
+; CHECK: @t37
+  %y = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
+  call i32 @sprintf(i8* %x, i8* %y)
+  ret void
+; CHECK: call i32 @sprintf
+}
diff --git a/test/Transforms/SimplifyLibCalls/double-float-shrink.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index b4ab8b4ceb9d..e5448ee00765 100644
--- a/test/Transforms/SimplifyLibCalls/double-float-shrink.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -1,98 +1,98 @@
-; RUN: opt  < %s -simplify-libcalls -enable-double-float-shrink -S | FileCheck %s
+; RUN: opt < %s -instcombine -enable-double-float-shrink -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 define float @acos_test(float %f) nounwind readnone {
 ; CHECK: acos_test
-    %conv = fpext float %f to double
-    %call = call double @acos(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @acos(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @acosf(float %f)
 }
 
 define double @acos_test2(float %f) nounwind readnone {
 ; CHECK: acos_test2
-    %conv = fpext float %f to double
-    %call = call double @acos(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @acos(double %conv)
+   ret double %call
 ; CHECK: call double @acos(double %conv)
 }
 
 define float @acosh_test(float %f) nounwind readnone {
 ; CHECK: acosh_test
-    %conv = fpext float %f to double
-    %call = call double @acosh(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @acosh(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @acoshf(float %f)
 }
 
 define double @acosh_test2(float %f) nounwind readnone {
 ; CHECK: acosh_test2
-    %conv = fpext float %f to double
-    %call = call double @acosh(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @acosh(double %conv)
+   ret double %call
 ; CHECK: call double @acosh(double %conv)
 }
 
 define float @asin_test(float %f) nounwind readnone {
 ; CHECK: asin_test
-    %conv = fpext float %f to double
-    %call = call double @asin(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @asin(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @asinf(float %f)
 }
 
 define double @asin_test2(float %f) nounwind readnone {
 ; CHECK: asin_test2
-    %conv = fpext float %f to double
-    %call = call double @asin(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @asin(double %conv)
+   ret double %call
 ; CHECK: call double @asin(double %conv)
 }
 
 define float @asinh_test(float %f) nounwind readnone {
 ; CHECK: asinh_test
-    %conv = fpext float %f to double
-    %call = call double @asinh(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @asinh(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @asinhf(float %f)
 }
 
 define double @asinh_test2(float %f) nounwind readnone {
 ; CHECK: asinh_test2
-    %conv = fpext float %f to double
-    %call = call double @asinh(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @asinh(double %conv)
+   ret double %call
 ; CHECK: call double @asinh(double %conv)
 }
 
 define float @atan_test(float %f) nounwind readnone {
 ; CHECK: atan_test
-    %conv = fpext float %f to double
-    %call = call double @atan(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @atan(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @atanf(float %f)
 }
 
 define double @atan_test2(float %f) nounwind readnone {
 ; CHECK: atan_test2
-    %conv = fpext float %f to double
-    %call = call double @atan(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @atan(double %conv)
+   ret double %call
 ; CHECK: call double @atan(double %conv)
 }
 define float @atanh_test(float %f) nounwind readnone {
 ; CHECK: atanh_test
-    %conv = fpext float %f to double
-    %call = call double @atanh(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @atanh(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @atanhf(float %f)
 }
 
@@ -105,210 +105,210 @@ define double @atanh_test2(float %f) nounwind readnone {
 }
 define float @cbrt_test(float %f) nounwind readnone {
 ; CHECK: cbrt_test
-    %conv = fpext float %f to double
-    %call = call double @cbrt(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @cbrt(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @cbrtf(float %f)
 }
 
 define double @cbrt_test2(float %f) nounwind readnone {
 ; CHECK: cbrt_test2
-    %conv = fpext float %f to double
-    %call = call double @cbrt(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @cbrt(double %conv)
+   ret double %call
 ; CHECK: call double @cbrt(double %conv)
 }
 define float @exp_test(float %f) nounwind readnone {
 ; CHECK: exp_test
-    %conv = fpext float %f to double
-    %call = call double @exp(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @exp(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @expf(float %f)
 }
 
 define double @exp_test2(float %f) nounwind readnone {
 ; CHECK: exp_test2
-    %conv = fpext float %f to double
-    %call = call double @exp(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @exp(double %conv)
+   ret double %call
 ; CHECK: call double @exp(double %conv)
 }
 define float @expm1_test(float %f) nounwind readnone {
 ; CHECK: expm1_test
-    %conv = fpext float %f to double
-    %call = call double @expm1(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @expm1(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @expm1f(float %f)
 }
 
 define double @expm1_test2(float %f) nounwind readnone {
 ; CHECK: expm1_test2
-    %conv = fpext float %f to double
-    %call = call double @expm1(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @expm1(double %conv)
+   ret double %call
 ; CHECK: call double @expm1(double %conv)
 }
 define float @exp10_test(float %f) nounwind readnone {
 ; CHECK: exp10_test
-    %conv = fpext float %f to double
-    %call = call double @exp10(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @exp10(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @exp10f(float %f)
 }
 
 define double @exp10_test2(float %f) nounwind readnone {
 ; CHECK: exp10_test2
-    %conv = fpext float %f to double
-    %call = call double @exp10(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @exp10(double %conv)
+   ret double %call
 ; CHECK: call double @exp10(double %conv)
 }
 define float @log_test(float %f) nounwind readnone {
 ; CHECK: log_test
-    %conv = fpext float %f to double
-    %call = call double @log(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @log(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @logf(float %f)
 }
 
 define double @log_test2(float %f) nounwind readnone {
 ; CHECK: log_test2
-    %conv = fpext float %f to double
-    %call = call double @log(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @log(double %conv)
+   ret double %call
 ; CHECK: call double @log(double %conv)
 }
 define float @log10_test(float %f) nounwind readnone {
 ; CHECK: log10_test
-    %conv = fpext float %f to double
-    %call = call double @log10(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @log10(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @log10f(float %f)
 }
 
 define double @log10_test2(float %f) nounwind readnone {
 ; CHECK: log10_test2
-    %conv = fpext float %f to double
-    %call = call double @log10(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @log10(double %conv)
+   ret double %call
 ; CHECK: call double @log10(double %conv)
 }
 define float @log1p_test(float %f) nounwind readnone {
 ; CHECK: log1p_test
-    %conv = fpext float %f to double
-    %call = call double @log1p(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @log1p(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @log1pf(float %f)
 }
 
 define double @log1p_test2(float %f) nounwind readnone {
 ; CHECK: log1p_test2
-    %conv = fpext float %f to double
-    %call = call double @log1p(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @log1p(double %conv)
+   ret double %call
 ; CHECK: call double @log1p(double %conv)
 }
 define float @log2_test(float %f) nounwind readnone {
 ; CHECK: log2_test
-    %conv = fpext float %f to double
-    %call = call double @log2(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @log2(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @log2f(float %f)
 }
 
 define double @log2_test2(float %f) nounwind readnone {
 ; CHECK: log2_test2
-    %conv = fpext float %f to double
-    %call = call double @log2(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @log2(double %conv)
+   ret double %call
 ; CHECK: call double @log2(double %conv)
 }
 define float @logb_test(float %f) nounwind readnone {
 ; CHECK: logb_test
-    %conv = fpext float %f to double
-    %call = call double @logb(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @logb(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @logbf(float %f)
 }
 
 define double @logb_test2(float %f) nounwind readnone {
 ; CHECK: logb_test2
-    %conv = fpext float %f to double
-    %call = call double @logb(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @logb(double %conv)
+   ret double %call
 ; CHECK: call double @logb(double %conv)
 }
 define float @sin_test(float %f) nounwind readnone {
 ; CHECK: sin_test
-    %conv = fpext float %f to double
-    %call = call double @sin(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @sin(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @sinf(float %f)
 }
 
 define double @sin_test2(float %f) nounwind readnone {
 ; CHECK: sin_test2
-    %conv = fpext float %f to double
-    %call = call double @sin(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @sin(double %conv)
+   ret double %call
 ; CHECK: call double @sin(double %conv)
 }
 define float @sqrt_test(float %f) nounwind readnone {
 ; CHECK: sqrt_test
-    %conv = fpext float %f to double
-    %call = call double @sqrt(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @sqrt(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @sqrtf(float %f)
 }
 
 define double @sqrt_test2(float %f) nounwind readnone {
 ; CHECK: sqrt_test2
-    %conv = fpext float %f to double
-    %call = call double @sqrt(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @sqrt(double %conv)
+   ret double %call
 ; CHECK: call double @sqrt(double %conv)
 }
 define float @tan_test(float %f) nounwind readnone {
 ; CHECK: tan_test
-    %conv = fpext float %f to double
-    %call = call double @tan(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @tan(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @tanf(float %f)
 }
 
 define double @tan_test2(float %f) nounwind readnone {
 ; CHECK: tan_test2
-    %conv = fpext float %f to double
-    %call = call double @tan(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @tan(double %conv)
+   ret double %call
 ; CHECK: call double @tan(double %conv)
 }
 define float @tanh_test(float %f) nounwind readnone {
 ; CHECK: tanh_test
-    %conv = fpext float %f to double
-    %call = call double @tanh(double %conv)
-    %conv1 = fptrunc double %call to float
-    ret float %conv1
+   %conv = fpext float %f to double
+   %call = call double @tanh(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
 ; CHECK: call float @tanhf(float %f)
 }
 
 define double @tanh_test2(float %f) nounwind readnone {
 ; CHECK: tanh_test2
-    %conv = fpext float %f to double
-    %call = call double @tanh(double %conv)
-    ret double %call
+   %conv = fpext float %f to double
+   %call = call double @tanh(double %conv)
+   ret double %call
 ; CHECK: call double @tanh(double %conv)
 }
 
diff --git a/test/Transforms/InstCombine/double-float-shrink-2.ll b/test/Transforms/InstCombine/double-float-shrink-2.ll
new file mode 100644
index 000000000000..7f6df92c96c5
--- /dev/null
+++ b/test/Transforms/InstCombine/double-float-shrink-2.ll
@@ -0,0 +1,80 @@
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY %s
+
+; DO-SIMPLIFY: call float @floorf(
+; DO-SIMPLIFY: call float @ceilf(
+; DO-SIMPLIFY: call float @roundf(
+; DO-SIMPLIFY: call float @nearbyintf(
+; DO-SIMPLIFY: call float @truncf(
+; DO-SIMPLIFY: call float @fabsf(
+
+; C89-SIMPLIFY: call float @floorf(
+; C89-SIMPLIFY: call float @ceilf(
+; C89-SIMPLIFY: call double @round(
+; C89-SIMPLIFY: call double @nearbyint(
+
+; DONT-SIMPLIFY: call double @floor(
+; DONT-SIMPLIFY: call double @ceil(
+; DONT-SIMPLIFY: call double @round(
+; DONT-SIMPLIFY: call double @nearbyint(
+; DONT-SIMPLIFY: call double @trunc(
+; DONT-SIMPLIFY: call double @fabs(
+
+declare double @floor(double)
+declare double @ceil(double)
+declare double @round(double)
+declare double @nearbyint(double)
+declare double @trunc(double)
+declare double @fabs(double)
+
+define float @test_floor(float %C) {
+  %D = fpext float %C to double
+  ; --> floorf
+  %E = call double @floor(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+define float @test_ceil(float %C) {
+  %D = fpext float %C to double
+  ; --> ceilf
+  %E = call double @ceil(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+define float @test_round(float %C) {
+  %D = fpext float %C to double
+  ; --> roundf
+  %E = call double @round(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+define float @test_nearbyint(float %C) {
+  %D = fpext float %C to double
+  ; --> nearbyintf
+  %E = call double @nearbyint(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+define float @test_trunc(float %C) {
+  %D = fpext float %C to double
+  ; --> truncf
+  %E = call double @trunc(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+define float @test_fabs(float %C) {
+  %D = fpext float %C to double
+  ; --> fabsf
+  %E = call double @fabs(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
diff --git a/test/Transforms/InstCombine/exact.ll b/test/Transforms/InstCombine/exact.ll
index 14741e3c1c33..88ca88c3b927 100644
--- a/test/Transforms/InstCombine/exact.ll
+++ b/test/Transforms/InstCombine/exact.ll
@@ -99,9 +99,9 @@ define i1 @ashr_icmp2(i64 %X) nounwind {
 ; PR9998
 ; Make sure we don't transform the ashr here into an sdiv
 ; CHECK: @pr9998
-; CHECK: = and i32 %V, 1
-; CHECK: %Z = icmp ne
-; CHECK: ret i1 %Z
+; CHECK:      [[BIT:%[A-Za-z0-9.]+]] = and i32 %V, 1
+; CHECK-NEXT: [[CMP:%[A-Za-z0-9.]+]] = icmp ne i32 [[BIT]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
 define i1 @pr9998(i32 %V) nounwind {
 entry:
   %W = shl i32 %V, 31
@@ -112,6 +112,7 @@ entry:
 }
 
 
+
 ; CHECK: @udiv_icmp1
 ; CHECK: icmp ne i64 %X, 0
 define i1 @udiv_icmp1(i64 %X) nounwind {
diff --git a/test/Transforms/InstCombine/exp2-1.ll b/test/Transforms/InstCombine/exp2-1.ll
new file mode 100644
index 000000000000..1b0ad5000412
--- /dev/null
+++ b/test/Transforms/InstCombine/exp2-1.ll
@@ -0,0 +1,76 @@
+; Test that the exp2 library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare double @exp2(double)
+declare float @exp2f(float)
+
+; Check exp2(sitofp(x)) -> ldexp(1.0, sext(x)).
+
+define double @test_simplify1(i32 %x) {
+; CHECK: @test_simplify1
+  %conv = sitofp i32 %x to double
+  %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+  ret double %ret
+}
+
+define double @test_simplify2(i16 signext %x) {
+; CHECK: @test_simplify2
+  %conv = sitofp i16 %x to double
+  %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+  ret double %ret
+}
+
+define double @test_simplify3(i8 signext %x) {
+; CHECK: @test_simplify3
+  %conv = sitofp i8 %x to double
+  %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+  ret double %ret
+}
+
+define float @test_simplify4(i32 %x) {
+; CHECK: @test_simplify4
+  %conv = sitofp i32 %x to float
+  %ret = call float @exp2f(float %conv)
+; CHECK: call float @ldexpf
+  ret float %ret
+}
+
+; Check exp2(uitofp(x)) -> ldexp(1.0, zext(x)).
+
+define double @test_no_simplify1(i32 %x) {
+; CHECK: @test_no_simplify1
+  %conv = uitofp i32 %x to double
+  %ret = call double @exp2(double %conv)
+; CHECK: call double @exp2
+  ret double %ret
+}
+
+define double @test_simplify6(i16 zeroext %x) {
+; CHECK: @test_simplify6
+  %conv = uitofp i16 %x to double
+  %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+  ret double %ret
+}
+
+define double @test_simplify7(i8 zeroext %x) {
+; CHECK: @test_simplify7
+  %conv = uitofp i8 %x to double
+  %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+  ret double %ret
+}
+
+define float @test_simplify8(i8 zeroext %x) {
+; CHECK: @test_simplify8
+  %conv = uitofp i8 %x to float
+  %ret = call float @exp2f(float %conv)
+; CHECK: call float @ldexpf
+  ret float %ret
+}
diff --git a/test/Transforms/InstCombine/exp2-2.ll b/test/Transforms/InstCombine/exp2-2.ll
new file mode 100644
index 000000000000..bed063798e29
--- /dev/null
+++ b/test/Transforms/InstCombine/exp2-2.ll
@@ -0,0 +1,17 @@
+; Test that the exp2 library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare float @exp2(double)
+
+; Check that exp2 functions with the wrong prototype aren't simplified.
+
+define float @test_no_simplify1(i32 %x) {
+; CHECK: @test_no_simplify1
+  %conv = sitofp i32 %x to double
+  %ret = call float @exp2(double %conv)
+; CHECK: call float @exp2(double %conv)
+  ret float %ret
+}
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
new file mode 100644
index 000000000000..edcbcc71dfb4
--- /dev/null
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -0,0 +1,467 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; testing-case "float fold(float a) { return 1.2f * a * 2.3f; }"
+; 1.2f and 2.3f is supposed to be fold.
+define float @fold(float %a) {
+  %mul = fmul fast float %a, 0x3FF3333340000000
+  %mul1 = fmul fast float %mul, 0x4002666660000000
+  ret float %mul1
+; CHECK: @fold
+; CHECK: fmul fast float %a, 0x4006147AE0000000
+}
+
+; Same testing-case as the one used in fold() except that the operators have
+; fixed FP mode.
+define float @notfold(float %a) {
+; CHECK: @notfold
+; CHECK: %mul = fmul fast float %a, 0x3FF3333340000000
+  %mul = fmul fast float %a, 0x3FF3333340000000
+  %mul1 = fmul float %mul, 0x4002666660000000
+  ret float %mul1
+}
+
+define float @fold2(float %a) {
+; CHECK: @fold2
+; CHECK: fmul fast float %a, 0x4006147AE0000000
+  %mul = fmul float %a, 0x3FF3333340000000
+  %mul1 = fmul fast float %mul, 0x4002666660000000
+  ret float %mul1
+}
+
+; C * f1 + f1 = (C+1) * f1
+define double @fold3(double %f1) {
+  %t1 = fmul fast double 2.000000e+00, %f1
+  %t2 = fadd fast double %f1, %t1
+  ret double %t2
+; CHECK: @fold3
+; CHECK: fmul fast double %f1, 3.000000e+00
+}
+
+; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
+define float @fold4(float %f1, float %f2) {
+  %sub = fsub float 4.000000e+00, %f1
+  %sub1 = fsub float 5.000000e+00, %f2
+  %add = fadd fast float %sub, %sub1
+  ret float %add
+; CHECK: @fold4
+; CHECK: %1 = fadd fast float %f1, %f2
+; CHECK: fsub fast float 9.000000e+00, %1
+}
+
+; (X + C1) + C2 => X + (C1 + C2)
+define float @fold5(float %f1, float %f2) {
+  %add = fadd float %f1, 4.000000e+00
+  %add1 = fadd fast float %add, 5.000000e+00
+  ret float %add1
+; CHECK: @fold5
+; CHECK: fadd fast float %f1, 9.000000e+00
+}
+
+; (X + X) + X => 3.0 * X
+define float @fold6(float %f1) {
+  %t1 = fadd fast float %f1, %f1
+  %t2 = fadd fast float %f1, %t1
+  ret float %t2
+; CHECK: @fold6
+; CHECK: fmul fast float %f1, 3.000000e+00
+}
+
+; C1 * X + (X + X) = (C1 + 2) * X
+define float @fold7(float %f1) {
+  %t1 = fmul fast float %f1, 5.000000e+00
+  %t2 = fadd fast float %f1, %f1
+  %t3 = fadd fast float %t1, %t2
+  ret float %t3
+; CHECK: @fold7
+; CHECK: fmul fast float %f1, 7.000000e+00
+}
+
+; (X + X) + (X + X) => 4.0 * X
+define float @fold8(float %f1) {
+  %t1 = fadd fast float %f1, %f1
+  %t2 = fadd fast float %f1, %f1
+  %t3 = fadd fast float %t1, %t2
+  ret float %t3
+; CHECK: fold8
+; CHECK: fmul fast float %f1, 4.000000e+00
+}
+
+; X - (X + Y) => 0 - Y
+define float @fold9(float %f1, float %f2) {
+  %t1 = fadd float %f1, %f2
+  %t3 = fsub fast float %f1, %t1
+  ret float %t3
+
+; CHECK: @fold9
+; CHECK: fsub fast float 0.000000e+00, %f2
+}
+
+; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
+; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
+; top of resulting simplified expression tree may potentially reveal some
+; optimization opportunities in the super-expression trees.
+;
+define float @fold10(float %f1, float %f2) {
+  %t1 = fadd fast float 2.000000e+00, %f1
+  %t2 = fsub fast float %f2, 3.000000e+00
+  %t3 = fadd fast float %t1, %t2
+  ret float %t3
+; CHECK: @fold10
+; CHECK: %t3 = fadd fast float %t2, -1.000000e+00
+; CHECK: ret float %t3
+}
+
+; once cause Crash/miscompilation
+define float @fail1(float %f1, float %f2) {
+  %conv3 = fadd fast float %f1, -1.000000e+00
+  %add = fadd fast float %conv3, %conv3
+  %add2 = fadd fast float %add, %conv3
+  ret float %add2
+; CHECK: @fail1
+; CHECK: ret
+}
+
+define double @fail2(double %f1, double %f2) {
+  %t1 = fsub fast double %f1, %f2
+  %t2 = fadd fast double %f1, %f2
+  %t3 = fsub fast double %t1, %t2
+  ret double %t3
+; CHECK: @fail2
+; CHECK: ret
+}
+
+; c1 * x - x => (c1 - 1.0) * x
+define float @fold13(float %x) {
+  %mul = fmul fast float %x, 7.000000e+00
+  %sub = fsub fast float %mul, %x
+  ret float %sub
+; CHECK: fold13
+; CHECK: fmul fast float %x, 6.000000e+00
+; CHECK: ret
+}
+
+; =========================================================================
+;
+;   Testing-cases about fmul begin
+;
+; =========================================================================
+
+; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
+define float @fmul_distribute1(float %f1) {
+  %t1 = fmul float %f1, 6.0e+3
+  %t2 = fadd float %t1, 2.0e+3
+  %t3 = fmul fast float %t2, 5.0e+3
+  ret float %t3
+; CHECK: @fmul_distribute1
+; CHECK: %1 = fmul fast float %f1, 3.000000e+07
+; CHECK: %t3 = fadd fast float %1, 1.000000e+07
+}
+
+; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
+define double @fmul_distribute2(double %f1, double %f2) {
+  %t1 = fdiv double %f1, 3.0e+0
+  %t2 = fadd double %t1, 5.0e+1
+  ; 0x10000000000000 = DBL_MIN
+  %t3 = fmul fast double %t2, 0x10000000000000
+  ret double %t3
+
+; CHECK: @fmul_distribute2
+; CHECK: %1 = fdiv fast double %f1, 0x7FE8000000000000
+; CHECK: fadd fast double %1, 0x69000000000000
+}
+
+; 5.0e-1 * DBL_MIN yields denormal, so "(f1*3.0 + 5.0e-1) * DBL_MIN" cannot
+; be simplified into f1 * (3.0*DBL_MIN) + (5.0e-1*DBL_MIN)
+define double @fmul_distribute3(double %f1) {
+  %t1 = fdiv double %f1, 3.0e+0
+  %t2 = fadd double %t1, 5.0e-1
+  %t3 = fmul fast double %t2, 0x10000000000000
+  ret double %t3
+
+; CHECK: @fmul_distribute3
+; CHECK: fmul fast double %t2, 0x10000000000000
+}
+
+; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
+define float @fmul_distribute4(float %f1) {
+  %t1 = fmul float %f1, 6.0e+3
+  %t2 = fsub float 2.0e+3, %t1
+  %t3 = fmul fast float %t2, 5.0e+3
+  ret float %t3
+; CHECK: @fmul_distribute4
+; CHECK: %1 = fmul fast float %f1, 3.000000e+07
+; CHECK: %t3 = fsub fast float 1.000000e+07, %1
+}
+
+; C1/X * C2 => (C1*C2) / X
+define float @fmul2(float %f1) {
+  %t1 = fdiv float 2.0e+3, %f1
+  %t3 = fmul fast float %t1, 6.0e+3
+  ret float %t3
+; CHECK: @fmul2
+; CHECK: fdiv fast float 1.200000e+07, %f1
+}
+
+; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal Fp)
+define float @fmul3(float %f1, float %f2) {
+  %t1 = fdiv float %f1, 2.0e+3
+  %t3 = fmul fast float %t1, 6.0e+3
+  ret float %t3
+; CHECK: @fmul3
+; CHECK: fmul fast float %f1, 3.000000e+00
+}
+
+; Rule "X/C1 * C2 => X * (C2/C1) is not applicable if C2/C1 is either a special
+; value of a denormal. The 0x3810000000000000 here take value FLT_MIN
+;
+define float @fmul4(float %f1, float %f2) {
+  %t1 = fdiv float %f1, 2.0e+3
+  %t3 = fmul fast float %t1, 0x3810000000000000
+  ret float %t3
+; CHECK: @fmul4
+; CHECK: fmul fast float %t1, 0x3810000000000000
+}
+
+; X / C1 * C2 => X / (C2/C1) if  C1/C2 is either a special value of a denormal,
+;  and C2/C1 is a normal value.
+;
+define float @fmul5(float %f1, float %f2) {
+  %t1 = fdiv float %f1, 3.0e+0
+  %t3 = fmul fast float %t1, 0x3810000000000000
+  ret float %t3
+; CHECK: @fmul5
+; CHECK: fdiv fast float %f1, 0x47E8000000000000
+}
+
+; (X*Y) * X => (X*X) * Y
+define float @fmul6(float %f1, float %f2) {
+  %mul = fmul float %f1, %f2
+  %mul1 = fmul fast float %mul, %f1
+  ret float %mul1
+; CHECK: @fmul6
+; CHECK: fmul fast float %f1, %f1
+}
+
+; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
+define float @fmul7(float %f1, float %f2) {
+  %mul = fmul float %f1, %f2
+  %mul1 = fmul fast float %mul, %f1
+  %add = fadd float %mul1, %mul
+  ret float %add
+; CHECK: @fmul7
+; CHECK: fmul fast float %mul, %f1
+}
+
+; =========================================================================
+;
+;   Testing-cases about negation
+;
+; =========================================================================
+define float @fneg1(float %f1, float %f2) {
+  %sub = fsub float -0.000000e+00, %f1
+  %sub1 = fsub nsz float 0.000000e+00, %f2
+  %mul = fmul float %sub, %sub1
+  ret float %mul
+; CHECK: @fneg1
+; CHECK: fmul float %f1, %f2
+}
+
+; =========================================================================
+;
+;   Testing-cases about div
+;
+; =========================================================================
+
+; X/C1 / C2 => X * (1/(C2*C1))
+define float @fdiv1(float %x) {
+  %div = fdiv float %x, 0x3FF3333340000000
+  %div1 = fdiv fast float %div, 0x4002666660000000
+  ret float %div1
+; 0x3FF3333340000000 = 1.2f
+; 0x4002666660000000 = 2.3f
+; 0x3FD7303B60000000 = 0.36231884057971014492
+; CHECK: @fdiv1
+; CHECK: fmul fast float %x, 0x3FD7303B60000000
+}
+
+; X*C1 / C2 => X * (C1/C2)
+define float @fdiv2(float %x) {
+  %mul = fmul float %x, 0x3FF3333340000000
+  %div1 = fdiv fast float %mul, 0x4002666660000000
+  ret float %div1
+
+; 0x3FF3333340000000 = 1.2f
+; 0x4002666660000000 = 2.3f
+; 0x3FE0B21660000000 = 0.52173918485641479492
+; CHECK: @fdiv2
+; CHECK: fmul fast float %x, 0x3FE0B21660000000
+}
+
+; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) is C2/C1 is a denormal
+;
+define float @fdiv3(float %x) {
+  %div = fdiv float %x, 0x47EFFFFFE0000000
+  %div1 = fdiv fast float %div, 0x4002666660000000
+  ret float %div1
+; CHECK: @fdiv3
+; CHECK: fdiv float %x, 0x47EFFFFFE0000000
+}
+
+; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
+define float @fdiv4(float %x) {
+  %mul = fmul float %x, 0x47EFFFFFE0000000
+  %div = fdiv float %mul, 0x3FC99999A0000000
+  ret float %div
+; CHECK: @fdiv4
+; CHECK: fmul float %x, 0x47EFFFFFE0000000
+}
+
+; (X/Y)/Z = > X/(Y*Z)
+define float @fdiv5(float %f1, float %f2, float %f3) {
+  %t1 = fdiv float %f1, %f2
+  %t2 = fdiv fast float %t1, %f3
+  ret float %t2
+; CHECK: @fdiv5
+; CHECK: fmul float %f2, %f3
+}
+
+; Z/(X/Y) = > (Z*Y)/X
+define float @fdiv6(float %f1, float %f2, float %f3) {
+  %t1 = fdiv float %f1, %f2
+  %t2 = fdiv fast float %f3, %t1
+  ret float %t2
+; CHECK: @fdiv6
+; CHECK: fmul float %f3, %f2
+}
+
+; C1/(X*C2) => (C1/C2) / X
+define float @fdiv7(float %x) {
+  %t1 = fmul float %x, 3.0e0
+  %t2 = fdiv fast float 15.0e0, %t1
+  ret float %t2
+; CHECK: @fdiv7
+; CHECK: fdiv fast float 5.000000e+00, %x
+}
+
+; C1/(X/C2) => (C1*C2) / X
+define float @fdiv8(float %x) {
+  %t1 = fdiv float %x, 3.0e0
+  %t2 = fdiv fast float 15.0e0, %t1
+  ret float %t2
+; CHECK: @fdiv8
+; CHECK: fdiv fast float 4.500000e+01, %x
+}
+
+; C1/(C2/X) => (C1/C2) * X
+define float @fdiv9(float %x) {
+  %t1 = fdiv float 3.0e0, %x
+  %t2 = fdiv fast float 15.0e0, %t1
+  ret float %t2
+; CHECK: @fdiv9
+; CHECK: fmul fast float %x, 5.000000e+00
+}
+
+; =========================================================================
+;
+;   Testing-cases about factorization
+;
+; =========================================================================
+; x*z + y*z => (x+y) * z
+define float @fact_mul1(float %x, float %y, float %z) {
+  %t1 = fmul fast float %x, %z
+  %t2 = fmul fast float %y, %z
+  %t3 = fadd fast float %t1, %t2
+  ret float %t3
+; CHECK: @fact_mul1
+; CHECK: fmul fast float %1, %z
+}
+
+; z*x + y*z => (x+y) * z
+define float @fact_mul2(float %x, float %y, float %z) {
+  %t1 = fmul fast float %z, %x
+  %t2 = fmul fast float %y, %z
+  %t3 = fsub fast float %t1, %t2
+  ret float %t3
+; CHECK: @fact_mul2
+; CHECK: fmul fast float %1, %z
+}
+
+; z*x - z*y => (x-y) * z
+define float @fact_mul3(float %x, float %y, float %z) {
+  %t2 = fmul fast float %z, %y
+  %t1 = fmul fast float %z, %x
+  %t3 = fsub fast float %t1, %t2
+  ret float %t3
+; CHECK: @fact_mul3
+; CHECK: fmul fast float %1, %z
+}
+
+; x*z - z*y => (x-y) * z
+define float @fact_mul4(float %x, float %y, float %z) {
+  %t1 = fmul fast float %x, %z
+  %t2 = fmul fast float %z, %y
+  %t3 = fsub fast float %t1, %t2
+  ret float %t3
+; CHECK: @fact_mul4
+; CHECK: fmul fast float %1, %z
+}
+
+; x/y + x/z, no xform
+define float @fact_div1(float %x, float %y, float %z) {
+  %t1 = fdiv fast float %x, %y
+  %t2 = fdiv fast float %x, %z
+  %t3 = fadd fast float %t1, %t2
+  ret float %t3
+; CHECK: fact_div1
+; CHECK: fadd fast float %t1, %t2
+}
+
+; x/y + z/x; no xform
+define float @fact_div2(float %x, float %y, float %z) {
+  %t1 = fdiv fast float %x, %y
+  %t2 = fdiv fast float %z, %x
+  %t3 = fadd fast float %t1, %t2
+  ret float %t3
+; CHECK: fact_div2
+; CHECK: fadd fast float %t1, %t2
+}
+
+; y/x + z/x => (y+z)/x
+define float @fact_div3(float %x, float %y, float %z) {
+  %t1 = fdiv fast float %y, %x
+  %t2 = fdiv fast float %z, %x
+  %t3 = fadd fast float %t1, %t2
+  ret float %t3
+; CHECK: fact_div3
+; CHECK: fdiv fast float %1, %x
+}
+
+; y/x - z/x => (y-z)/x
+define float @fact_div4(float %x, float %y, float %z) {
+  %t1 = fdiv fast float %y, %x
+  %t2 = fdiv fast float %z, %x
+  %t3 = fsub fast float %t1, %t2
+  ret float %t3
+; CHECK: fact_div4
+; CHECK: fdiv fast float %1, %x
+}
+
+; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
+define float @fact_div5(float %x) {
+  %t1 = fdiv fast float 0x3810000000000000, %x
+  %t2 = fdiv fast float 0x3800000000000000, %x
+  %t3 = fadd fast float %t1, %t2
+  ret float %t3
+; CHECK: fact_div5
+; CHECK: fdiv fast float 0x3818000000000000, %x
+}
+
+; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
+define float @fact_div6(float %x) {
+  %t1 = fdiv fast float 0x3810000000000000, %x
+  %t2 = fdiv fast float 0x3800000000000000, %x
+  %t3 = fsub fast float %t1, %t2
+  ret float %t3
+; CHECK: fact_div6
+; CHECK: %t3 = fsub fast float %t1, %t2
+}
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
new file mode 100644
index 000000000000..0510df3d24b9
--- /dev/null
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -0,0 +1,134 @@
+; Test that the ffs* library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=LINUX
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @ffs(i32)
+declare i32 @ffsl(i32)
+declare i32 @ffsll(i64)
+
+; Check ffs(0) -> 0.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+  %ret = call i32 @ffs(i32 0)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify2() {
+; CHECK-LINUX: @test_simplify2
+  %ret = call i32 @ffsl(i32 0)
+  ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 0
+}
+
+define i32 @test_simplify3() {
+; CHECK-LINUX: @test_simplify3
+  %ret = call i32 @ffsll(i64 0)
+  ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 0
+}
+
+; Check ffs(c) -> cttz(c) + 1, where 'c' is a constant.
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+  %ret = call i32 @ffs(i32 1)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify5() {
+; CHECK: @test_simplify5
+  %ret = call i32 @ffs(i32 2048)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 12
+}
+
+define i32 @test_simplify6() {
+; CHECK: @test_simplify6
+  %ret = call i32 @ffs(i32 65536)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 17
+}
+
+define i32 @test_simplify7() {
+; CHECK-LINUX: @test_simplify7
+  %ret = call i32 @ffsl(i32 65536)
+  ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 17
+}
+
+define i32 @test_simplify8() {
+; CHECK-LINUX: @test_simplify8
+  %ret = call i32 @ffsll(i64 1024)
+  ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 11
+}
+
+define i32 @test_simplify9() {
+; CHECK-LINUX: @test_simplify9
+  %ret = call i32 @ffsll(i64 65536)
+  ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 17
+}
+
+define i32 @test_simplify10() {
+; CHECK-LINUX: @test_simplify10
+  %ret = call i32 @ffsll(i64 17179869184)
+  ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 35
+}
+
+define i32 @test_simplify11() {
+; CHECK-LINUX: @test_simplify11
+  %ret = call i32 @ffsll(i64 281474976710656)
+  ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 49
+}
+
+define i32 @test_simplify12() {
+; CHECK-LINUX: @test_simplify12
+  %ret = call i32 @ffsll(i64 1152921504606846976)
+  ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 61
+}
+
+; Check ffs(x) -> x != 0 ? (i32)llvm.cttz(x) + 1 : 0.
+
+define i32 @test_simplify13(i32 %x) {
+; CHECK: @test_simplify13
+  %ret = call i32 @ffs(i32 %x)
+; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
+  ret i32 %ret
+; CHECK-NEXT: ret i32 [[RET]]
+}
+
+define i32 @test_simplify14(i32 %x) {
+; CHECK-LINUX: @test_simplify14
+  %ret = call i32 @ffsl(i32 %x)
+; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
+; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
+; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
+  ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 [[RET]]
+}
+
+define i32 @test_simplify15(i64 %x) {
+; CHECK-LINUX: @test_simplify15
+  %ret = call i32 @ffsll(i64 %x)
+; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i64 [[CTTZ]], 1
+; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
+; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
+; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0
+  ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 [[RET]]
+}
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
new file mode 100644
index 000000000000..3671b4c6991c
--- /dev/null
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -0,0 +1,72 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; (-0.0 - X) * C => X * -C
+define float @test1(float %x) {
+  %sub = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub, 2.0e+1
+  ret float %mul
+
+; CHECK: @test1
+; CHECK: fmul float %x, -2.000000e+01
+}
+
+; (0.0 - X) * C => X * -C
+define float @test2(float %x) {
+  %sub = fsub nsz float 0.000000e+00, %x
+  %mul = fmul float %sub, 2.0e+1
+  ret float %mul
+
+; CHECK: @test2
+; CHECK: fmul float %x, -2.000000e+01
+}
+
+; (-0.0 - X) * (-0.0 - Y) => X * Y
+define float @test3(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %sub2 = fsub float -0.000000e+00, %y
+  %mul = fmul float %sub1, %sub2
+  ret float %mul
+; CHECK: @test3
+; CHECK: fmul float %x, %y
+}
+
+; (0.0 - X) * (0.0 - Y) => X * Y
+define float @test4(float %x, float %y) {
+  %sub1 = fsub nsz float 0.000000e+00, %x
+  %sub2 = fsub nsz float 0.000000e+00, %y
+  %mul = fmul float %sub1, %sub2
+  ret float %mul
+; CHECK: @test4
+; CHECK: fmul float %x, %y
+}
+
+; (-0.0 - X) * Y => -0.0 - (X * Y)
+define float @test5(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  ret float %mul
+; CHECK: @test5
+; CHECK: %1 = fmul float %x, %y
+; CHECK: %mul = fsub float -0.000000e+00, %1
+}
+
+; (0.0 - X) * Y => 0.0 - (X * Y)
+define float @test6(float %x, float %y) {
+  %sub1 = fsub nsz float 0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  ret float %mul
+; CHECK: @test6
+; CHECK: %1 = fmul float %x, %y
+; CHECK: %mul = fsub float -0.000000e+00, %1
+}
+
+; "(-0.0 - X) * Y => -0.0 - (X * Y)" is disabled if expression "-0.0 - X"
+; has multiple uses.
+define float @test7(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  %mul2 = fmul float %mul, %sub1
+  ret float %mul2
+; CHECK: @test7
+; CHECK: fsub float -0.000000e+00, %x
+}
diff --git a/test/Transforms/InstCombine/fold-phi.ll b/test/Transforms/InstCombine/fold-phi.ll
new file mode 100644
index 000000000000..bd01d58aa586
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-phi.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: no_crash
+define float @no_crash(float %a) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+  %sum.057 = phi float [ 0.000000e+00, %entry ], [ %add5, %bb0 ]
+  %add5 = fadd float %sum.057, %a    ; PR14592
+  br i1 undef, label %bb0, label %end
+
+bb0:
+  br label %for.body
+
+end:
+  ret float %add5
+}
+
+; CHECK: fold_phi
+define float @fold_phi(float %a) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: phi float
+; CHECK-NEXT: br i1 undef
+  %sum.057 = phi float [ 0.000000e+00, %entry ], [ %add5, %bb0 ]
+  %add5 = fadd float %sum.057, 1.0 ;; Should be moved to the latch!
+  br i1 undef, label %bb0, label %end
+
+; CHECK: bb0:
+bb0:
+; CHECK: fadd float
+  br label %for.body
+
+end:
+  ret float %add5
+}
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index bc6aa0a6891f..09f053289dc1 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -13,3 +13,22 @@ define i8 @test2() {
 ; CHECK: ret i8 -1
 }
 
+; CHECK: test3
+define half @test3(float %a) {
+; CHECK: fptrunc
+; CHECK: llvm.fabs.f16
+  %b = call float @llvm.fabs.f32(float %a)
+  %c = fptrunc float %b to half
+  ret half %c
+}
+
+; CHECK: test4
+define half @test4(float %a) {
+; CHECK: fptrunc
+; CHECK: fsub
+  %b = fsub float -0.0, %a
+  %c = fptrunc float %b to half
+  ret half %c
+}
+
+declare float @llvm.fabs.f32(float) nounwind readonly
diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll
new file mode 100644
index 000000000000..39d86b4588cc
--- /dev/null
+++ b/test/Transforms/InstCombine/fprintf-1.ll
@@ -0,0 +1,80 @@
+; Test that the fprintf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [3 x i8] c"%s\00"
+
+declare i32 @fprintf(%FILE*, i8*, ...)
+
+; Check fprintf(fp, "foo") -> fwrite("foo", 3, 1, fp).
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK: @test_simplify1
+  %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check fprintf(fp, "%c", chr) -> fputc(chr, fp).
+
+define void @test_simplify2(%FILE* %fp) {
+; CHECK: @test_simplify2
+  %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i8 104)
+; CHECK-NEXT: call i32 @fputc(i32 104, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check fprintf(fp, "%s", str) -> fputs(str, fp).
+; NOTE: The fputs simplifier simplifies this further to fwrite.
+
+define void @test_simplify3(%FILE* %fp) {
+; CHECK: @test_simplify3
+  %fmt = getelementptr [3 x i8]* @percent_s, i32 0, i32 0
+  %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check fprintf(fp, fmt, ...) -> fiprintf(fp, fmt, ...) if no floating point.
+
+define void @test_simplify4(%FILE* %fp) {
+; CHECK-IPRINTF: @test_simplify4
+  %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i32 187)
+; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fiprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+  ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify1(%FILE* %fp) {
+; CHECK-IPRINTF: @test_no_simplify1
+  %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double 1.87)
+; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+  ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify2(%FILE* %fp, double %d) {
+; CHECK: @test_no_simplify2
+  %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double %d)
+  ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/fputs-1.ll b/test/Transforms/InstCombine/fputs-1.ll
new file mode 100644
index 000000000000..c7c5becfd038
--- /dev/null
+++ b/test/Transforms/InstCombine/fputs-1.ll
@@ -0,0 +1,43 @@
+; Test that the fputs library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@empty = constant [1 x i8] zeroinitializer
+@A = constant [2 x i8] c"A\00"
+@hello = constant [7 x i8] c"hello\0A\00"
+
+declare i32 @fputs(i8*, %FILE*)
+
+; Check fputs(str, fp) --> fwrite(str, 1, strlen(s), fp).
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK: @test_simplify1
+  %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
+  call i32 @fputs(i8* %str, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; NOTE: The fwrite simplifier simplifies this further to fputc.
+
+define void @test_simplify2(%FILE* %fp) {
+; CHECK: @test_simplify2
+  %str = getelementptr [2 x i8]* @A, i32 0, i32 0
+  call i32 @fputs(i8* %str, %FILE* %fp)
+; CHECK-NEXT: call i32 @fputc(i32 65, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3(%FILE* %fp) {
+; CHECK: @test_simplify3
+  %str = getelementptr [7 x i8]* @hello, i32 0, i32 0
+  call i32 @fputs(i8* %str, %FILE* %fp)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([7 x i8]* @hello, i32 0, i32 0), i32 6, i32 1, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/fwrite-1.ll b/test/Transforms/InstCombine/fwrite-1.ll
new file mode 100644
index 000000000000..528cdec217f7
--- /dev/null
+++ b/test/Transforms/InstCombine/fwrite-1.ll
@@ -0,0 +1,57 @@
+; Test that the fwrite library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@str = constant [1 x i8] zeroinitializer
+@empty = constant [0 x i8] zeroinitializer
+
+declare i64 @fwrite(i8*, i64, i64, %FILE *)
+
+; Check fwrite(S, 1, 1, fp) -> fputc(S[0], fp).
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK: @test_simplify1
+  %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
+  call i64 @fwrite(i8* %str, i64 1, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i32 @fputc(i32 0, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify2(%FILE* %fp) {
+; CHECK: @test_simplify2
+  %str = getelementptr inbounds [0 x i8]* @empty, i64 0, i64 0
+  call i64 @fwrite(i8* %str, i64 1, i64 0, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3(%FILE* %fp) {
+; CHECK: @test_simplify3
+  %str = getelementptr inbounds [0 x i8]* @empty, i64 0, i64 0
+  call i64 @fwrite(i8* %str, i64 0, i64 1, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define i64 @test_no_simplify1(%FILE* %fp) {
+; CHECK: @test_no_simplify1
+  %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
+  %ret = call i64 @fwrite(i8* %str, i64 1, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i64 @fwrite
+  ret i64 %ret
+; CHECK-NEXT: ret i64 %ret
+}
+
+define void @test_no_simplify2(%FILE* %fp, i64 %size) {
+; CHECK: @test_no_simplify2
+  %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
+  call i64 @fwrite(i8* %str, i64 %size, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i64 @fwrite
+  ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 1c120ecbe9eb..bb07736ef803 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -424,7 +424,7 @@ define i32 @test35() nounwind {
              i8* getelementptr (%t1* bitcast (%t0* @s to %t1*), i32 0, i32 1, i32 0)) nounwind
   ret i32 0
 ; CHECK: @test35
-; CHECK: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0* @s, i64 0, i32 1, i64 0)) nounwind
+; CHECK: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0* @s, i64 0, i32 1, i64 0)) [[NUW:#[0-9]+]]
 }
 
 ; Instcombine should constant-fold the GEP so that indices that have
@@ -492,3 +492,21 @@ define void @three_gep_f(%three_gep_t2* %x) {
 
 declare void @three_gep_g(i32*)
 declare void @three_gep_h(%three_gep_t2*)
+
+%struct.ham = type { i32, %struct.zot*, %struct.zot*, %struct.zot* }
+%struct.zot = type { i64, i8 }
+
+define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
+  %tmp = getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
+  %tmp2 = load %struct.zot** %tmp, align 8
+  %tmp3 = bitcast %struct.zot* %tmp2 to i8*
+  %tmp4 = getelementptr inbounds i8* %tmp3, i64 -8
+  store i8 %arg1, i8* %tmp4, align 8
+  ret void
+
+; CHECK: @test39
+; CHECK: getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
+; CHECK: getelementptr inbounds i8* %tmp3, i64 -8
+}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 8e064a4f2fc9..446c0e01dcaa 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -677,3 +677,212 @@ define i1 @test66(i64 %A, i64 %B) {
 ; CHECK-NEXT: ret i1 true
   ret i1 %cmp
 }
+
+; CHECK: @test67
+; CHECK: %and = and i32 %x, 96
+; CHECK: %cmp = icmp ne i32 %and, 0
+define i1 @test67(i32 %x) nounwind uwtable {
+  %and = and i32 %x, 127
+  %cmp = icmp sgt i32 %and, 31
+  ret i1 %cmp
+}
+
+; CHECK: @test68
+; CHECK: %cmp = icmp ugt i32 %and, 30
+define i1 @test68(i32 %x) nounwind uwtable {
+  %and = and i32 %x, 127
+  %cmp = icmp sgt i32 %and, 30
+  ret i1 %cmp
+}
+
+; PR14708
+; CHECK: @test69
+; CHECK: %1 = and i32 %c, -33
+; CHECK: %2 = icmp eq i32 %1, 65
+; CHECK: ret i1 %2
+define i1 @test69(i32 %c) nounwind uwtable {
+  %1 = icmp eq i32 %c, 97
+  %2 = icmp eq i32 %c, 65
+  %3 = or i1 %1, %2
+  ret i1 %3
+}
+
+; CHECK: @icmp_sext16trunc
+; CHECK-NEXT: %1 = trunc i32 %x to i16
+; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
+define i1 @icmp_sext16trunc(i32 %x) {
+  %trunc = trunc i32 %x to i16
+  %sext = sext i16 %trunc to i32
+  %cmp = icmp slt i32 %sext, 36
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_sext8trunc
+; CHECK-NEXT: %1 = trunc i32 %x to i8
+; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
+define i1 @icmp_sext8trunc(i32 %x) {
+  %trunc = trunc i32 %x to i8
+  %sext = sext i8 %trunc to i32
+  %cmp = icmp slt i32 %sext, 36
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl16
+; CHECK-NEXT: %1 = trunc i32 %x to i16
+; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
+define i1 @icmp_shl16(i32 %x) {
+  %shl = shl i32 %x, 16
+  %cmp = icmp slt i32 %shl, 2359296
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl24
+; CHECK-NEXT: %1 = trunc i32 %x to i8
+; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
+define i1 @icmp_shl24(i32 %x) {
+  %shl = shl i32 %x, 24
+  %cmp = icmp slt i32 %shl, 603979776
+  ret i1 %cmp
+}
+
+; If the (shl x, C) preserved the sign and this is a sign test,
+; compare the LHS operand instead
+; CHECK: @icmp_shl_nsw_sgt
+; CHECK-NEXT: icmp sgt i32 %x, 0
+define i1 @icmp_shl_nsw_sgt(i32 %x) {
+  %shl = shl nsw i32 %x, 21
+  %cmp = icmp sgt i32 %shl, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_nsw_sge0
+; CHECK-NEXT: icmp sgt i32 %x, -1
+define i1 @icmp_shl_nsw_sge0(i32 %x) {
+  %shl = shl nsw i32 %x, 21
+  %cmp = icmp sge i32 %shl, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_nsw_sge1
+; CHECK-NEXT: icmp sgt i32 %x, 0
+define i1 @icmp_shl_nsw_sge1(i32 %x) {
+  %shl = shl nsw i32 %x, 21
+  %cmp = icmp sge i32 %shl, 1
+  ret i1 %cmp
+}
+
+; Checks for icmp (eq|ne) (shl x, C), 0
+; CHECK: @icmp_shl_nsw_eq
+; CHECK-NEXT: icmp eq i32 %x, 0
+define i1 @icmp_shl_nsw_eq(i32 %x) {
+  %mul = shl nsw i32 %x, 5
+  %cmp = icmp eq i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_eq
+; CHECK-NOT: icmp eq i32 %mul, 0
+define i1 @icmp_shl_eq(i32 %x) {
+  %mul = shl i32 %x, 5
+  %cmp = icmp eq i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_nsw_ne
+; CHECK-NEXT: icmp ne i32 %x, 0
+define i1 @icmp_shl_nsw_ne(i32 %x) {
+  %mul = shl nsw i32 %x, 7
+  %cmp = icmp ne i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_ne
+; CHECK-NOT: icmp ne i32 %x, 0
+define i1 @icmp_shl_ne(i32 %x) {
+  %mul = shl i32 %x, 7
+  %cmp = icmp ne i32 %mul, 0
+  ret i1 %cmp
+}
+
+; If the (mul x, C) preserved the sign and this is sign test,
+; compare the LHS operand instead
+; CHECK: @icmp_mul_nsw
+; CHECK-NEXT: icmp sgt i32 %x, 0
+define i1 @icmp_mul_nsw(i32 %x) {
+  %mul = mul nsw i32 %x, 12
+  %cmp = icmp sgt i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw1
+; CHECK-NEXT: icmp slt i32 %x, 0
+define i1 @icmp_mul_nsw1(i32 %x) {
+  %mul = mul nsw i32 %x, 12
+  %cmp = icmp sle i32 %mul, -1
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw_neg
+; CHECK-NEXT: icmp slt i32 %x, 1
+define i1 @icmp_mul_nsw_neg(i32 %x) {
+  %mul = mul nsw i32 %x, -12
+  %cmp = icmp sge i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw_neg1
+; CHECK-NEXT: icmp slt i32 %x, 0
+define i1 @icmp_mul_nsw_neg1(i32 %x) {
+  %mul = mul nsw i32 %x, -12
+  %cmp = icmp sge i32 %mul, 1
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw_0
+; CHECK-NOT: icmp sgt i32 %x, 0
+define i1 @icmp_mul_nsw_0(i32 %x) {
+  %mul = mul nsw i32 %x, 0
+  %cmp = icmp sgt i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul
+; CHECK-NEXT: %mul = mul i32 %x, -12
+define i1 @icmp_mul(i32 %x) {
+  %mul = mul i32 %x, -12
+  %cmp = icmp sge i32 %mul, 0
+  ret i1 %cmp
+}
+
+; Checks for icmp (eq|ne) (mul x, C), 0
+; CHECK: @icmp_mul_neq0
+; CHECK-NEXT: icmp ne i32 %x, 0
+define i1 @icmp_mul_neq0(i32 %x) {
+  %mul = mul nsw i32 %x, -12
+  %cmp = icmp ne i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_eq0
+; CHECK-NEXT: icmp eq i32 %x, 0
+define i1 @icmp_mul_eq0(i32 %x) {
+  %mul = mul nsw i32 %x, 12
+  %cmp = icmp eq i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul0_eq0
+; CHECK-NEXT: ret i1 true
+define i1 @icmp_mul0_eq0(i32 %x) {
+  %mul = mul i32 %x, 0
+  %cmp = icmp eq i32 %mul, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_mul0_ne0
+; CHECK-NEXT: ret i1 false
+define i1 @icmp_mul0_ne0(i32 %x) {
+  %mul = mul i32 %x, 0
+  %cmp = icmp ne i32 %mul, 0
+  ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/idioms.ll b/test/Transforms/InstCombine/idioms.ll
index 6b3567fc6e8d..1a211668c3bf 100644
--- a/test/Transforms/InstCombine/idioms.ll
+++ b/test/Transforms/InstCombine/idioms.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine %s -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 
 ; Check that code corresponding to the following C function is
 ; simplified into a single ASR operation:
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 382e6b38574d..f334b3b1e935 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -instcombine -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 
 %overflow.result = type {i8, i1}
 
@@ -220,3 +220,39 @@ define i32 @cttz_simplify1b(i32 %x) nounwind readnone ssp {
 ; CHECK: @cttz_simplify1b
 ; CHECK-NEXT: ret i32 0
 }
+
+define i32 @ctlz_undef(i32 %Value) nounwind {
+  %ctlz = call i32 @llvm.ctlz.i32(i32 0, i1 true)
+  ret i32 %ctlz
+
+; CHECK: @ctlz_undef
+; CHECK-NEXT: ret i32 undef
+}
+
+define i32 @cttz_undef(i32 %Value) nounwind {
+  %cttz = call i32 @llvm.cttz.i32(i32 0, i1 true)
+  ret i32 %cttz
+
+; CHECK: @cttz_undef
+; CHECK-NEXT: ret i32 undef
+}
+
+define i32 @ctlz_select(i32 %Value) nounwind {
+  %tobool = icmp ne i32 %Value, 0
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %Value, i1 true)
+  %s = select i1 %tobool, i32 %ctlz, i32 32
+  ret i32 %s
+
+; CHECK: @ctlz_select
+; CHECK: select i1 %tobool, i32 %ctlz, i32 32
+}
+
+define i32 @cttz_select(i32 %Value) nounwind {
+  %tobool = icmp ne i32 %Value, 0
+  %cttz = call i32 @llvm.cttz.i32(i32 %Value, i1 true)
+  %s = select i1 %tobool, i32 %cttz, i32 32
+  ret i32 %s
+
+; CHECK: @cttz_select
+; CHECK: select i1 %tobool, i32 %cttz, i32 32
+}
diff --git a/test/Transforms/InstCombine/isascii-1.ll b/test/Transforms/InstCombine/isascii-1.ll
new file mode 100644
index 000000000000..2a413d89b492
--- /dev/null
+++ b/test/Transforms/InstCombine/isascii-1.ll
@@ -0,0 +1,32 @@
+; Test that the isascii library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @isascii(i32)
+
+; Check isascii(c) -> c <u 128.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+  %ret = call i32 @isascii(i32 127)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify2() {
+; CHECK: @test_simplify2
+  %ret = call i32 @isascii(i32 128)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify3(i32 %x) {
+; CHECK: @test_simplify3
+  %ret = call i32 @isascii(i32 %x)
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %x, 128
+; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = zext i1 [[CMP]] to i32
+  ret i32 %ret
+; CHECK-NEXT: ret i32 [[ZEXT]]
+}
diff --git a/test/Transforms/InstCombine/isdigit-1.ll b/test/Transforms/InstCombine/isdigit-1.ll
new file mode 100644
index 000000000000..f291296c8826
--- /dev/null
+++ b/test/Transforms/InstCombine/isdigit-1.ll
@@ -0,0 +1,48 @@
+; Test that the isdigit library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @isdigit(i32)
+
+; Check isdigit(c) -> (c - '0') <u 10;
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+  %ret = call i32 @isdigit(i32 47)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify2() {
+; CHECK: @test_simplify2
+  %ret = call i32 @isdigit(i32 48)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify3() {
+; CHECK: @test_simplify3
+  %ret = call i32 @isdigit(i32 57)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+  %ret = call i32 @isdigit(i32 58)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify5(i32 %x) {
+; CHECK: @test_simplify5
+
+  %ret = call i32 @isdigit(i32 %x)
+; CHECK-NEXT: [[ADD:%[a-z0-9]+]] = add i32 %x, -48
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 [[ADD]], 10
+; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = zext i1 [[CMP]] to i32
+  ret i32 %ret
+; CHECK-NEXT: ret i32 [[ZEXT]]
+}
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index 5cafb7787e36..d88188e4109c 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -47,6 +47,18 @@ define i1 @test4(i32 %X) {
 ; CHECK-NEXT: ret i1 %R
 }
 
+define i1 @test4_i16(i16 %X) {
+  %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i16 %X
+  %Q = load i16* %P
+  %R = icmp sle i16 %Q, 73
+  ret i1 %R
+; CHECK: @test4_i16
+; CHECK-NEXT: lshr i16 933, %X
+; CHECK-NEXT: and i16 {{.*}}, 1
+; CHECK-NEXT: %R = icmp ne i16 {{.*}}, 0
+; CHECK-NEXT: ret i1 %R
+}
+
 define i1 @test5(i32 %X) {
   %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
   %Q = load i16* %P
diff --git a/test/Transforms/InstCombine/load3.ll b/test/Transforms/InstCombine/load3.ll
index 35398e17db8c..db74426783c1 100644
--- a/test/Transforms/InstCombine/load3.ll
+++ b/test/Transforms/InstCombine/load3.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.0.0"
 
 ; Instcombine should be able to do trivial CSE of loads.
 
@@ -24,4 +24,23 @@ define float @test2() {
   
 ; CHECK: @test2
 ; CHECK: ret float 0x3806965600000000
-}
-\ No newline at end of file
+}
+
+@rslts32 = global [36 x i32] zeroinitializer, align 4
+
+@expect32 = internal constant [36 x i32][ i32 1, i32 2, i32 0, i32 100, i32 3,
+i32 4, i32 0, i32 -7, i32 4, i32 4, i32 8, i32 8, i32 1, i32 3, i32 8, i32 3,
+i32 4, i32 -2, i32 2, i32 8, i32 83, i32 77, i32 8, i32 17, i32 77, i32 88, i32
+22, i32 33, i32 44, i32 88, i32 77, i32 4, i32 4, i32 7, i32 -7, i32 -8] ,
+align 4
+
+; PR14986
+define void @test3() nounwind {
+; This is a weird way of computing zero.
+  %l = load i32* getelementptr ([36 x i32]* @expect32, i32 29826161, i32 28), align 4
+  store i32 %l, i32* getelementptr ([36 x i32]* @rslts32, i32 29826161, i32 28), align 4
+  ret void
+
+; CHECK: @test3
+; CHECK: store i32 1, i32* getelementptr inbounds ([36 x i32]* @rslts32, i32 0, i32 0)
+}
diff --git a/test/Transforms/InstCombine/logical-select.ll b/test/Transforms/InstCombine/logical-select.ll
index bb59817a4f69..f8c06768453d 100644
--- a/test/Transforms/InstCombine/logical-select.ll
+++ b/test/Transforms/InstCombine/logical-select.ll
@@ -10,8 +10,8 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
   %j = or i32 %g, %i
   ret i32 %j
 ; CHECK: %e = icmp slt i32 %a, %b
-; CHECK: %j = select i1 %e, i32 %c, i32 %d
-; CHECK: ret i32 %j
+; CHECK-NEXT: [[result:%.*]] = select i1 %e, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
   %e = icmp slt i32 %a, %b
@@ -22,8 +22,8 @@ define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
   %j = or i32 %i, %g
   ret i32 %j
 ; CHECK: %e = icmp slt i32 %a, %b
-; CHECK: %j = select i1 %e, i32 %c, i32 %d
-; CHECK: ret i32 %j
+; CHECK-NEXT: [[result:%.*]] = select i1 %e, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 
 define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
@@ -36,8 +36,8 @@ entry:
   %3 = or i32 %1, %2
   ret i32 %3
 ; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 entry:
@@ -49,8 +49,8 @@ entry:
   %3 = or i32 %1, %2
   ret i32 %3
 ; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 
 define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
@@ -63,6 +63,6 @@ entry:
   %3 = or i32 %1, %2
   ret i32 %3
 ; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index 4e3217dc2d96..cd12b29b1186 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -91,3 +91,32 @@ define void @test5(i8* %ptr, i8** %esc) {
   store volatile i8 4, i8* %g
   ret void
 }
+
+;; When a basic block contains only a call to free and this block is accessed
+;; through a test of the argument of free against null, move the call in the
+;; predecessor block.
+;; Using simplifycfg will remove the empty basic block and the branch operation
+;; Then, performing a dead elimination will remove the comparison.
+;; This is what happens with -O1 and upper.
+; CHECK: @test6
+define void @test6(i8* %foo) minsize {
+; CHECK:  %tobool = icmp eq i8* %foo, null
+;; Call to free moved
+; CHECK-NEXT: tail call void @free(i8* %foo)
+; CHECK-NEXT: br i1 %tobool, label %if.end, label %if.then
+; CHECK: if.then:
+;; Block is now empty and may be simplified by simplifycfg
+; CHECK-NEXT:   br label %if.end
+; CHECK: if.end:
+; CHECK-NEXT:  ret void
+entry:
+  %tobool = icmp eq i8* %foo, null
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  tail call void @free(i8* %foo)
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
index 4238c5f8fb15..c97b201fc0e9 100644
--- a/test/Transforms/InstCombine/memcmp-1.ll
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -59,7 +59,7 @@ define i32 @test_simplify5() {
   %mem2 = getelementptr [4 x i8]* @foo, i32 0, i32 0
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
   ret i32 %ret
-; CHECK: ret i32 {{[0-9]+}}
+; CHECK: ret i32 1
 }
 
 define i32 @test_simplify6() {
@@ -68,5 +68,5 @@ define i32 @test_simplify6() {
   %mem2 = getelementptr [4 x i8]* @hel, i32 0, i32 0
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
   ret i32 %ret
-; CHECK: ret i32 {{-[0-9]+}}
+; CHECK: ret i32 -1
 }
diff --git a/test/Transforms/InstCombine/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll
index 83c893e17dd6..557b160a8785 100644
--- a/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -134,3 +134,13 @@ define void @test8() {
 ; CHECK: bar
   ret void
 }
+
+define void @test9() {
+  %A = alloca %U, align 4
+  %a = bitcast %U* %A to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
+  call void @bar(i8* %a) readonly
+; CHECK: @test9
+; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U]* @H, i64 0, i64 1) to i8*))
+  ret void
+}
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 6c8e6347634c..16213b8628ca 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -65,7 +65,7 @@ define i32 @test9(i32 %i) {
 ; CHECK: @test9
         %j = mul i32 %i, -1             ; <i32> [#uses=1]
         ret i32 %j
-; CHECJ: sub i32 0, %i
+; CHECK: sub i32 0, %i
 }
 
 define i32 @test10(i32 %a, i32 %b) {
@@ -138,10 +138,8 @@ define i32 @test16(i32 %b, i1 %c) {
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
-; CHECK: [[TEST16:%.*]] = zext i1 %c to i32
-; CHECK-NEXT: %1 = sub i32 0, [[TEST16]]
-; CHECK-NEXT: %e = and i32 %1, %b
-; CHECK-NEXT: ret i32 %e
+; CHECK: [[TEST16:%.*]] = select i1 %c, i32 %b, i32 0
+; CHECK-NEXT: ret i32 [[TEST16]]
 }
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
diff --git a/test/Transforms/InstCombine/obfuscated_splat.ll b/test/Transforms/InstCombine/obfuscated_splat.ll
index c25dade168a4..fa9cb423d02c 100644
--- a/test/Transforms/InstCombine/obfuscated_splat.ll
+++ b/test/Transforms/InstCombine/obfuscated_splat.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -S %s | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 
 define void @test(<4 x float> *%in_ptr, <4 x float> *%out_ptr) {
   %A = load <4 x float>* %in_ptr, align 16
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 31a3cb46e459..0ead9d123749 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -256,3 +256,131 @@ xpto:
 return:
   ret i32 7
 }
+
+declare noalias i8* @valloc(i32) nounwind
+
+; CHECK: @test14
+; CHECK: ret i32 6
+define i32 @test14(i32 %a) nounwind {
+  switch i32 %a, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb1
+  ]
+
+sw.bb:
+  %call = tail call noalias i8* @malloc(i32 6) nounwind
+  br label %sw.epilog
+
+sw.bb1:
+  %call2 = tail call noalias i8* @calloc(i32 3, i32 2) nounwind
+  br label %sw.epilog
+
+sw.default:
+  %call3 = tail call noalias i8* @valloc(i32 6) nounwind
+  br label %sw.epilog
+
+sw.epilog:
+  %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
+  %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
+  ret i32 %1
+}
+
+; CHECK: @test15
+; CHECK: llvm.objectsize
+define i32 @test15(i32 %a) nounwind {
+  switch i32 %a, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb1
+  ]
+
+sw.bb:
+  %call = tail call noalias i8* @malloc(i32 3) nounwind
+  br label %sw.epilog
+
+sw.bb1:
+  %call2 = tail call noalias i8* @calloc(i32 2, i32 1) nounwind
+  br label %sw.epilog
+
+sw.default:
+  %call3 = tail call noalias i8* @valloc(i32 3) nounwind
+  br label %sw.epilog
+
+sw.epilog:
+  %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
+  %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
+  ret i32 %1
+}
+
+; CHECK: @test16
+; CHECK: llvm.objectsize
+define i32 @test16(i8* %a, i32 %n) nounwind {
+  %b = alloca [5 x i8], align 1
+  %c = alloca [5 x i8], align 1
+  switch i32 %n, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb1
+  ]
+
+sw.bb:
+  %bp = bitcast [5 x i8]* %b to i8*
+  br label %sw.epilog
+
+sw.bb1:
+  %cp = bitcast [5 x i8]* %c to i8*
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  %phi = phi i8* [ %a, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
+  %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
+  ret i32 %sz
+}
+
+; CHECK: @test17
+; CHECK: ret i32 5
+define i32 @test17(i32 %n) nounwind {
+  %b = alloca [5 x i8], align 1
+  %c = alloca [5 x i8], align 1
+  %bp = bitcast [5 x i8]* %b to i8*
+  switch i32 %n, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %sw.epilog
+
+sw.bb1:
+  %cp = bitcast [5 x i8]* %c to i8*
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  %phi = phi i8* [ %bp, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
+  %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
+  ret i32 %sz
+}
+
+@globalalias = alias internal [60 x i8]* @a
+
+; CHECK: @test18
+; CHECK-NEXT: ret i32 60
+define i32 @test18() {
+  %bc = bitcast [60 x i8]* @globalalias to i8*
+  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+  ret i32 %1
+}
+
+@globalalias2 = alias weak [60 x i8]* @a
+
+; CHECK: @test19
+; CHECK: llvm.objectsize
+define i32 @test19() {
+  %bc = bitcast [60 x i8]* @globalalias2 to i8*
+  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+  ret i32 %1
+}
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index c0bb28d15ccf..bde2a54048ad 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -344,10 +344,9 @@ define <4 x i32> @test32(<4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32
   %and.i = and <4 x i32> %vecinit6.i191, %neg.i   ; <<4 x i32>> [#uses=1]
   %or.i = or <4 x i32> %and.i, %and.i129          ; <<4 x i32>> [#uses=1]
   ret <4 x i32> %or.i
-; Don't turn this into a vector select until codegen matures to handle them
-; better.
+; codegen is mature enough to handle vector selects.
 ; CHECK: @test32
-; CHECK: or <4 x i32> %and.i, %and.i129
+; CHECK: select <4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191
 }
 
 define i1 @test33(i1 %X, i1 %Y) {
diff --git a/test/Transforms/SimplifyLibCalls/osx-names.ll b/test/Transforms/InstCombine/osx-names.ll
index e321d1dd3171..7b83526aceb5 100644
--- a/test/Transforms/SimplifyLibCalls/osx-names.ll
+++ b/test/Transforms/InstCombine/osx-names.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; <rdar://problem/9815881>
 ; On OSX x86-32, fwrite and fputs aren't called fwrite and fputs.
 ; Make sure we use the correct names.
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
new file mode 100644
index 000000000000..8a311f0b74c6
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -0,0 +1,154 @@
+; Test that the pow library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; rdar://7251832
+
+; NOTE: The readonly attribute on the pow call should be preserved
+; in the cases below where pow is transformed into another function call.
+
+declare float @powf(float, float) nounwind readonly
+declare double @pow(double, double) nounwind readonly
+
+; Check pow(1.0, x) -> 1.0.
+
+define float @test_simplify1(float %x) {
+; CHECK: @test_simplify1
+  %retval = call float @powf(float 1.0, float %x)
+  ret float %retval
+; CHECK-NEXT: ret float 1.000000e+00
+}
+
+define double @test_simplify2(double %x) {
+; CHECK: @test_simplify2
+  %retval = call double @pow(double 1.0, double %x)
+  ret double %retval
+; CHECK-NEXT: ret double 1.000000e+00
+}
+
+; Check pow(2.0, x) -> exp2(x).
+
+define float @test_simplify3(float %x) {
+; CHECK: @test_simplify3
+  %retval = call float @powf(float 2.0, float %x)
+; CHECK-NEXT: [[EXP2F:%[a-z0-9]+]] = call float @exp2f(float %x) [[NUW_RO:#[0-9]+]]
+  ret float %retval
+; CHECK-NEXT: ret float [[EXP2F]]
+}
+
+define double @test_simplify4(double %x) {
+; CHECK: @test_simplify4
+  %retval = call double @pow(double 2.0, double %x)
+; CHECK-NEXT: [[EXP2:%[a-z0-9]+]] = call double @exp2(double %x) [[NUW_RO]]
+  ret double %retval
+; CHECK-NEXT: ret double [[EXP2]]
+}
+
+; Check pow(x, 0.0) -> 1.0.
+
+define float @test_simplify5(float %x) {
+; CHECK: @test_simplify5
+  %retval = call float @powf(float %x, float 0.0)
+  ret float %retval
+; CHECK-NEXT: ret float 1.000000e+00
+}
+
+define double @test_simplify6(double %x) {
+; CHECK: @test_simplify6
+  %retval = call double @pow(double %x, double 0.0)
+  ret double %retval
+; CHECK-NEXT: ret double 1.000000e+00
+}
+
+; Check pow(x, 0.5) -> fabs(sqrt(x)), where x != -infinity.
+
+define float @test_simplify7(float %x) {
+; CHECK: @test_simplify7
+  %retval = call float @powf(float %x, float 0.5)
+; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) [[NUW_RO]]
+; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) [[NUW_RO]]
+; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq float %x, 0xFFF0000000000000
+; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], float 0x7FF0000000000000, float [[FABSF]]
+  ret float %retval
+; CHECK-NEXT: ret float [[SELECT]]
+}
+
+define double @test_simplify8(double %x) {
+; CHECK: @test_simplify8
+  %retval = call double @pow(double %x, double 0.5)
+; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
+; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
+; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
+  ret double %retval
+; CHECK-NEXT: ret double [[SELECT]]
+}
+
+; Check pow(-infinity, 0.5) -> +infinity.
+
+define float @test_simplify9(float %x) {
+; CHECK: @test_simplify9
+  %retval = call float @powf(float 0xFFF0000000000000, float 0.5)
+  ret float %retval
+; CHECK-NEXT: ret float 0x7FF0000000000000
+}
+
+define double @test_simplify10(double %x) {
+; CHECK: @test_simplify10
+  %retval = call double @pow(double 0xFFF0000000000000, double 0.5)
+  ret double %retval
+; CHECK-NEXT: ret double 0x7FF0000000000000
+}
+
+; Check pow(x, 1.0) -> x.
+
+define float @test_simplify11(float %x) {
+; CHECK: @test_simplify11
+  %retval = call float @powf(float %x, float 1.0)
+  ret float %retval
+; CHECK-NEXT: ret float %x
+}
+
+define double @test_simplify12(double %x) {
+; CHECK: @test_simplify12
+  %retval = call double @pow(double %x, double 1.0)
+  ret double %retval
+; CHECK-NEXT: ret double %x
+}
+
+; Check pow(x, 2.0) -> x*x.
+
+define float @test_simplify13(float %x) {
+; CHECK: @test_simplify13
+  %retval = call float @powf(float %x, float 2.0)
+; CHECK-NEXT: [[SQUARE:%[a-z0-9]+]] = fmul float %x, %x
+  ret float %retval
+; CHECK-NEXT: ret float [[SQUARE]]
+}
+
+define double @test_simplify14(double %x) {
+; CHECK: @test_simplify14
+  %retval = call double @pow(double %x, double 2.0)
+; CHECK-NEXT: [[SQUARE:%[a-z0-9]+]] = fmul double %x, %x
+  ret double %retval
+; CHECK-NEXT: ret double [[SQUARE]]
+}
+
+; Check pow(x, -1.0) -> 1.0/x.
+
+define float @test_simplify15(float %x) {
+; CHECK: @test_simplify15
+  %retval = call float @powf(float %x, float -1.0)
+; CHECK-NEXT: [[RECIPROCAL:%[a-z0-9]+]] = fdiv float 1.000000e+00, %x
+  ret float %retval
+; CHECK-NEXT: ret float [[RECIPROCAL]]
+}
+
+define double @test_simplify16(double %x) {
+; CHECK: @test_simplify16
+  %retval = call double @pow(double %x, double -1.0)
+; CHECK-NEXT: [[RECIPROCAL:%[a-z0-9]+]] = fdiv double 1.000000e+00, %x
+  ret double %retval
+; CHECK-NEXT: ret double [[RECIPROCAL]]
+}
+
+; CHECK: attributes [[NUW_RO]] = { nounwind readonly }
diff --git a/test/Transforms/InstCombine/pow-2.ll b/test/Transforms/InstCombine/pow-2.ll
new file mode 100644
index 000000000000..af64cda0904a
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-2.ll
@@ -0,0 +1,14 @@
+; Test that the pow library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare float @pow(double, double)
+
+; Check that pow functions with the wrong prototype aren't simplified.
+
+define float @test_no_simplify1(double %x) {
+; CHECK: @test_no_simplify1
+  %retval = call float @pow(double 1.0, double %x)
+; CHECK-NEXT: call float @pow(double 1.000000e+00, double %x)
+  ret float %retval
+}
diff --git a/test/Transforms/InstCombine/pr12338.ll b/test/Transforms/InstCombine/pr12338.ll
index 2b5c8f8a74ed..d34600f0fa58 100644
--- a/test/Transforms/InstCombine/pr12338.ll
+++ b/test/Transforms/InstCombine/pr12338.ll
@@ -1,24 +1,24 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-define void @entry() nounwind {
-entry:
-  br label %for.cond
-
-for.cond:
+define void @entry() nounwind {
+entry:
+  br label %for.cond
+
+for.cond:
   %local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ]
-; CHECK: sub <1 x i32> <i32 92>, %local
-  %phi3 = sub <1 x i32> zeroinitializer, %local
-  br label %cond.end
-
-cond.false:
-  br label %cond.end
-
-cond.end:
-  %cond = phi <1 x i32> [ %phi3, %for.cond ], [ undef, %cond.false ]
-  br label %cond.end47
-
-cond.end47:
-  %sum = add <1 x i32> %cond, <i32 92>
-  %phi2 = sub <1 x i32> zeroinitializer, %sum
-  br label %for.cond
-}
+; CHECK: sub <1 x i32> <i32 92>, %local
+  %phi3 = sub <1 x i32> zeroinitializer, %local
+  br label %cond.end
+
+cond.false:
+  br label %cond.end
+
+cond.end:
+  %cond = phi <1 x i32> [ %phi3, %for.cond ], [ undef, %cond.false ]
+  br label %cond.end47
+
+cond.end47:
+  %sum = add <1 x i32> %cond, <i32 92>
+  %phi2 = sub <1 x i32> zeroinitializer, %sum
+  br label %for.cond
+}
diff --git a/test/Transforms/InstCombine/printf-1.ll b/test/Transforms/InstCombine/printf-1.ll
new file mode 100644
index 000000000000..3a910ea437b7
--- /dev/null
+++ b/test/Transforms/InstCombine/printf-1.ll
@@ -0,0 +1,119 @@
+; Test that the printf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@h = constant [2 x i8] c"h\00"
+@percent = constant [2 x i8] c"%\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [4 x i8] c"%s\0A\00"
+@empty = constant [1 x i8] c"\00"
+; CHECK: [[STR:@[a-z0-9]+]] = private unnamed_addr constant [12 x i8] c"hello world\00"
+
+declare i32 @printf(i8*, ...)
+
+; Check printf("") -> noop.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+  %fmt = getelementptr [1 x i8]* @empty, i32 0, i32 0
+  call i32 (i8*, ...)* @printf(i8* %fmt)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("x") -> putchar('x'), even for '%'.
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+  %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
+  call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+  %fmt = getelementptr [2 x i8]* @percent, i32 0, i32 0
+  call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 37)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("foo\n") -> puts("foo").
+
+define void @test_simplify4() {
+; CHECK: @test_simplify4
+  %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8]* [[STR]], i32 0, i32 0))
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("%c", chr) -> putchar(chr).
+
+define void @test_simplify5() {
+; CHECK: @test_simplify5
+  %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
+  call i32 (i8*, ...)* @printf(i8* %fmt, i8 104)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("%s\n", str) -> puts(str).
+
+define void @test_simplify6() {
+; CHECK: @test_simplify6
+  %fmt = getelementptr [4 x i8]* @percent_s, i32 0, i32 0
+  %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call i32 (i8*, ...)* @printf(i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0))
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf(format, ...) -> iprintf(format, ...) if no floating point.
+
+define void @test_simplify7() {
+; CHECK-IPRINTF: @test_simplify7
+  %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
+  call i32 (i8*, ...)* @printf(i8* %fmt, i32 187)
+; CHECK-NEXT-IPRINTF: call i32 (i8*, ...)* @iprintf(i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+  ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify1() {
+; CHECK-IPRINTF: @test_no_simplify1
+  %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+  call i32 (i8*, ...)* @printf(i8* %fmt, double 1.87)
+; CHECK-NEXT-IPRINTF: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+  ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify2(i8* %fmt, double %d) {
+; CHECK: @test_no_simplify2
+  call i32 (i8*, ...)* @printf(i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (i8*, ...)* @printf(i8* %fmt, double %d)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define i32 @test_no_simplify3() {
+; CHECK: @test_no_simplify3
+  %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
+  %ret = call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([2 x i8]* @h, i32 0, i32 0))
+  ret i32 %ret
+; CHECK-NEXT: ret i32 %ret
+}
diff --git a/test/Transforms/InstCombine/printf-2.ll b/test/Transforms/InstCombine/printf-2.ll
new file mode 100644
index 000000000000..466ee1c75770
--- /dev/null
+++ b/test/Transforms/InstCombine/printf-2.ll
@@ -0,0 +1,41 @@
+; Test that the printf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@h = constant [2 x i8] c"h\00"
+@percent_s = constant [4 x i8] c"%s\0A\00"
+
+declare void @printf(i8*, ...)
+
+; Check simplification of printf with void return type.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+  %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
+  call void (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+  %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call void (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i32 0, i32 0))
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify6() {
+; CHECK: @test_simplify6
+  %fmt = getelementptr [4 x i8]* @percent_s, i32 0, i32 0
+  %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call void (i8*, ...)* @printf(i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0))
+  ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/ptr-int-cast.ll b/test/Transforms/InstCombine/ptr-int-cast.ll
index 9524d449dd8b..7a6ecff9c0be 100644
--- a/test/Transforms/InstCombine/ptr-int-cast.ll
+++ b/test/Transforms/InstCombine/ptr-int-cast.ll
@@ -27,3 +27,34 @@ define i64 @f0(i32 %a0) nounwind {
        ret i64 %t1
 }
 
+define <4 x i32> @test4(<4 x i8*> %arg) nounwind {
+; CHECK: @test4
+; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
+; CHECK: trunc <4 x i64> %1 to <4 x i32>
+  %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
+  ret <4 x i32> %p1
+}
+
+define <4 x i128> @test5(<4 x i8*> %arg) nounwind {
+; CHECK: @test5
+; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
+; CHECK: zext <4 x i64> %1 to <4 x i128>
+  %p1 = ptrtoint <4 x i8*> %arg to <4 x i128>
+  ret <4 x i128> %p1
+}
+
+define <4 x i8*> @test6(<4 x i32> %arg) nounwind {
+; CHECK: @test6
+; CHECK: zext <4 x i32> %arg to <4 x i64>
+; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
+  %p1 = inttoptr <4 x i32> %arg to <4 x i8*>
+  ret <4 x i8*> %p1
+}
+
+define <4 x i8*> @test7(<4 x i128> %arg) nounwind {
+; CHECK: @test7
+; CHECK: trunc <4 x i128> %arg to <4 x i64>
+; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
+  %p1 = inttoptr <4 x i128> %arg to <4 x i8*>
+  ret <4 x i8*> %p1
+}
diff --git a/test/Transforms/InstCombine/puts-1.ll b/test/Transforms/InstCombine/puts-1.ll
new file mode 100644
index 000000000000..ef4e1bbd824c
--- /dev/null
+++ b/test/Transforms/InstCombine/puts-1.ll
@@ -0,0 +1,31 @@
+; Test that the puts library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@empty = constant [1 x i8] zeroinitializer
+
+declare i32 @puts(i8*)
+
+; Check puts("") -> putchar('\n').
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+  %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
+  call i32 @puts(i8* %str)
+; CHECK-NEXT: call i32 @putchar(i32 10)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Don't simplify if the return value is used.
+
+define i32 @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+  %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
+  %ret = call i32 @puts(i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([1 x i8]* @empty, i32 0, i32 0))
+  ret i32 %ret
+; CHECK-NEXT: ret i32 %ret
+}
diff --git a/test/Transforms/InstCombine/sdiv-1.ll b/test/Transforms/InstCombine/sdiv-1.ll
index c46b5eaef4a8..6ab18ac7f844 100644
--- a/test/Transforms/InstCombine/sdiv-1.ll
+++ b/test/Transforms/InstCombine/sdiv-1.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -instcombine -inline -S | not grep '-715827882'
+; RUN: opt < %s -instcombine -inline -S | FileCheck %s
 ; PR3142
 
+; CHECK-NOT: -715827882
+
 define i32 @a(i32 %X) nounwind readnone {
 entry:
        %0 = sub i32 0, %X
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index f1987973f462..968f37c9c129 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -184,3 +184,12 @@ define i32 @test16(i16 %x) nounwind {
 ; CHECK-NEXT: %ext = sext i16 %sext to i32
 ; CHECK-NEXT: ret i32 %ext
 }
+
+define i32 @test17(i1 %x) nounwind {
+  %c1 = sext i1 %x to i32
+  %c2 = sub i32 0, %c1
+  ret i32 %c2
+; CHECK: @test17
+; CHECK-NEXT: [[TEST17:%.*]] = zext i1 %x to i32
+; CHECK-NEXT: ret i32 [[TEST17]]
+}
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 25e708b7f51d..41f8aa9ee812 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -523,9 +523,9 @@ entry:
   %tmp51 = xor i8 %tmp50, %tmp5
   %tmp52 = and i8 %tmp51, -128
   %tmp53 = lshr i8 %tmp52, 7
-; CHECK: lshr i8 %tmp51, 7
   %tmp54 = mul i8 %tmp53, 16
-; CHECK: shl nuw nsw i8 %tmp53, 4
+; CHECK: %0 = shl i8 %tmp4, 2
+; CHECK: %tmp54 = and i8 %0, 16
   %tmp55 = xor i8 %tmp54, %tmp51
 ; CHECK: ret i8 %tmp551
   ret i8 %tmp55
@@ -659,3 +659,89 @@ define i32 @test53(i32 %x) {
 ; CHECK-NEXT: %B = shl nuw i32 %x, 2
 ; CHECK-NEXT: ret i32 %B
 }
+
+define i32 @test54(i32 %x) {
+  %shr2 = lshr i32 %x, 1
+  %shl = shl i32 %shr2, 4
+  %and = and i32 %shl, 16
+  ret i32 %and
+; CHECK: @test54
+; CHECK: shl i32 %x, 3
+}
+
+
+define i32 @test55(i32 %x) {
+  %shr2 = lshr i32 %x, 1
+  %shl = shl i32 %shr2, 4
+  %or = or i32 %shl, 8
+  ret i32 %or
+; CHECK: @test55
+; CHECK: shl i32 %x, 3
+}
+
+define i32 @test56(i32 %x) {
+  %shr2 = lshr i32 %x, 1
+  %shl = shl i32 %shr2, 4
+  %or = or i32 %shl, 7
+  ret i32 %or
+; CHECK: @test56
+; CHECK: shl i32 %shr2, 4
+}
+
+
+define i32 @test57(i32 %x) {
+  %shr = lshr i32 %x, 1
+  %shl = shl i32 %shr, 4
+  %and = and i32 %shl, 16
+  ret i32 %and
+; CHECK: @test57
+; CHECK: shl i32 %x, 3
+}
+
+define i32 @test58(i32 %x) {
+  %shr = lshr i32 %x, 1
+  %shl = shl i32 %shr, 4
+  %or = or i32 %shl, 8
+  ret i32 %or
+; CHECK: @test58
+; CHECK: shl i32 %x, 3
+}
+
+define i32 @test59(i32 %x) {
+  %shr = ashr i32 %x, 1
+  %shl = shl i32 %shr, 4
+  %or = or i32 %shl, 7
+  ret i32 %or
+; CHECK: @test59
+; CHECK: %shl = shl i32 %shr1, 4
+}
+
+
+define i32 @test60(i32 %x) {
+  %shr = ashr i32 %x, 4
+  %shl = shl i32 %shr, 1
+  %or = or i32 %shl, 1
+  ret i32 %or
+; CHECK: @test60
+; CHECK: ashr i32 %x, 3
+}
+
+
+define i32 @test61(i32 %x) {
+  %shr = ashr i32 %x, 4
+  %shl = shl i32 %shr, 1
+  %or = or i32 %shl, 2
+  ret i32 %or
+; CHECK: @test61
+; CHECK: ashr i32 %x, 4
+}
+
+; propagate "exact" trait
+define i32 @test62(i32 %x) {
+  %shr = ashr exact i32 %x, 4
+  %shl = shl i32 %shr, 1
+  %or = or i32 %shl, 1
+  ret i32 %or
+; CHECK: @test62
+; CHECK: ashr exact i32 %x, 3
+}
diff --git a/test/Transforms/InstCombine/signext.ll b/test/Transforms/InstCombine/signext.ll
index ecee9830cd57..5ed1cd5590ae 100644
--- a/test/Transforms/InstCombine/signext.ll
+++ b/test/Transforms/InstCombine/signext.ll
@@ -82,6 +82,6 @@ entry:
   %sub = add i32 %xor, -67108864                  ; <i32> [#uses=1]
   ret i32 %sub
 ; CHECK: @test8
-; CHECK: %shr = ashr i32 %x, 5
-; CHECK: ret i32 %shr
+; CHECK: %sub = ashr i32 %x, 5
+; CHECK: ret i32 %sub
 }
diff --git a/test/Transforms/InstCombine/sink_instruction.ll b/test/Transforms/InstCombine/sink_instruction.ll
index e521de208f21..5c4019a98df5 100644
--- a/test/Transforms/InstCombine/sink_instruction.ll
+++ b/test/Transforms/InstCombine/sink_instruction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine %s -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
 
 ;; This tests that the instructions in the entry blocks are sunk into each
 ;; arm of the 'if'.
diff --git a/test/Transforms/InstCombine/sprintf-1.ll b/test/Transforms/InstCombine/sprintf-1.ll
new file mode 100644
index 000000000000..9b8c8b1b12c7
--- /dev/null
+++ b/test/Transforms/InstCombine/sprintf-1.ll
@@ -0,0 +1,100 @@
+; Test that the sprintf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+@h = constant [2 x i8] c"h\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [3 x i8] c"%s\00"
+
+declare i32 @sprintf(i8*, i8*, ...)
+
+; Check sprintf(dst, fmt) -> llvm.memcpy(str, fmt, strlen(fmt) + 1, 1).
+
+define void @test_simplify1(i8* %dst) {
+; CHECK: @test_simplify1
+  %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 13, i32 1, i1 false)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify2(i8* %dst) {
+; CHECK: @test_simplify2
+  %fmt = getelementptr [1 x i8]* @null, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: store i8 0, i8* %dst, align 1
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3(i8* %dst) {
+; CHECK: @test_simplify3
+  %fmt = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: store i8 0, i8* %dst, align 1
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, "%c", chr) -> *(i8*)dst = chr; *((i8*)dst + 1) = 0.
+
+define void @test_simplify4(i8* %dst) {
+; CHECK: @test_simplify4
+  %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i8 104)
+; CHECK-NEXT: store i8 104, i8* %dst, align 1
+; CHECK-NEXT: [[NUL:%[a-z0-9]+]] = getelementptr i8* %dst, i32 1
+; CHECK-NEXT: store i8 0, i8* [[NUL]], align 1
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, "%s", str) -> llvm.memcpy(dest, str, strlen(str) + 1, 1).
+
+define void @test_simplify5(i8* %dst, i8* %str) {
+; CHECK: @test_simplify5
+  %fmt = getelementptr [3 x i8]* @percent_s, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i8* %str)
+; CHECK-NEXT: [[STRLEN:%[a-z0-9]+]] = call i32 @strlen(i8* %str)
+; CHECK-NEXT: [[LENINC:%[a-z0-9]+]] = add i32 [[STRLEN]], 1
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %str, i32 [[LENINC]], i32 1, i1 false)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, format, ...) -> siprintf(str, format, ...) if no floating.
+
+define void @test_simplify6(i8* %dst) {
+; CHECK-IPRINTF: @test_simplify6
+  %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i32 187)
+; CHECK-NEXT-IPRINTF: call i32 (i8*, i8*, ...)* @siprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+  ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify1(i8* %dst) {
+; CHECK-IPRINTF: @test_no_simplify1
+  %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double 1.87)
+; CHECK-NEXT-IPRINTF: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+  ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify2(i8* %dst, i8* %fmt, double %d) {
+; CHECK: @test_no_simplify2
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double %d)
+  ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/sqrt.ll b/test/Transforms/InstCombine/sqrt.ll
index cc78417ebbd6..440b9748518d 100644
--- a/test/Transforms/InstCombine/sqrt.ll
+++ b/test/Transforms/InstCombine/sqrt.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -instcombine %s | FileCheck %s
+; RUN: opt -S -instcombine < %s | FileCheck %s
 
 define float @test1(float %x) nounwind readnone ssp {
 entry:
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index 64460d7a6d61..164ba7632684 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -83,3 +83,37 @@ Cont:
 ; CHECK-NEXT:  ret void
 }
 
+
+; PR14753 - merging two stores should preserve the TBAA tag.
+define void @test6(i32 %n, float* %a, i32* %gi) nounwind uwtable ssp {
+entry:
+  store i32 42, i32* %gi, align 4, !tbaa !0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %0 = load i32* %gi, align 4, !tbaa !0
+  %cmp = icmp slt i32 %0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %0 to i64
+  %arrayidx = getelementptr inbounds float* %a, i64 %idxprom
+  store float 0.000000e+00, float* %arrayidx, align 4, !tbaa !3
+  %1 = load i32* %gi, align 4, !tbaa !0
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %gi, align 4, !tbaa !0
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+; CHECK: @test6
+; CHECK: for.cond:
+; CHECK-NEXT: phi i32 [ 42
+; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/Transforms/InstCombine/strto-1.ll b/test/Transforms/InstCombine/strto-1.ll
index 16c0c67970db..7139972fe043 100644
--- a/test/Transforms/InstCombine/strto-1.ll
+++ b/test/Transforms/InstCombine/strto-1.ll
@@ -1,29 +1,29 @@
 ; Test that the strto* library call simplifiers works correctly.
 ;
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -functionattrs -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
 declare i64 @strtol(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtol(i8*, i8**, i32)
+; CHECK: declare i64 @strtol(i8*, i8** nocapture, i32)
 
 declare double @strtod(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare double @strtod(i8*, i8**, i32)
+; CHECK: declare double @strtod(i8*, i8** nocapture, i32)
 
 declare float @strtof(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare float @strtof(i8*, i8**, i32)
+; CHECK: declare float @strtof(i8*, i8** nocapture, i32)
 
 declare i64 @strtoul(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoul(i8*, i8**, i32)
+; CHECK: declare i64 @strtoul(i8*, i8** nocapture, i32)
 
 declare i64 @strtoll(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoll(i8*, i8**, i32)
+; CHECK: declare i64 @strtoll(i8*, i8** nocapture, i32)
 
 declare double @strtold(i8* %s, i8** %endptr)
-; CHECK: declare double @strtold(i8*, i8**)
+; CHECK: declare double @strtold(i8*, i8** nocapture)
 
 declare i64 @strtoull(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoull(i8*, i8**, i32)
+; CHECK: declare i64 @strtoull(i8*, i8** nocapture, i32)
 
 define void @test_simplify1(i8* %x, i8** %endptr) {
 ; CHECK: @test_simplify1
diff --git a/test/Transforms/InstCombine/toascii-1.ll b/test/Transforms/InstCombine/toascii-1.ll
new file mode 100644
index 000000000000..c4a13e229393
--- /dev/null
+++ b/test/Transforms/InstCombine/toascii-1.ll
@@ -0,0 +1,59 @@
+; Test that the toascii library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @toascii(i32)
+
+; Check isascii(c) -> c & 0x7f.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+  %ret = call i32 @toascii(i32 0)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify2() {
+; CHECK: @test_simplify2
+  %ret = call i32 @toascii(i32 1)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify3() {
+; CHECK: @test_simplify3
+  %ret = call i32 @toascii(i32 127)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 127
+}
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+  %ret = call i32 @toascii(i32 128)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify5() {
+; CHECK: @test_simplify5
+  %ret = call i32 @toascii(i32 255)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 127
+}
+
+define i32 @test_simplify6() {
+; CHECK: @test_simplify6
+  %ret = call i32 @toascii(i32 256)
+  ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify7(i32 %x) {
+; CHECK: @test_simplify7
+  %ret = call i32 @toascii(i32 %x)
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 127
+  ret i32 %ret
+; CHECK-NEXT: ret i32 [[AND]]
+}
diff --git a/test/Transforms/InstCombine/vec_extract_elt.ll b/test/Transforms/InstCombine/vec_extract_elt.ll
index 63e4ee2112d8..166066a201bf 100644
--- a/test/Transforms/InstCombine/vec_extract_elt.ll
+++ b/test/Transforms/InstCombine/vec_extract_elt.ll
@@ -7,3 +7,13 @@ define i32 @test(float %f) {
         ret i32 %tmp19
 }
 
+define i64 @test2(i64 %in) {
+  %vec = insertelement <8 x i64> undef, i64 %in, i32 0
+  %splat = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
+  %add = add <8 x i64> %splat, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+  %scl1 = extractelement <8 x i64> %add, i32 0
+  %scl2 = extractelement <8 x i64> %add, i32 0
+  %r = add i64 %scl1, %scl2
+  ret i64 %r
+}
+
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
index 7bbf53c270f4..2f2990b7b055 100644
--- a/test/Transforms/InstCombine/vector-casts.ll
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -64,7 +64,8 @@ entry:
         
 ; CHECK: @test5
 ; CHECK:   sext <4 x i1> %cmp to <4 x i32>	
-; CHECK:   sext <4 x i1> %cmp4 to <4 x i32>	
+; The sext-and pair is canonicalized to a select.
+; CHECK:   select <4 x i1> %cmp4, <4 x i32>	%sext, <4 x i32> zeroinitializer
 }
 
 
diff --git a/test/Transforms/InstCombine/vector-type.ll b/test/Transforms/InstCombine/vector-type.ll
new file mode 100644
index 000000000000..59a4bdd19e70
--- /dev/null
+++ b/test/Transforms/InstCombine/vector-type.ll
@@ -0,0 +1,15 @@
+; The code in InstCombiner::FoldSelectOpOp was calling
+; Type::getVectorNumElements without checking first if the type was a vector.
+
+; RUN: opt < %s -instcombine -S
+
+define i32 @vselect1(i32 %a.coerce, i32 %b.coerce, i32 %c.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>
+  %1 = bitcast i32 %b.coerce to <2 x i16>
+  %2 = bitcast i32 %c.coerce to <2 x i16>
+  %cmp = icmp sge <2 x i16> %2, zeroinitializer
+  %or = select <2 x i1> %cmp, <2 x i16> %0, <2 x i16> %1
+  %3 = bitcast <2 x i16> %or to i32
+  ret i32 %3
+}
diff --git a/test/Transforms/InstCombine/vector_gep1.ll b/test/Transforms/InstCombine/vector_gep1.ll
index 652362299562..90ca26212f2a 100644
--- a/test/Transforms/InstCombine/vector_gep1.ll
+++ b/test/Transforms/InstCombine/vector_gep1.ll
@@ -1,5 +1,5 @@
-; RUN: opt -instcombine %s -disable-output
-; RUN: opt -instsimplify %s -disable-output
+; RUN: opt -instcombine -disable-output < %s
+; RUN: opt -instsimplify -disable-output < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -35,3 +35,8 @@ define <2 x i1> @test5(<2 x i8*> %a) {
   %B = icmp ult <2 x i8*> %g, zeroinitializer
   ret <2 x i1> %B
 }
+
+define <2 x i32*> @test7(<2 x {i32, i32}*> %a) {
+  %w = getelementptr <2 x {i32, i32}*> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer
+  ret <2 x i32*> %w
+}
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index 89f00bd68475..be06d7999d84 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -51,3 +51,34 @@ define i32 @test4(i32 %A, i32 %B) {
 ; CHECK: %1 = ashr i32 %A, %B
 ; CHECK: ret i32 %1
 }
+
+; defect-2 in rdar://12329730
+; (X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
+;   where the "X" has more than one use
+define i32 @test5(i32 %val1) {
+test5:
+  %xor = xor i32 %val1, 1234
+  %shr = lshr i32 %xor, 8
+  %xor1 = xor i32 %shr, 1
+  %add = add i32 %xor1, %xor
+  ret i32 %add
+; CHECK: @test5
+; CHECK: lshr i32 %val1, 8
+; CHECK: ret
+}
+
+; defect-1 in rdar://12329730
+; Simplify (X^Y) -> X or Y in the user's context if we know that 
+; only bits from X or Y are demanded.
+; e.g. the "x ^ 1234" can be optimized into x in the context of "t >> 16".
+;  Put in other word, t >> 16 -> x >> 16.
+; unsigned foo(unsigned x) { unsigned t = x ^ 1234; ;  return (t >> 16) + t;}
+define i32 @test6(i32 %x) {
+  %xor = xor i32 %x, 1234
+  %shr = lshr i32 %xor, 16
+  %add = add i32 %shr, %xor
+  ret i32 %add
+; CHECK: @test6
+; CHECK: lshr i32 %x, 16
+; CHECK: ret
+}
diff --git a/test/Transforms/InstCombine/zext-bool-add-sub.ll b/test/Transforms/InstCombine/zext-bool-add-sub.ll
index 78bcedbbc2e1..b5310575502b 100644
--- a/test/Transforms/InstCombine/zext-bool-add-sub.ll
+++ b/test/Transforms/InstCombine/zext-bool-add-sub.ll
@@ -4,9 +4,9 @@
 define i32 @a(i1 zeroext %x, i1 zeroext %y) {
 entry:
 ; CHECK: @a
-; CHECK: [[TMP1:%.*]] = zext i1 %y to i32
+; CHECK: [[TMP1:%.*]] = sext i1 %y to i32
 ; CHECK: [[TMP2:%.*]] = select i1 %x, i32 2, i32 1
-; CHECK-NEXT: sub i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: add i32 [[TMP2]], [[TMP1]]
   %conv = zext i1 %x to i32
   %conv3 = zext i1 %y to i32
   %conv3.neg = sub i32 0, %conv3
diff --git a/test/Transforms/InstSimplify/call-callconv.ll b/test/Transforms/InstSimplify/call-callconv.ll
new file mode 100644
index 000000000000..e475be781db9
--- /dev/null
+++ b/test/Transforms/InstSimplify/call-callconv.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; Verify that the non-default calling conv doesn't prevent the libcall simplification
+
+@.str = private unnamed_addr constant [4 x i8] c"abc\00", align 1
+
+define arm_aapcscc i32 @_abs(i32 %i) nounwind readnone {
+; CHECK: _abs
+  %call = tail call arm_aapcscc i32 @abs(i32 %i) nounwind readnone
+  ret i32 %call
+; CHECK: %[[ISPOS:.*]] = icmp sgt i32 %i, -1
+; CHECK: %[[NEG:.*]] = sub i32 0, %i
+; CHECK: %[[RET:.*]] = select i1 %[[ISPOS]], i32 %i, i32 %[[NEG]]
+; CHECK: ret i32 %[[RET]]
+}
+
+declare arm_aapcscc i32 @abs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_labs(i32 %i) nounwind readnone {
+; CHECK: _labs
+  %call = tail call arm_aapcscc i32 @labs(i32 %i) nounwind readnone
+  ret i32 %call
+; CHECK: %[[ISPOS:.*]] = icmp sgt i32 %i, -1
+; CHECK: %[[NEG:.*]] = sub i32 0, %i
+; CHECK: %[[RET:.*]] = select i1 %[[ISPOS]], i32 %i, i32 %[[NEG]]
+; CHECK: ret i32 %[[RET]]
+}
+
+declare arm_aapcscc i32 @labs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_strlen1() {
+; CHECK: _strlen1
+  %call = tail call arm_aapcscc i32 @strlen(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0))
+  ret i32 %call
+; CHECK: ret i32 3
+}
+
+declare arm_aapcscc i32 @strlen(i8*)
+
+define arm_aapcscc zeroext i1 @_strlen2(i8* %str) {
+; CHECK: _strlen2
+  %call = tail call arm_aapcscc i32 @strlen(i8* %str)
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+
+; CHECK: %[[STRLENFIRST:.*]] = load i8* %str
+; CHECK: %[[CMP:.*]] = icmp ne i8 %[[STRLENFIRST]], 0
+; CHECK: ret i1 %[[CMP]]
+}
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
new file mode 100644
index 000000000000..cf2f8476763f
--- /dev/null
+++ b/test/Transforms/InstSimplify/call.ll
@@ -0,0 +1,103 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
+
+define i1 @test_uadd1() {
+; CHECK: @test_uadd1
+  %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 3)
+  %overflow = extractvalue {i8, i1} %x, 1
+  ret i1 %overflow
+; CHECK-NEXT: ret i1 true
+}
+
+define i8 @test_uadd2() {
+; CHECK: @test_uadd2
+  %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 44)
+  %result = extractvalue {i8, i1} %x, 0
+  ret i8 %result
+; CHECK-NEXT: ret i8 42
+}
+
+declare i256 @llvm.cttz.i256(i256 %src, i1 %is_zero_undef)
+
+define i256 @test_cttz() {
+; CHECK: @test_cttz
+  %x = call i256 @llvm.cttz.i256(i256 10, i1 false)
+  ret i256 %x
+; CHECK-NEXT: ret i256 1
+}
+
+declare i256 @llvm.ctpop.i256(i256 %src)
+
+define i256 @test_ctpop() {
+; CHECK: @test_ctpop
+  %x = call i256 @llvm.ctpop.i256(i256 10)
+  ret i256 %x
+; CHECK-NEXT: ret i256 2
+}
+
+; Test a non-intrinsic that we know about as a library call.
+declare float @fabs(float %x)
+
+define float @test_fabs_libcall() {
+; CHECK: @test_fabs_libcall
+
+  %x = call float @fabs(float -42.0)
+; This is still a real function call, so instsimplify won't nuke it -- other
+; passes have to do that.
+; CHECK-NEXT: call float @fabs
+
+  ret float %x
+; CHECK-NEXT: ret float 4.2{{0+}}e+01
+}
+
+
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.floor.f32(float) nounwind readnone
+declare float @llvm.ceil.f32(float) nounwind readnone
+declare float @llvm.trunc.f32(float) nounwind readnone
+declare float @llvm.rint.f32(float) nounwind readnone
+declare float @llvm.nearbyint.f32(float) nounwind readnone
+
+; Test idempotent intrinsics
+define float @test_idempotence(float %a) {
+; CHECK: @test_idempotence
+
+; CHECK: fabs
+; CHECK-NOT: fabs
+  %a0 = call float @llvm.fabs.f32(float %a)
+  %a1 = call float @llvm.fabs.f32(float %a0)
+
+; CHECK: floor
+; CHECK-NOT: floor
+  %b0 = call float @llvm.floor.f32(float %a)
+  %b1 = call float @llvm.floor.f32(float %b0)
+
+; CHECK: ceil
+; CHECK-NOT: ceil
+  %c0 = call float @llvm.ceil.f32(float %a)
+  %c1 = call float @llvm.ceil.f32(float %c0)
+
+; CHECK: trunc
+; CHECK-NOT: trunc
+  %d0 = call float @llvm.trunc.f32(float %a)
+  %d1 = call float @llvm.trunc.f32(float %d0)
+
+; CHECK: rint
+; CHECK-NOT: rint
+  %e0 = call float @llvm.rint.f32(float %a)
+  %e1 = call float @llvm.rint.f32(float %e0)
+
+; CHECK: nearbyint
+; CHECK-NOT: nearbyint
+  %f0 = call float @llvm.nearbyint.f32(float %a)
+  %f1 = call float @llvm.nearbyint.f32(float %f0)
+
+  %r0 = fadd float %a1, %b1
+  %r1 = fadd float %r0, %c1
+  %r2 = fadd float %r1, %d1
+  %r3 = fadd float %r2, %e1
+  %r4 = fadd float %r3, %f1
+
+  ret float %r4
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index ce2bb799c813..b764c761cfb2 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -165,6 +165,46 @@ entry:
   ret i1 %cmp
 }
 
+define i1 @gep13(i8* %ptr) {
+; CHECK: @gep13
+; We can prove this GEP is non-null because it is inbounds.
+  %x = getelementptr inbounds i8* %ptr, i32 1
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep14({ {}, i8 }* %ptr) {
+; CHECK: @gep14
+; We can't simplify this because the offset of one in the GEP actually doesn't
+; move the pointer.
+  %x = getelementptr inbounds { {}, i8 }* %ptr, i32 0, i32 1
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NOT: ret i1 false
+}
+
+define i1 @gep15({ {}, [4 x {i8, i8}]}* %ptr, i32 %y) {
+; CHECK: @gep15
+; We can prove this GEP is non-null even though there is a user value, as we
+; would necessarily violate inbounds on one side or the other.
+  %x = getelementptr inbounds { {}, [4 x {i8, i8}]}* %ptr, i32 0, i32 1, i32 %y, i32 1
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep16(i8* %ptr, i32 %a) {
+; CHECK: @gep16
+; We can prove this GEP is non-null because it is inbounds and because we know
+; %b is non-zero even though we don't know its value.
+  %b = or i32 %a, 1
+  %x = getelementptr inbounds i8* %ptr, i32 %b
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
 define i1 @zext(i32 %x) {
 ; CHECK: @zext
   %e1 = zext i32 %x to i64
@@ -607,3 +647,49 @@ unreachableblock:
   %Y = icmp eq i32* %X, null
   ret i1 %Y
 }
+
+; It's not valid to fold a comparison of an argument with an alloca, even though
+; that's tempting. An argument can't *alias* an alloca, however the aliasing rule
+; relies on restrictions against guessing an object's address and dereferencing.
+; There are no restrictions against guessing an object's address and comparing.
+
+define i1 @alloca_argument_compare(i64* %arg) {
+  %alloc = alloca i64
+  %cmp = icmp eq i64* %arg, %alloc
+  ret i1 %cmp
+  ; CHECK: alloca_argument_compare
+  ; CHECK: ret i1 %cmp
+}
+
+; As above, but with the operands reversed.
+
+define i1 @alloca_argument_compare_swapped(i64* %arg) {
+  %alloc = alloca i64
+  %cmp = icmp eq i64* %alloc, %arg
+  ret i1 %cmp
+  ; CHECK: alloca_argument_compare_swapped
+  ; CHECK: ret i1 %cmp
+}
+
+; Don't assume that a noalias argument isn't equal to a global variable's
+; address. This is an example where AliasAnalysis' NoAlias concept is
+; different from actual pointer inequality.
+
+@y = external global i32
+define zeroext i1 @external_compare(i32* noalias %x) {
+  %cmp = icmp eq i32* %x, @y
+  ret i1 %cmp
+  ; CHECK: external_compare
+  ; CHECK: ret i1 %cmp
+}
+
+define i1 @alloca_gep(i64 %a, i64 %b) {
+; CHECK: @alloca_gep
+; We can prove this GEP is non-null because it is inbounds and the pointer
+; is non-null.
+  %strs = alloca [1000 x [1001 x i8]], align 16
+  %x = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %a, i64 %b
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
diff --git a/test/Transforms/InstSimplify/fast-math.ll b/test/Transforms/InstSimplify/fast-math.ll
new file mode 100644
index 000000000000..154b96739791
--- /dev/null
+++ b/test/Transforms/InstSimplify/fast-math.ll
@@ -0,0 +1,107 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+;; x * 0 ==> 0 when no-nans and no-signed-zero
+; CHECK: mul_zero_1
+define float @mul_zero_1(float %a) {
+  %b = fmul nsz nnan float %a, 0.0
+; CHECK: ret float 0.0
+  ret float %b
+}
+; CHECK: mul_zero_2
+define float @mul_zero_2(float %a) {
+  %b = fmul fast float 0.0, %a
+; CHECK: ret float 0.0
+  ret float %b
+}
+
+;; x * 0 =/=> 0 when there could be nans or -0
+; CHECK: no_mul_zero_1
+define float @no_mul_zero_1(float %a) {
+  %b = fmul nsz float %a, 0.0
+; CHECK: ret float %b
+  ret float %b
+}
+; CHECK: no_mul_zero_2
+define float @no_mul_zero_2(float %a) {
+  %b = fmul nnan float %a, 0.0
+; CHECK: ret float %b
+  ret float %b
+}
+; CHECK: no_mul_zero_3
+define float @no_mul_zero_3(float %a) {
+  %b = fmul float %a, 0.0
+; CHECK: ret float %b
+  ret float %b
+}
+
+; fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
+;   where nnan and ninf have to occur at least once somewhere in this
+;   expression
+; CHECK: fadd_fsub_0
+define float @fadd_fsub_0(float %a) {
+; X + -X ==> 0
+  %t1 = fsub nnan ninf float 0.0, %a
+  %zero1 = fadd nnan ninf float %t1, %a
+
+  %t2 = fsub nnan float 0.0, %a
+  %zero2 = fadd ninf float %t2, %a
+
+  %t3 = fsub nnan ninf float 0.0, %a
+  %zero3 = fadd float %t3, %a
+
+  %t4 = fsub float 0.0, %a
+  %zero4 = fadd nnan ninf float %t4, %a
+
+; Dont fold this
+; CHECK: %nofold = fsub float 0.0
+  %nofold = fsub float 0.0, %a
+; CHECK: %no_zero = fadd nnan float %nofold, %a
+  %no_zero = fadd nnan float %nofold, %a
+
+; Coalesce the folded zeros
+  %zero5 = fadd float %zero1, %zero2
+  %zero6 = fadd float %zero3, %zero4
+  %zero7 = fadd float %zero5, %zero6
+
+; Should get folded
+  %ret = fadd nsz float %no_zero, %zero7
+
+; CHECK: ret float %no_zero
+  ret float %ret
+}
+
+; fsub nnan ninf x, x ==> 0.0
+; CHECK: @fsub_x_x
+define float @fsub_x_x(float %a) {
+; X - X ==> 0
+  %zero1 = fsub nnan ninf float %a, %a
+
+; Dont fold
+; CHECK: %no_zero1 = fsub
+  %no_zero1 = fsub ninf float %a, %a
+; CHECK: %no_zero2 = fsub
+  %no_zero2 = fsub nnan float %a, %a
+; CHECK: %no_zero = fadd
+  %no_zero = fadd float %no_zero1, %no_zero2
+
+; Should get folded
+  %ret = fadd nsz float %no_zero, %zero1
+
+; CHECK: ret float %no_zero
+  ret float %ret
+}
+
+; fadd nsz X, 0 ==> X
+; CHECK: @nofold_fadd_x_0
+define float @nofold_fadd_x_0(float %a) {
+; Dont fold
+; CHECK: %no_zero1 = fadd
+  %no_zero1 = fadd ninf float %a, 0.0
+; CHECK: %no_zero2 = fadd
+  %no_zero2 = fadd nnan float %a, 0.0
+; CHECK: %no_zero = fadd
+  %no_zero = fadd float %no_zero1, %no_zero2
+
+; CHECK: ret float %no_zero
+  ret float %no_zero
+}
diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
new file mode 100644
index 000000000000..f9c364cade36
--- /dev/null
+++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; fsub 0, (fsub 0, X) ==> X
+; CHECK: @fsub_0_0_x
+define float @fsub_0_0_x(float %a) {
+  %t1 = fsub float -0.0, %a
+  %ret = fsub float -0.0, %t1
+
+; CHECK: ret float %a
+  ret float %ret
+}
+
+; fsub X, 0 ==> X
+; CHECK: @fsub_x_0
+define float @fsub_x_0(float %a) {
+  %ret = fsub float %a, 0.0
+; CHECK ret float %a
+  ret float %ret
+}
+
+; fadd X, -0 ==> X
+; CHECK: @fadd_x_n0
+define float @fadd_x_n0(float %a) {
+  %ret = fadd float %a, -0.0
+; CHECK ret float %a
+  ret float %ret
+}
+
+; fmul X, 1.0 ==> X
+; CHECK: @fmul_X_1
+define double @fmul_X_1(double %a) {
+  %b = fmul double 1.000000e+00, %a                ; <double> [#uses=1]
+  ; CHECK: ret double %a
+  ret double %b
+}
diff --git a/test/Transforms/InstSimplify/past-the-end.ll b/test/Transforms/InstSimplify/past-the-end.ll
new file mode 100644
index 000000000000..075da4a24be0
--- /dev/null
+++ b/test/Transforms/InstSimplify/past-the-end.ll
@@ -0,0 +1,77 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+target datalayout = "p:32:32"
+
+; Check some past-the-end subtleties.
+
+@opte_a = global i32 0
+@opte_b = global i32 0
+
+; Comparing base addresses of two distinct globals. Never equal.
+
+define zeroext i1 @no_offsets() {
+  %t = icmp eq i32* @opte_a, @opte_b
+  ret i1 %t
+  ; CHECK: no_offsets(
+  ; CHECK: ret i1 false
+}
+
+; Comparing past-the-end addresses of two distinct globals. Never equal.
+
+define zeroext i1 @both_past_the_end() {
+  %x = getelementptr i32* @opte_a, i32 1
+  %y = getelementptr i32* @opte_b, i32 1
+  %t = icmp eq i32* %x, %y
+  ret i1 %t
+  ; CHECK: both_past_the_end(
+  ; CHECK-NOT: ret i1 true
+  ; TODO: refine this
+}
+
+; Comparing past-the-end addresses of one global to the base address
+; of another. Can't fold this.
+
+define zeroext i1 @just_one_past_the_end() {
+  %x = getelementptr i32* @opte_a, i32 1
+  %t = icmp eq i32* %x, @opte_b
+  ret i1 %t
+  ; CHECK: just_one_past_the_end(
+  ; CHECK: ret i1 icmp eq (i32* getelementptr inbounds (i32* @opte_a, i32 1), i32* @opte_b)
+}
+
+; Comparing base addresses of two distinct allocas. Never equal.
+
+define zeroext i1 @no_alloca_offsets() {
+  %m = alloca i32
+  %n = alloca i32
+  %t = icmp eq i32* %m, %n
+  ret i1 %t
+  ; CHECK: no_alloca_offsets(
+  ; CHECK: ret i1 false
+}
+
+; Comparing past-the-end addresses of two distinct allocas. Never equal.
+
+define zeroext i1 @both_past_the_end_alloca() {
+  %m = alloca i32
+  %n = alloca i32
+  %x = getelementptr i32* %m, i32 1
+  %y = getelementptr i32* %n, i32 1
+  %t = icmp eq i32* %x, %y
+  ret i1 %t
+  ; CHECK: both_past_the_end_alloca(
+  ; CHECK-NOT: ret i1 true
+  ; TODO: refine this
+}
+
+; Comparing past-the-end addresses of one alloca to the base address
+; of another. Can't fold this.
+
+define zeroext i1 @just_one_past_the_end_alloca() {
+  %m = alloca i32
+  %n = alloca i32
+  %x = getelementptr i32* %m, i32 1
+  %t = icmp eq i32* %x, %n
+  ret i1 %t
+  ; CHECK: just_one_past_the_end_alloca(
+  ; CHECK: ret i1 %t
+}
diff --git a/test/Transforms/InstSimplify/ptr_diff.ll b/test/Transforms/InstSimplify/ptr_diff.ll
index 1eb1fd4c097e..8b4aa796013b 100644
--- a/test/Transforms/InstSimplify/ptr_diff.ll
+++ b/test/Transforms/InstSimplify/ptr_diff.ll
@@ -46,3 +46,33 @@ define i64 @ptrdiff3(i8* %ptr) {
   %diff = sub i64 %last.int, %first.int
   ret i64 %diff
 }
+
+define <4 x i32> @ptrdiff4(<4 x i8*> %arg) nounwind {
+; Handle simple cases of vectors of pointers.
+; CHECK: @ptrdiff4
+; CHECK: ret <4 x i32> zeroinitializer
+  %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
+  %bc = bitcast <4 x i8*> %arg to <4 x i32*>
+  %p2 = ptrtoint <4 x i32*> %bc to <4 x i32>
+  %sub = sub <4 x i32> %p1, %p2
+  ret <4 x i32> %sub
+}
+
+%struct.ham = type { i32, [2 x [2 x i32]] }
+
+@global = internal global %struct.ham zeroinitializer, align 4
+
+define i32 @ptrdiff5() nounwind {
+bb:
+  %tmp = getelementptr inbounds %struct.ham* @global, i32 0, i32 1
+  %tmp1 = getelementptr inbounds [2 x [2 x i32]]* %tmp, i32 0, i32 0
+  %tmp2 = bitcast [2 x i32]* %tmp1 to i32*
+  %tmp3 = ptrtoint i32* %tmp2 to i32
+  %tmp4 = getelementptr inbounds %struct.ham* @global, i32 0, i32 1
+  %tmp5 = getelementptr inbounds [2 x [2 x i32]]* %tmp4, i32 0, i32 0
+  %tmp6 = ptrtoint [2 x i32]* %tmp5 to i32
+  %tmp7 = sub i32 %tmp3, %tmp6
+  ret i32 %tmp7
+; CHECK: @ptrdiff5
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll
index f65260e00f54..5ac1ddef64f8 100644
--- a/test/Transforms/InstSimplify/vector_gep.ll
+++ b/test/Transforms/InstSimplify/vector_gep.ll
@@ -1,4 +1,4 @@
-;RUN: opt -instsimplify %s -disable-output
+;RUN: opt -instsimplify -disable-output < %s
 declare void @helper(<2 x i8*>)
 define void @test(<2 x i8*> %a) {
   %A = getelementptr <2 x i8*> %a, <2 x i32> <i32 0, i32 0>
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index 46271379bd0d..fe3dc77c9c13 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -jump-threading -S | FileCheck %s
+; RUN: opt -jump-threading -S < %s | FileCheck %s
 
 declare i32 @f1()
 declare i32 @f2()
@@ -476,3 +476,41 @@ exit1:
 ; CHECK: }
 }
 
+; In this test we check that block duplication is inhibited by the presence
+; of a function with the 'noduplicate' attribute.
+
+declare void @g()
+declare void @j()
+declare void @k()
+
+; CHECK: define void @h(i32 %p) {
+define void @h(i32 %p) {
+  %x = icmp ult i32 %p, 5
+  br i1 %x, label %l1, label %l2
+
+l1:
+  call void @j()
+  br label %l3
+
+l2:
+  call void @k()
+  br label %l3
+
+l3:
+; CHECK: call void @g() [[NOD:#[0-9]+]]
+; CHECK-NOT: call void @g() [[NOD]]
+  call void @g() noduplicate
+  %y = icmp ult i32 %p, 5
+  br i1 %y, label %l4, label %l5
+
+l4:
+  call void @j()
+  ret void
+
+l5:
+  call void @k()
+  ret void
+; CHECK: }
+}
+
+; CHECK: attributes [[NOD]] = { noduplicate }
diff --git a/test/Transforms/JumpThreading/degenerate-phi.ll b/test/Transforms/JumpThreading/degenerate-phi.ll
index 35d9fdec4281..2905b43af72c 100644
--- a/test/Transforms/JumpThreading/degenerate-phi.ll
+++ b/test/Transforms/JumpThreading/degenerate-phi.ll
@@ -1,4 +1,4 @@
-; RUN: opt -jump-threading -disable-output %s
+; RUN: opt -jump-threading -disable-output < %s
 ; PR9112
 
 ; This is actually a test for value tracking. Jump threading produces
diff --git a/test/Transforms/JumpThreading/or-undef.ll b/test/Transforms/JumpThreading/or-undef.ll
index 6e359925b6c6..6311b6df4373 100644
--- a/test/Transforms/JumpThreading/or-undef.ll
+++ b/test/Transforms/JumpThreading/or-undef.ll
@@ -1,4 +1,4 @@
-; RUN: opt -jump-threading -S %s | FileCheck %s
+; RUN: opt -jump-threading -S < %s | FileCheck %s
 ; rdar://7620633
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
index fe8d44531322..2bf26041626c 100644
--- a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
+++ b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm | lli %defaultjit
+; RUN: opt < %s -licm | lli -force-interpreter
 
 define i32 @main() {
 entry:
diff --git a/test/Transforms/LICM/2011-07-06-Alignment.ll b/test/Transforms/LICM/2011-07-06-Alignment.ll
index f97b7010bc02..569231489fec 100644
--- a/test/Transforms/LICM/2011-07-06-Alignment.ll
+++ b/test/Transforms/LICM/2011-07-06-Alignment.ll
@@ -1,4 +1,4 @@
-; RUN: opt -licm -S %s | FileCheck %s
+; RUN: opt -licm -S < %s | FileCheck %s
 
 @A = common global [1024 x float] zeroinitializer, align 4
 
diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll
index de41d008a746..b43477a56df5 100644
--- a/test/Transforms/LICM/crash.ll
+++ b/test/Transforms/LICM/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -licm %s -disable-output
+; RUN: opt -licm -disable-output < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/LICM/hoist-invariant-load.ll b/test/Transforms/LICM/hoist-invariant-load.ll
index f9fc551df358..1ba94d6b489c 100644
--- a/test/Transforms/LICM/hoist-invariant-load.ll
+++ b/test/Transforms/LICM/hoist-invariant-load.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt < %s -licm -stats -S 2>&1 | grep "1 licm"
 
 @"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll
index 98f93345e3c3..1ca377eb4a99 100644
--- a/test/Transforms/LICM/hoisting.ll
+++ b/test/Transforms/LICM/hoisting.ll
@@ -90,3 +90,29 @@ for.end:                                          ; preds = %for.body
 
 declare void @foo_may_call_exit(i32)
 
+; PR14854
+; CHECK: @test5
+; CHECK: extractvalue
+; CHECK: br label %tailrecurse
+; CHECK: tailrecurse:
+; CHECK: ifend:
+; CHECK: insertvalue
+define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %then, %entry
+  %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
+  %out = extractvalue { i32*, i32 } %e, 1
+  %d = insertvalue { i32*, i32 } %e, i32* null, 0
+  %cmp1 = icmp sgt i32 %out, %i.tr
+  br i1 %cmp1, label %then, label %ifend
+
+then:                                             ; preds = %tailrecurse
+  call void @foo()
+  %cmp2 = add i32 %i.tr, 1
+  br label %tailrecurse
+
+ifend:                                            ; preds = %tailrecurse
+  ret { i32*, i32 } %d
+}
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index 05a64d632274..e7eab92aa8d7 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -1,28 +1,28 @@
-; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+; RUN: opt < %s -basicaa -tbaa -licm -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
-@X = global i32 7		; <i32*> [#uses=4]
+@X = global i32 7   ; <i32*> [#uses=4]
 
 define void @test1(i32 %i) {
 Entry:
-	br label %Loop
+  br label %Loop
 ; CHECK: @test1
 ; CHECK: Entry:
 ; CHECK-NEXT:   load i32* @X
 ; CHECK-NEXT:   br label %Loop
 
 
-Loop:		; preds = %Loop, %0
-	%j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]		; <i32> [#uses=1]
-	%x = load i32* @X		; <i32> [#uses=1]
-	%x2 = add i32 %x, 1		; <i32> [#uses=1]
-	store i32 %x2, i32* @X
-	%Next = add i32 %j, 1		; <i32> [#uses=2]
-	%cond = icmp eq i32 %Next, 0		; <i1> [#uses=1]
-	br i1 %cond, label %Out, label %Loop
+Loop:   ; preds = %Loop, %0
+  %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]    ; <i32> [#uses=1]
+  %x = load i32* @X   ; <i32> [#uses=1]
+  %x2 = add i32 %x, 1   ; <i32> [#uses=1]
+  store i32 %x2, i32* @X
+  %Next = add i32 %j, 1   ; <i32> [#uses=2]
+  %cond = icmp eq i32 %Next, 0    ; <i1> [#uses=1]
+  br i1 %cond, label %Out, label %Loop
 
-Out:	
-	ret void
+Out:
+  ret void
 ; CHECK: Out:
 ; CHECK-NEXT:   store i32 %x2, i32* @X
 ; CHECK-NEXT:   ret void
@@ -31,22 +31,22 @@ Out:
 
 define void @test2(i32 %i) {
 Entry:
-	br label %Loop
+  br label %Loop
 ; CHECK: @test2
 ; CHECK: Entry:
 ; CHECK-NEXT:    %.promoted = load i32* getelementptr inbounds (i32* @X, i64 1)
 ; CHECK-NEXT:    br label %Loop
 
-Loop:		; preds = %Loop, %0
-	%X1 = getelementptr i32* @X, i64 1		; <i32*> [#uses=1]
-	%A = load i32* %X1		; <i32> [#uses=1]
-	%V = add i32 %A, 1		; <i32> [#uses=1]
-	%X2 = getelementptr i32* @X, i64 1		; <i32*> [#uses=1]
-	store i32 %V, i32* %X2
-	br i1 false, label %Loop, label %Exit
+Loop:   ; preds = %Loop, %0
+  %X1 = getelementptr i32* @X, i64 1    ; <i32*> [#uses=1]
+  %A = load i32* %X1    ; <i32> [#uses=1]
+  %V = add i32 %A, 1    ; <i32> [#uses=1]
+  %X2 = getelementptr i32* @X, i64 1    ; <i32*> [#uses=1]
+  store i32 %V, i32* %X2
+  br i1 false, label %Loop, label %Exit
 
-Exit:		; preds = %Loop
-	ret void
+Exit:   ; preds = %Loop
+  ret void
 ; CHECK: Exit:
 ; CHECK-NEXT:   store i32 %V, i32* getelementptr inbounds (i32* @X, i64 1)
 ; CHECK-NEXT:   ret void
@@ -56,19 +56,19 @@ Exit:		; preds = %Loop
 
 define void @test3(i32 %i) {
 ; CHECK: @test3
-	br label %Loop
+  br label %Loop
 Loop:
         ; Should not promote this to a register
-	%x = load volatile i32* @X
-	%x2 = add i32 %x, 1	
-	store i32 %x2, i32* @X
-	br i1 true, label %Out, label %Loop
-        
+  %x = load volatile i32* @X
+  %x2 = add i32 %x, 1
+  store i32 %x2, i32* @X
+  br i1 true, label %Out, label %Loop
+
 ; CHECK: Loop:
 ; CHECK-NEXT: load volatile
 
-Out:		; preds = %Loop
-	ret void
+Out:    ; preds = %Loop
+  ret void
 }
 
 ; PR8041
@@ -120,27 +120,27 @@ exit:
 
 define void @test5(i32 %i, i32** noalias %P2) {
 Entry:
-	br label %Loop
+  br label %Loop
 ; CHECK: @test5
 ; CHECK: Entry:
 ; CHECK-NEXT:   load i32* @X
 ; CHECK-NEXT:   br label %Loop
 
 
-Loop:		; preds = %Loop, %0
-	%j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]		; <i32> [#uses=1]
-	%x = load i32* @X		; <i32> [#uses=1]
-	%x2 = add i32 %x, 1		; <i32> [#uses=1]
-	store i32 %x2, i32* @X
-        
+Loop:   ; preds = %Loop, %0
+  %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]    ; <i32> [#uses=1]
+  %x = load i32* @X   ; <i32> [#uses=1]
+  %x2 = add i32 %x, 1   ; <i32> [#uses=1]
+  store i32 %x2, i32* @X
+
         store volatile i32* @X, i32** %P2
-        
-	%Next = add i32 %j, 1		; <i32> [#uses=2]
-	%cond = icmp eq i32 %Next, 0		; <i1> [#uses=1]
-	br i1 %cond, label %Out, label %Loop
 
-Out:	
-	ret void
+  %Next = add i32 %j, 1   ; <i32> [#uses=2]
+  %cond = icmp eq i32 %Next, 0    ; <i1> [#uses=1]
+  br i1 %cond, label %Out, label %Loop
+
+Out:
+  ret void
 ; CHECK: Out:
 ; CHECK-NEXT:   store i32 %x2, i32* @X
 ; CHECK-NEXT:   ret void
@@ -148,3 +148,40 @@ Out:
 }
 
 
+; PR14753 - Preserve TBAA tags when promoting values in a loop.
+define void @test6(i32 %n, float* nocapture %a, i32* %gi) {
+entry:
+  store i32 0, i32* %gi, align 4, !tbaa !0
+  %cmp1 = icmp slt i32 0, %n
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %storemerge2 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %idxprom = sext i32 %storemerge2 to i64
+  %arrayidx = getelementptr inbounds float* %a, i64 %idxprom
+  store float 0.000000e+00, float* %arrayidx, align 4, !tbaa !3
+  %0 = load i32* %gi, align 4, !tbaa !0
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %gi, align 4, !tbaa !0
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+
+; CHECK: for.body.lr.ph:
+; CHECK-NEXT:  %gi.promoted = load i32* %gi, align 4, !tbaa !0
+; CHECK: for.cond.for.end_crit_edge:
+; CHECK-NEXT:  store i32 %inc, i32* %gi, align 4, !tbaa !0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll b/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
index 40c6629e6f4f..cf9d8ce923ba 100644
--- a/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
+++ b/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -loop-deletion -disable-output
+; RUN: opt -loop-deletion -disable-output < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
diff --git a/test/Transforms/LoopDeletion/simplify-then-delete.ll b/test/Transforms/LoopDeletion/simplify-then-delete.ll
index 5a21672a5960..4278ef16d214 100644
--- a/test/Transforms/LoopDeletion/simplify-then-delete.ll
+++ b/test/Transforms/LoopDeletion/simplify-then-delete.ll
@@ -4,7 +4,7 @@
 ; Indvars and loop deletion should be able to eliminate all looping
 ; in this testcase.
 
-; CHECK:      define i32 @pmat(i32 %m, i32 %n, double* %y) nounwind {
+; CHECK:      define i32 @pmat(i32 %m, i32 %n, double* %y) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   ret i32 0
 ; CHECK-NEXT: }
@@ -63,3 +63,5 @@ w.e:
 w.e12:
   ret i32 0
 }
+
+; CHECK: attributes #0 = { nounwind }
diff --git a/test/Transforms/LoopIdiom/X86/lit.local.cfg b/test/Transforms/LoopIdiom/X86/lit.local.cfg
new file mode 100644
index 000000000000..a8ad0f1a28b2
--- /dev/null
+++ b/test/Transforms/LoopIdiom/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopIdiom/X86/popcnt.ll b/test/Transforms/LoopIdiom/X86/popcnt.ll
new file mode 100644
index 000000000000..25df93d3a082
--- /dev/null
+++ b/test/Transforms/LoopIdiom/X86/popcnt.ll
@@ -0,0 +1,140 @@
+; RUN: opt -loop-idiom < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -S | FileCheck %s
+
+;To recognize this pattern:
+;int popcount(unsigned long long a) {
+;    int c = 0;
+;    while (a) {
+;        c++;
+;        a &= a - 1;
+;    }
+;    return c;
+;}
+; 
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount(i64 %a) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
+
+; To recognize this pattern:
+;int popcount(unsigned long long a, int mydata1, int mydata2) {
+;    int c = 0;
+;    while (a) {
+;        c++;
+;        a &= a - 1;
+;        mydata1 *= c;
+;        mydata2 *= (int)a;
+;    }
+;    return c + mydata1 + mydata2;
+;}
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
+entry:
+  %tobool9 = icmp eq i64 %a, 0
+  br i1 %tobool9, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
+  %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
+  %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.013, 1
+  %sub = add i64 %a.addr.010, -1
+  %and = and i64 %sub, %a.addr.010
+  %mul = mul nsw i32 %inc, %mydata1.addr.011
+  %conv = trunc i64 %and to i32
+  %mul1 = mul nsw i32 %conv, %mydata2.addr.012
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
+  %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
+  %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
+  %add2 = add i32 %add, %c.0.lcssa
+  ret i32 %add2
+}
+
+; Some variants once cause crash
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @PopCntCrash1(i64 %a) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %t = add i32 %c.05, %c.05
+  %inc = add nsw i32 %t, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+
+; CHECK: entry
+; CHECK: ret 
+}
+
+define i32 @PopCntCrash2(i64 %a, i32 %b) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ %b, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
+
+define i32 @PopCntCrash3(i64 %a, i32 %x) {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  %cmp = icmp eq i32 %x, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04
+  %tobool = icmp eq i64 %and, 0
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
diff --git a/test/Transforms/LoopRotate/basic.ll b/test/Transforms/LoopRotate/basic.ll
index b7bcb21d56f8..78878f9fa663 100644
--- a/test/Transforms/LoopRotate/basic.ll
+++ b/test/Transforms/LoopRotate/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-rotate %s | FileCheck %s
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
@@ -33,3 +33,29 @@ for.end:                                          ; preds = %for.cond
 
 declare void @g(i32*)
 
+; CHECK: @test2
+define void @test2() nounwind ssp {
+entry:
+  %array = alloca [20 x i32], align 16
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, 100
+; CHECK: call void @f
+; CHECK-NOT: call void @f
+  call void @f() noduplicate 
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %inc = add nsw i32 %i.0, 1
+  call void @h()
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+; CHECK: }
+}
+
+declare void @f() noduplicate
+declare void @h()
diff --git a/test/Transforms/LoopRotate/crash.ll b/test/Transforms/LoopRotate/crash.ll
index 954b83476551..fd922cb5569e 100644
--- a/test/Transforms/LoopRotate/crash.ll
+++ b/test/Transforms/LoopRotate/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-rotate %s -disable-output -verify-dom-info -verify-loop-info
+; RUN: opt -loop-rotate -disable-output -verify-dom-info -verify-loop-info < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index b32ee82d3a57..6a8d30820f6e 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-rotate  %s  | FileCheck %s
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
diff --git a/test/Transforms/LoopRotate/phi-duplicate.ll b/test/Transforms/LoopRotate/phi-duplicate.ll
index 737283092250..8ad2dce71a65 100644
--- a/test/Transforms/LoopRotate/phi-duplicate.ll
+++ b/test/Transforms/LoopRotate/phi-duplicate.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S %s -loop-rotate | FileCheck %s
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0"
 
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
index 3793baccbbc1..9524be3ceee0 100644
--- a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
+++ b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-reduce -disable-output -debug-only=loop-reduce %s 2> %t
+; RUN: opt -loop-reduce -disable-output -debug-only=loop-reduce < %s 2> %t
 ; RUN: FileCheck %s < %t
 ; REQUIRES: asserts
 ;
@@ -10,15 +10,13 @@
 ; CHECK: After generating reuse formulae:
 ; CHECK: LSR is examining the following uses:
 ; CHECK: LSR Use: Kind=Special
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
 ; CHECK-NOT:reg
 ; CHECK: Filtering for use
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-freebsd9"
 
 %struct.snork = type { %struct.fuga, i32, i32, i32, i32, i32, i32 }
 %struct.fuga = type { %struct.gork, i64 }
diff --git a/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll b/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll
new file mode 100644
index 000000000000..bce234cd4066
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll
@@ -0,0 +1,44 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; Indirect branch in the preheader crashes replaceCongruentIVs.
+; rdar://12910141
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+; CHECK: @test
+; CHECK: bb8:
+; CHECK-NEXT: phi i8
+; CHECK-NEXT: phi i8
+; CHECK: ret void
+define void @test() nounwind ssp {
+bb:
+  br label %bb190
+
+bb8:                                              ; preds = %bb190, %bb11
+  %tmp = phi i8 [ %tmp14, %bb11 ], [ 25, %bb190 ]
+  %tmp9 = phi i8 [ %tmp12, %bb11 ], [ 25, %bb190 ]
+  %tmp10 = add i8 %tmp, -5
+  indirectbr i8* undef, [label %bb11, label %bb15]
+
+bb11:                                             ; preds = %bb8
+  %tmp12 = add i8 %tmp9, 1
+  %tmp13 = add i8 %tmp9, -19
+  %tmp14 = add i8 %tmp, 1
+  indirectbr i8* undef, [label %bb8]
+
+bb15:                                             ; preds = %bb8
+  indirectbr i8* undef, [label %bb16]
+
+bb16:                                             ; preds = %bb16, %bb15
+  indirectbr i8* undef, [label %bb37, label %bb190]
+
+
+bb37:                                             ; preds = %bb190
+  indirectbr i8* undef, [label %bb38]
+
+bb38:                                             ; preds = %bb37, %bb5
+  ret void
+
+bb190:                                            ; preds = %bb189, %bb187
+  indirectbr i8* undef, [label %bb37, label %bb8]
+}
diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
new file mode 100644
index 000000000000..8fbddf8ae4c8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
@@ -0,0 +1,84 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; LTO of clang, which mistakenly uses no TargetLoweringInfo, causes a
+; miscompile. ReuseOrCreateCast replace ptrtoint operand with undef.
+; Reproducing the miscompile requires no triple, hence no "TTI".
+; rdar://13007381
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Verify that nothing uses the "dead" ptrtoint from "undef".
+; CHECK: @VerifyDiagnosticConsumerTest
+; CHECK: bb:
+; CHECK: %0 = ptrtoint i8* undef to i64
+; CHECK-NOT: %0
+; CHECK: .lr.ph
+; CHECK-NOT: %0
+; CHECK: sub i64 %7, %tmp6
+; CHECK-NOT: %0
+; CHECK: ret void
+define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {
+bb:
+  %tmp3 = call i8* @getCharData() nounwind
+  %tmp4 = call i8* @getCharData() nounwind
+  %tmp5 = ptrtoint i8* %tmp4 to i64
+  %tmp6 = ptrtoint i8* %tmp3 to i64
+  %tmp7 = sub i64 %tmp5, %tmp6
+  br i1 undef, label %bb87, label %.preheader
+
+.preheader:                                       ; preds = %bb10, %bb
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %bb10
+
+bb10:                                             ; preds = %.preheader
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42, label %.preheader
+
+_ZNK4llvm9StringRef4findEcm.exit42:               ; preds = %bb10
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %.lr.ph
+
+_ZNK4llvm9StringRef4findEcm.exit42.thread:        ; preds = %_ZNK4llvm9StringRef4findEcm.exit42, %.preheader
+  unreachable
+
+.lr.ph:                                           ; preds = %_ZNK4llvm9StringRef4findEcm.exit42
+  br label %bb36
+
+_ZNK4llvm9StringRef4findEcm.exit.loopexit:        ; preds = %bb63
+  %tmp21 = icmp eq i64 %i.0.i, -1
+  br i1 %tmp21, label %_ZNK4llvm9StringRef4findEcm.exit._crit_edge, label %bb36
+
+_ZNK4llvm9StringRef4findEcm.exit._crit_edge:      ; preds = %bb61, %_ZNK4llvm9StringRef4findEcm.exit.loopexit
+  unreachable
+
+bb36:                                             ; preds = %_ZNK4llvm9StringRef4findEcm.exit.loopexit, %.lr.ph
+  %loc.063 = phi i64 [ undef, %.lr.ph ], [ %i.0.i, %_ZNK4llvm9StringRef4findEcm.exit.loopexit ]
+  switch i8 undef, label %bb57 [
+    i8 10, label %bb48
+    i8 13, label %bb48
+  ]
+
+bb48:                                             ; preds = %bb36, %bb36
+  br label %bb58
+
+bb57:                                             ; preds = %bb36
+  br label %bb58
+
+bb58:                                             ; preds = %bb57, %bb48
+  %tmp59 = icmp ugt i64 %tmp7, undef
+  %tmp60 = select i1 %tmp59, i64 undef, i64 %tmp7
+  br label %bb61
+
+bb61:                                             ; preds = %bb63, %bb58
+  %i.0.i = phi i64 [ %tmp60, %bb58 ], [ %tmp67, %bb63 ]
+  %tmp62 = icmp eq i64 %i.0.i, %tmp7
+  br i1 %tmp62, label %_ZNK4llvm9StringRef4findEcm.exit._crit_edge, label %bb63
+
+bb63:                                             ; preds = %bb61
+  %tmp64 = getelementptr inbounds i8* %tmp3, i64 %i.0.i
+  %tmp65 = load i8* %tmp64, align 1
+  %tmp67 = add i64 %i.0.i, 1
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit.loopexit, label %bb61
+
+bb87:                                             ; preds = %bb
+  ret void
+}
+
+declare i8* @getCharData()
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index 9189d79e2fb6..ee3cc4dd78fc 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -205,18 +205,18 @@ for.end:                                          ; preds = %for.body
 ; post-increment addressing, no add's or add.w's beyond the three
 ; mentioned. Most importantly, there should be no spills or reloads!
 ;
-; CHECK: testNeon:
-; CHECK: %.lr.ph
-; CHECK-NOT: lsl.w
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK-NOT: add.w r
-; CHECK: bne
+; A9: testNeon:
+; A9: %.lr.ph
+; A9-NOT: lsl.w
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9-NOT: add.w r
+; A9: bne
 define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
   %1 = icmp sgt i32 %limit, 0
   br i1 %1, label %.lr.ph, label %45
@@ -290,3 +290,80 @@ define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i
 }
 
 declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
+
+; Handle chains in which the same offset is used for both loads and
+; stores to the same array.
+; rdar://11410078.
+;
+; A9: @testReuse
+; A9: %for.body
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE:[r[0-9]+]]], [[INC:r[0-9]]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], {{r[0-9]}}
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]]
+; A9: bne
+define void @testReuse(i8* %src, i32 %stride) nounwind ssp {
+entry:
+  %mul = shl nsw i32 %stride, 2
+  %idx.neg = sub i32 0, %mul
+  %mul1 = mul nsw i32 %stride, 3
+  %idx.neg2 = sub i32 0, %mul1
+  %mul5 = shl nsw i32 %stride, 1
+  %idx.neg6 = sub i32 0, %mul5
+  %idx.neg10 = sub i32 0, %stride
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.0110 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %src.addr = phi i8* [ %src, %entry ], [ %add.ptr45, %for.body ]
+  %add.ptr = getelementptr inbounds i8* %src.addr, i32 %idx.neg
+  %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr, i32 1)
+  %add.ptr3 = getelementptr inbounds i8* %src.addr, i32 %idx.neg2
+  %vld2 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr3, i32 1)
+  %add.ptr7 = getelementptr inbounds i8* %src.addr, i32 %idx.neg6
+  %vld3 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr7, i32 1)
+  %add.ptr11 = getelementptr inbounds i8* %src.addr, i32 %idx.neg10
+  %vld4 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr11, i32 1)
+  %vld5 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %src.addr, i32 1)
+  %add.ptr17 = getelementptr inbounds i8* %src.addr, i32 %stride
+  %vld6 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr17, i32 1)
+  %add.ptr20 = getelementptr inbounds i8* %src.addr, i32 %mul5
+  %vld7 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr20, i32 1)
+  %add.ptr23 = getelementptr inbounds i8* %src.addr, i32 %mul1
+  %vld8 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr23, i32 1)
+  %vadd1 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld1, <8 x i8> %vld2) nounwind
+  %vadd2 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld2, <8 x i8> %vld3) nounwind
+  %vadd3 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld3, <8 x i8> %vld4) nounwind
+  %vadd4 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld4, <8 x i8> %vld5) nounwind
+  %vadd5 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld5, <8 x i8> %vld6) nounwind
+  %vadd6 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld6, <8 x i8> %vld7) nounwind
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr3, <8 x i8> %vadd1, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr7, <8 x i8> %vadd2, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr11, <8 x i8> %vadd3, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %src.addr, <8 x i8> %vadd4, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr17, <8 x i8> %vadd5, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr20, <8 x i8> %vadd6, i32 1)
+  %inc = add nsw i32 %i.0110, 1
+  %add.ptr45 = getelementptr inbounds i8* %src.addr, i32 8
+  %exitcond = icmp eq i32 %inc, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
+
+declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll b/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll
index c650d8cf76d8..9a7f4865c591 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | grep "phi double" | count 1
+; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | grep "phi double" | count 1
 
 define void @foobar(i32 %n) nounwind {
 entry:
diff --git a/test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll
index 5d9ed64ef422..a932b4792586 100644
--- a/test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | FileCheck %s
 ;
 ; Test LSR's OptimizeShadowIV. Handle a floating-point IV with a
 ; nonzero initial value.
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
index 510865096272..eedfc200f48b 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
 ;
 ; Test LSR's ability to prune formulae that refer to nonexistant
 ; AddRecs in other loops.
@@ -15,13 +15,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin"
 
 ; CHECK: @test
-; CHECK: # %for.body{{$}}
-; dummyiv copy should be removed
-; CHECK-NOT: movq
-; CHECK: # %for.cond19.preheader
-; dummycnt should be removed
-; CHECK-NOT: incq
-; CHECK: # %for.body23{{$}}
+; CHECK: for.body:
+; CHECK: %lsr.iv
+; CHECK-NOT: %dummyout
+; CHECK: ret
 define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp {
 entry:
   %cmp34 = icmp eq i64 %count, 0
diff --git a/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
index b87bf620decf..ff8cab83137b 100644
--- a/test/Transforms/LoopStrengthReduce/dominate-assert.ll
+++ b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-reduce %s
+; RUN: opt -loop-reduce < %s
 ; we used to crash on this one
 
 declare i8* @_Znwm()
diff --git a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
index ad4959be340e..498be1a9a1a2 100644
--- a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
+++ b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
@@ -2,7 +2,7 @@
 ; having overlapping live ranges that result in copies.  We want the setcc 
 ; instruction immediately before the conditional branch.
 ;
-; RUN: opt -S -loop-reduce %s | FileCheck %s
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
 
 define void @foo(float* %D, i32 %E) {
 entry:
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 96904c66e640..45aeb4e691a0 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -4,18 +4,17 @@
 ; LSR should properly handle the post-inc offset when folding the
 ; non-IV operand of an icmp into the IV.
 
-; CHECK:   %4 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK:   %5 = lshr i64 %4, 1
-; CHECK:   %6 = mul i64 %5, 2
+; CHECK:   [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK:   [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
+; CHECK:   [[r3:%[a-z0-9]+]] = mul i64 [[r2]], 2
 ; CHECK:   br label %for.body
 ; CHECK: for.body:
-; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %6, %for.body.lr.ph ]
+; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r3]], %for.body.lr.ph ]
 ; CHECK:   %lsr.iv.next = add i64 %lsr.iv2, -2
 ; CHECK:   %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16*
 ; CHECK:   %cmp27 = icmp eq i16* %lsr.iv.next3, null
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
 
 %struct.Vector2 = type { i16*, [64 x i16], i32 }
 
diff --git a/test/Transforms/LoopUnroll/basic.ll b/test/Transforms/LoopUnroll/basic.ll
index eeb3e9a57b06..ab5bc568ede4 100644
--- a/test/Transforms/LoopUnroll/basic.ll
+++ b/test/Transforms/LoopUnroll/basic.ll
@@ -22,3 +22,26 @@ l1:                                               ; preds = %l1, %entry
 l2:                                               ; preds = %l1
   ret i32 0
 }
+
+; This should not unroll since the call is 'noduplicate'.
+
+; CHECK: @test2
+define i32 @test2(i8** %P) nounwind ssp {
+entry:
+  br label %l1
+
+l1:                                               ; preds = %l1, %entry
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l1 ]
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() noduplicate
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 3
+  br i1 %exitcond, label %l2, label %l1
+
+l2:                                               ; preds = %l1
+  ret i32 0
+; CHECK: }
+}
+
+declare void @f()
diff --git a/test/Transforms/LoopUnroll/runtime-loop3.ll b/test/Transforms/LoopUnroll/runtime-loop3.ll
index 55cf22373ece..aa928ccc60c1 100644
--- a/test/Transforms/LoopUnroll/runtime-loop3.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop3.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
 
 ; Test that nested loops can be unrolled.  We need to increase threshold to do it
diff --git a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
index 9d73d31d5044..31dba79be1f8 100644
--- a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
+++ b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt < %s -loop-unswitch -stats -disable-output 2>&1 | grep "1 loop-unswitch - Number of branches unswitched" | count 1
 ; PR 3170
 define i32 @a(i32 %x, i32 %y) nounwind {
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
index c1fd58810660..a8608b877205 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -1,5 +1,6 @@
+; REQUIRES: asserts
 ; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
-; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info < %s | FileCheck %s
 
 ; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
 ; STATS: 2 loop-unswitch - Number of switches unswitched
@@ -19,7 +20,7 @@
 ; CHECK-NEXT:     i32 1, label %inc.us
 
 ; CHECK:      inc.us:                                           ; preds = %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us
 
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
@@ -40,7 +41,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      dec.us3:                                          ; preds = %loop_begin.us1
-; CHECK-NEXT:   call void @decf() noreturn nounwind
+; CHECK-NEXT:   call void @decf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us5
 
 ; CHECK:      .split.split:                                     ; preds = %.split..split.split_crit_edge
@@ -89,3 +90,6 @@ loop_exit:
 
 declare void @incf() noreturn
 declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
index f3db47119958..686cedbbc51a 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -1,5 +1,6 @@
+; REQUIRES: asserts
 ; RUN: opt -loop-unswitch -loop-unswitch-threshold 13 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
-; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info < %s | FileCheck %s
 
 ; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
 ; STATS: 1 loop-unswitch - Number of switches unswitched
@@ -25,7 +26,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      inc.us:                                           ; preds = %second_switch.us, %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us
 
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
@@ -45,7 +46,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      inc:                                              ; preds = %loop_begin.inc_crit_edge, %second_switch
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge
 
 define i32 @test(i32* %var) {
@@ -82,3 +83,6 @@ loop_exit:
 
 declare void @incf() noreturn
 declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
index 270899642ffa..3ba9fc2f5cf1 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -1,5 +1,6 @@
+; REQUIRES: asserts
 ; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
-; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info < %s | FileCheck %s
 
 ; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
 ; STATS: 3 loop-unswitch - Number of switches unswitched
@@ -30,7 +31,7 @@
 ; CHECK-NEXT:     i32 1, label %inc.us.us
 
 ; CHECK:      inc.us.us:                                        ; preds = %second_switch.us.us, %loop_begin.us.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us.us
 
 ; CHECK:      .split.us.split:                                  ; preds = %.split.us..split.us.split_crit_edge
@@ -50,7 +51,7 @@
 ; CHECK-NEXT:   br i1 true, label %us-unreachable8, label %inc.us
 
 ; CHECK:      inc.us:                                           ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us
 
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
@@ -75,7 +76,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      inc.us4:                                          ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us3
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us6
 
 ; CHECK:      loop_begin.inc_crit_edge.us:                      ; preds = %loop_begin.us1
@@ -136,3 +137,6 @@ loop_exit:
 
 declare void @incf() noreturn
 declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/basictest.ll b/test/Transforms/LoopUnswitch/basictest.ll
index 1e6f2cf15ee1..e98d82b6522d 100644
--- a/test/Transforms/LoopUnswitch/basictest.ll
+++ b/test/Transforms/LoopUnswitch/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -verify-loop-info -S < %s 2>&1 | FileCheck %s
 
 define i32 @test(i32* %A, i1 %C) {
 entry:
@@ -29,3 +29,40 @@ return:		; preds = %endif, %then
 	ret i32 %tmp.13
 }
 
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the noduplicate call.
+
+; CHECK: @test2
+define i32 @test2(i32* %var) {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32* %mem
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32* %var
+
+  switch i32 %c, label %default [
+      i32 1, label %inc
+      i32 2, label %dec
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin
+dec:
+; CHECK: call void @decf()
+; CHECK-NOT: call void @decf()
+  call void @decf() noreturn nounwind noduplicate
+  br label %loop_begin
+default:
+  br label %loop_exit
+loop_exit:
+  ret i32 0
+; CHECK: }
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll
index 73391ca8d19d..8261e389370a 100644
--- a/test/Transforms/LoopUnswitch/infinite-loop.ll
+++ b/test/Transforms/LoopUnswitch/infinite-loop.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
 ; RUN: opt -loop-unswitch -simplifycfg -S < %s | FileCheck %s
 ; PR5373
@@ -21,11 +22,11 @@
 ; CHECK-NEXT: br label %cond.end.us
 
 ; CHECK: abort0.split:
-; CHECK-NEXT: call void @end0() noreturn nounwind
+; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT: unreachable
 
 ; CHECK: abort1:
-; CHECK-NEXT: call void @end1() noreturn nounwind
+; CHECK-NEXT: call void @end1() [[NOR_NUW]]
 ; CHECK-NEXT: unreachable
 
 ; CHECK: }
@@ -51,3 +52,7 @@ abort1:
 
 declare void @end0() noreturn
 declare void @end1() noreturn
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/preserve-analyses.ll b/test/Transforms/LoopUnswitch/preserve-analyses.ll
index 668f8ecaf8a5..f79612bef51e 100644
--- a/test/Transforms/LoopUnswitch/preserve-analyses.ll
+++ b/test/Transforms/LoopUnswitch/preserve-analyses.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-unswitch -verify-loop-info -verify-dom-info %s -disable-output
+; RUN: opt -loop-unswitch -verify-loop-info -verify-dom-info -disable-output < %s
 
 ; Loop unswitch should be able to unswitch these loops and
 ; preserve LCSSA and LoopSimplify forms.
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
new file mode 100644
index 000000000000..2dd7fe34a70b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @foo
+;CHECK: icmp eq <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @foo(i32 %x, i32 %t, i32* nocapture %A) nounwind uwtable ssp {
+entry:
+  %cmp10 = icmp sgt i32 %x, 0
+  br i1 %cmp10, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %if.end
+  %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %1 = add nsw i64 %indvars.iv, 45
+  %2 = trunc i64 %indvars.iv to i32
+  %mul = mul nsw i32 %2, %t
+  %3 = trunc i64 %1 to i32
+  %add1 = add nsw i32 %3, %mul
+  br label %if.end
+
+if.end:                                           ; preds = %for.body, %if.then
+  %z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ]
+  store i32 %z.0, i32* %arrayidx, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %x
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %if.end, %entry
+  ret i32 undef
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
index 0176c9a18966..aa7cc0ee325d 100644
--- a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
+++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce
 
 ; Check that we don't fall into an infinite loop.
 define void @test() nounwind {
@@ -25,3 +25,47 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
  unreachable
 }
+
+;PR14701
+define void @start_model_rare() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %cond.false, label %cond.true
+
+cond.true:                                        ; preds = %if.end
+  unreachable
+
+cond.false:                                       ; preds = %if.end
+  br i1 undef, label %cond.false28, label %cond.true20
+
+cond.true20:                                      ; preds = %cond.false
+  unreachable
+
+cond.false28:                                     ; preds = %cond.false
+  br label %for.body40
+
+for.body40:                                       ; preds = %for.inc50, %cond.false28
+  %indvars.iv123 = phi i64 [ 3, %cond.false28 ], [ %indvars.iv.next124, %for.inc50 ]
+  %step.0121 = phi i32 [ 1, %cond.false28 ], [ %step.1, %for.inc50 ]
+  br i1 undef, label %if.then46, label %for.inc50
+
+if.then46:                                        ; preds = %for.body40
+  %inc47 = add nsw i32 %step.0121, 1
+  br label %for.inc50
+
+for.inc50:                                        ; preds = %if.then46, %for.body40
+  %k.1 = phi i32 [ undef, %for.body40 ], [ %inc47, %if.then46 ]
+  %step.1 = phi i32 [ %step.0121, %for.body40 ], [ %inc47, %if.then46 ]
+  %indvars.iv.next124 = add i64 %indvars.iv123, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next124 to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %for.end52, label %for.body40
+
+for.end52:                                        ; preds = %for.inc50
+  unreachable
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
index 2516e248bc96..405582c40899 100644
--- a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
+++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -dce -force-vector-width=4 
+; RUN: opt < %s  -loop-vectorize -dce -force-vector-unroll=1 -force-vector-width=4 
 
 ; Check that we don't crash.
 
diff --git a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
new file mode 100644
index 000000000000..c8d307f5d443
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S | FileCheck %s --check-prefix=SWIFT
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+;CHECK: @foo
+;CHECK: load <4 x i32>
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret
+;SWIFT: @foo
+;SWIFT: load <4 x i32>
+;SWIFT: load <4 x i32>
+;SWIFT: ret
+define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ]
+  %sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i32* %A, i32 %i.02
+  %3 = load i32* %2, align 4
+  %4 = add nsw i32 %3, %sum.01
+  %5 = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %5, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ]
+  ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/ARM/gcc-examples.ll b/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
new file mode 100644
index 000000000000..6a68e81bcae0
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S -dce | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; Select VF = 8;
+;CHECK: @example1
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %6, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
+;CHECK: @example10b
+;CHECK: load <4 x i16>
+;CHECK: sext <4 x i16>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+  %3 = load i16* %2, align 2
+  %4 = sext i16 %3 to i32
+  %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+  store i32 %4, i32* %5, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/ARM/lit.local.cfg b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
new file mode 100644
index 000000000000..cb77b09ef4ad
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll b/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
new file mode 100644
index 000000000000..d2e3de279f7c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
+; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM
+
+; ModuleID = 'arm.ll'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+%T216 = type <2 x i16>
+%T232 = type <2 x i32>
+%T264 = type <2 x i64>
+
+%T416 = type <4 x i16>
+%T432 = type <4 x i32>
+%T464 = type <4 x i64>
+
+define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'direct':
+  %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+  %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+  %r3 = mul %T432 %v0, %v1 
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmul.i32
+  store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'ups1632':
+  %v0 = load %T416* %loadaddr
+; ASM: vldr
+  %v1 = load %T416* %loadaddr2
+; ASM: vldr
+  %r1 = sext %T416 %v0 to %T432
+  %r2 = sext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
+  %r3 = mul %T432 %r1, %r2 
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.s16
+  store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'upu1632':
+  %v0 = load %T416* %loadaddr
+; ASM: vldr
+  %v1 = load %T416* %loadaddr2
+; ASM: vldr
+  %r1 = zext %T416 %v0 to %T432
+  %r2 = zext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
+  %r3 = mul %T432 %r1, %r2 
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.u16
+  store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'ups3264':
+  %v0 = load %T232* %loadaddr
+; ASM: vldr
+  %v1 = load %T232* %loadaddr2
+; ASM: vldr
+  %r3 = mul %T232 %v0, %v1 
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+  %st = sext %T232 %r3 to %T264
+; ASM: vmovl.s32
+; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
+  store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'upu3264':
+  %v0 = load %T232* %loadaddr
+; ASM: vldr
+  %v1 = load %T232* %loadaddr2
+; ASM: vldr
+  %r3 = mul %T232 %v0, %v1 
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+  %st = zext %T232 %r3 to %T264
+; ASM: vmovl.u32
+; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
+  store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
+; COST: function 'dn3216':
+  %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+  %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+  %r3 = mul %T432 %v0, %v1 
+; ASM: vmul.i32
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+  %st = trunc %T432 %r3 to %T416
+; ASM: vmovn.i32
+; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
+  store %T416 %st, %T416* %storeaddr
+; ASM: vstr
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/ARM/width-detect.ll b/test/Transforms/LoopVectorize/ARM/width-detect.ll
new file mode 100644
index 000000000000..c0795b6a79af
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/width-detect.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+;CHECK:foo_F64
+;CHECK: <2 x double>
+;CHECK:ret
+define double @foo_F64(double* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %prod.01 = phi double [ %4, %.lr.ph ], [ 0.000000e+00, %0 ]
+  %2 = getelementptr inbounds double* %A, i64 %indvars.iv
+  %3 = load double* %2, align 8
+  %4 = fmul fast double %prod.01, %3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %prod.0.lcssa = phi double [ 0.000000e+00, %0 ], [ %4, %.lr.ph ]
+  ret double %prod.0.lcssa
+}
+
+;CHECK:foo_I8
+;CHECK: xor <16 x i8>
+;CHECK:ret
+define signext i8 @foo_I8(i8* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %red.01 = phi i8 [ %4, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i8* %A, i64 %indvars.iv
+  %3 = load i8* %2, align 1
+  %4 = xor i8 %3, %red.01
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %red.0.lcssa = phi i8 [ 0, %0 ], [ %4, %.lr.ph ]
+  ret i8 %red.0.lcssa
+}
+
+
diff --git a/test/Transforms/LoopVectorize/X86/avx1.ll b/test/Transforms/LoopVectorize/X86/avx1.ll
index a2d176a534c9..6c0366eae973 100644
--- a/test/Transforms/LoopVectorize/X86/avx1.ll
+++ b/test/Transforms/LoopVectorize/X86/avx1.ll
@@ -27,7 +27,7 @@ define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwta
 
 
 ;CHECK: @read_mod_i64
-;CHECK: load <8 x i64>
+;CHECK: load <2 x i64>
 ;CHECK: ret i32
 define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
@@ -37,7 +37,7 @@ define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i64* %a, i64 %indvars.iv
   %3 = load i64* %2, align 4
-  %4 = mul i64 %3, 3
+  %4 = add i64 %3, 3
   store i64 %4, i64* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
new file mode 100644
index 000000000000..6c924409af37
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
@@ -0,0 +1,28 @@
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -loop-vectorize -dce -instcombine -S < %s | FileCheck %s
+
+@B = common global [1024 x i32] zeroinitializer, align 16
+@A = common global [1024 x i32] zeroinitializer, align 16
+
+; We use to not vectorize this loop because the shift was deemed to expensive.
+; Now that we differentiate shift cost base on the operand value kind, we will
+; vectorize this loop.
+; CHECK: ashr <4 x i32>
+define void @f() {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %shl = ashr i32 %0, 3
+  %arrayidx2 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  store i32 %shl, i32* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
index 8f1bb545fa01..760d28deaf27 100644
--- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ;CHECK: @conversion_cost1
-;CHECK: store <2 x i8>
+;CHECK: store <32 x i8>
 ;CHECK: ret
 define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 3
@@ -33,11 +33,10 @@ define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) noun
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
-  %2 = add nsw i64 %indvars.iv, 3
-  %3 = trunc i64 %2 to i32
-  %4 = sitofp i32 %3 to float
-  %5 = getelementptr inbounds float* %B, i64 %indvars.iv
-  store float %4, float* %5, align 4
+  %add = add nsw i64 %indvars.iv, 3
+  %tofp = sitofp i64 %add to float
+  %gep = getelementptr inbounds float* %B, i64 %indvars.iv
+  store float %tofp, float* %gep, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll
index 628f9912c8c9..b7f479acf962 100644
--- a/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -8,8 +8,11 @@ target triple = "x86_64-apple-macosx10.8.0"
 @d = common global [2048 x i32] zeroinitializer, align 16
 @a = common global [2048 x i32] zeroinitializer, align 16
 
+; The program below gathers and scatters data. We better not vectorize it.
 ;CHECK: cost_model_1
-;CHECK: <4 x i32>
+;CHECK-NOT: <2 x i32>
+;CHECK-NOT: <4 x i32>
+;CHECK-NOT: <8 x i32>
 ;CHECK: ret void
 define void @cost_model_1() nounwind uwtable noinline ssp {
 entry:
diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
index 574c529834ac..d2d0eac305f5 100644
--- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -9,10 +10,19 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 ; Select VF = 8;
 ;CHECK: @example1
-;CHECK: load <8 x i32>
-;CHECK: add nsw <8 x i32>
-;CHECK: store <8 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
 ;CHECK: ret void
+
+;UNROLL: @example1
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
 define void @example1() nounwind uwtable ssp {
   br label %1
 
@@ -34,13 +44,18 @@ define void @example1() nounwind uwtable ssp {
   ret void
 }
 
-
-; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive. 
+; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive.
 ;CHECK: @example10b
 ;CHECK: load <4 x i16>
 ;CHECK: sext <4 x i16>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example10b
+;UNROLL: load <4 x i16>
+;UNROLL: load <4 x i16>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
 define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
   br label %1
 
diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
new file mode 100644
index 000000000000..186fba87d653
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: <4 x float>
+define void @trivial_loop(float* nocapture %a) nounwind uwtable optsize {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %add = fadd float %0, 1.000000e+00
+  store float %add, float* %arrayidx, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 8
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/X86/no-vector.ll b/test/Transforms/LoopVectorize/X86/no-vector.ll
new file mode 100644
index 000000000000..692eec989591
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/no-vector.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -mtriple=i386-unknown-freebsd -mcpu=i486 -loop-vectorize < %s
+
+define i32 @PR14639(i8* nocapture %s, i32 %len) nounwind {
+entry:
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %r.05 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8* %s, i32 %i.06
+  %0 = load i8* %arrayidx, align 1
+  %conv = sext i8 %0 to i32
+  %xor = xor i32 %conv, %r.05
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, %len
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %r.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
+  ret i32 %r.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
new file mode 100644
index 000000000000..452d0df133db
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The parallel loop has been invalidated by the new memory accesses introduced
+; by reg2mem (Loop::isParallel() starts to return false). Ensure the loop is
+; now non-vectorizable.
+
+;CHECK-NOT: <4 x i32>
+define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  %indvars.iv.next.reg2mem = alloca i64
+  %indvars.iv.reg2mem = alloca i64
+  %"reg2mem alloca point" = bitcast i32 0 to i32
+  store i64 0, i64* %indvars.iv.reg2mem
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.for.body_crit_edge, %entry
+  %indvars.iv.reload = load i64* %indvars.iv.reg2mem
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv.reload
+  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv.reload
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %indvars.iv.next = add i64 %indvars.iv.reload, 1
+  ; A new store without the parallel metadata here:
+  store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem
+  %indvars.iv.next.reload1 = load i64* %indvars.iv.next.reg2mem
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next.reload1
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem
+  %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !llvm.loop.parallel !3
+
+for.body.for.body_crit_edge:                      ; preds = %for.body
+  %indvars.iv.next.reload2 = load i64* %indvars.iv.next.reg2mem
+  store i64 %indvars.iv.next.reload2, i64* %indvars.iv.reg2mem
+  br label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !3}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
new file mode 100644
index 000000000000..f648722734a1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; A tricky loop:
+;
+; void loop(int *a, int *b) {
+;    for (int i = 0; i < 512; ++i) {
+;        a[a[i]] = b[i];
+;        a[i] = b[i+1];
+;    }
+;}
+
+;CHECK: @loop
+;CHECK-NOT: <4 x i32>
+define void @loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; The same loop with parallel loop metadata added to the loop branch
+; and the memory instructions.
+
+;CHECK: @parallel_loop
+;CHECK: <4 x i32>
+define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  ; This store might have originated from inlining a function with a parallel
+  ; loop. Refers to a list with the "original loop reference" (!4) also included.
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !5
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !3
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; The same loop with an illegal parallel loop metadata: the memory
+; accesses refer to a different loop's identifier.
+
+;CHECK: @mixed_metadata
+;CHECK-NOT: <4 x i32>
+
+define void @mixed_metadata(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  ; This refers to the loop marked with !7 which we are not in at the moment.
+  ; It should prevent detecting as a parallel loop.
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !7
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !6
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !3}
+!4 = metadata !{metadata !4}
+!5 = metadata !{metadata !3, metadata !4}
+!6 = metadata !{metadata !6}
+!7 = metadata !{metadata !7}
diff --git a/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
new file mode 100644
index 000000000000..f580846a0228
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -loop-vectorize -mcpu=prescott < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-apple-darwin"
+
+; PR15344
+define void @test1(float* nocapture %arg, i32 %arg1) nounwind {
+; CHECK: @test1
+; CHECK: preheader
+; CHECK: insertelement <2 x double> zeroinitializer, double %tmp, i32 0
+; CHECK: vector.memcheck
+
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb
+  %tmp = load double* null, align 8
+  br i1 undef, label %bb3, label %bb12
+
+bb3:                                              ; preds = %bb3, %bb2
+  %tmp4 = phi double [ %tmp9, %bb3 ], [ %tmp, %bb2 ]
+  %tmp5 = phi i32 [ %tmp8, %bb3 ], [ 0, %bb2 ]
+  %tmp6 = getelementptr inbounds [16 x double]* undef, i32 0, i32 %tmp5
+  %tmp7 = load double* %tmp6, align 4
+  %tmp8 = add nsw i32 %tmp5, 1
+  %tmp9 = fadd fast double %tmp4, undef
+  %tmp10 = getelementptr inbounds float* %arg, i32 %tmp5
+  store float undef, float* %tmp10, align 4
+  %tmp11 = icmp eq i32 %tmp8, %arg1
+  br i1 %tmp11, label %bb12, label %bb3
+
+bb12:                                             ; preds = %bb3, %bb2
+  %tmp13 = phi double [ %tmp, %bb2 ], [ %tmp9, %bb3 ]
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
new file mode 100644
index 000000000000..f390b33c0388
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -0,0 +1,170 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+@G = common global [32 x [1024 x i32]] zeroinitializer, align 16
+@ub = common global [1024 x i32] zeroinitializer, align 16
+@uc = common global [1024 x i32] zeroinitializer, align 16
+@d = common global [2048 x i32] zeroinitializer, align 16
+@fa = common global [1024 x float] zeroinitializer, align 16
+@fb = common global [1024 x float] zeroinitializer, align 16
+@ic = common global [1024 x i32] zeroinitializer, align 16
+@da = common global [1024 x float] zeroinitializer, align 16
+@db = common global [1024 x float] zeroinitializer, align 16
+@dc = common global [1024 x float] zeroinitializer, align 16
+@dd = common global [1024 x float] zeroinitializer, align 16
+@dj = common global [1024 x i32] zeroinitializer, align 16
+
+; We can optimize this test without a tail.
+;CHECK: @example1
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1() optsize {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %6, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
+; Can't vectorize in 'optsize' mode because we need a tail.
+;CHECK: @example2
+;CHECK-NOT: store <4 x i32>
+;CHECK: ret void
+define void @example2(i32 %n, i32 %x) optsize {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph5, label %.preheader
+
+..preheader_crit_edge:                            ; preds = %.lr.ph5
+  %phitmp = sext i32 %n to i64
+  br label %.preheader
+
+.preheader:                                       ; preds = %..preheader_crit_edge, %0
+  %i.0.lcssa = phi i64 [ %phitmp, %..preheader_crit_edge ], [ 0, %0 ]
+  %2 = icmp eq i32 %n, 0
+  br i1 %2, label %._crit_edge, label %.lr.ph
+
+.lr.ph5:                                          ; preds = %0, %.lr.ph5
+  %indvars.iv6 = phi i64 [ %indvars.iv.next7, %.lr.ph5 ], [ 0, %0 ]
+  %3 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv6
+  store i32 %x, i32* %3, align 4
+  %indvars.iv.next7 = add i64 %indvars.iv6, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next7 to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %..preheader_crit_edge, label %.lr.ph5
+
+.lr.ph:                                           ; preds = %.preheader, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ %i.0.lcssa, %.preheader ]
+  %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ]
+  %4 = add nsw i32 %.02, -1
+  %5 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %6 = load i32* %5, align 4
+  %7 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %8 = load i32* %7, align 4
+  %9 = and i32 %8, %6
+  %10 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %9, i32* %10, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %11 = icmp eq i32 %4, 0
+  br i1 %11, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %.preheader
+  ret void
+}
+
+; N is unknown, we need a tail. Can't vectorize.
+;CHECK: @example3
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) optsize {
+  %1 = icmp eq i32 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ]
+  %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
+  %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
+  %2 = add nsw i32 %.05, -1
+  %3 = getelementptr inbounds i32* %.023, i64 1
+  %4 = load i32* %.023, align 16
+  %5 = getelementptr inbounds i32* %.014, i64 1
+  store i32 %4, i32* %.014, align 16
+  %6 = icmp eq i32 %2, 0
+  br i1 %6, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
+
+; We can't vectorize this one because we need a runtime ptr check.
+;CHECK: @example23
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example23(i16* nocapture %src, i32* nocapture %dst) optsize {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
+  %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
+  %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
+  %2 = getelementptr inbounds i16* %.04, i64 1
+  %3 = load i16* %.04, align 2
+  %4 = zext i16 %3 to i32
+  %5 = shl nuw nsw i32 %4, 7
+  %6 = getelementptr inbounds i32* %.013, i64 1
+  store i32 %5, i32* %.013, align 4
+  %7 = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %7, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
+
+; We CAN vectorize this example because the pointers are marked as noalias.
+;CHECK: @example23b
+;CHECK: <4 x i32>
+;CHECK: ret void
+define void @example23b(i16* noalias nocapture %src, i32* noalias nocapture %dst) optsize {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
+  %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
+  %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
+  %2 = getelementptr inbounds i16* %.04, i64 1
+  %3 = load i16* %.04, align 2
+  %4 = zext i16 %3 to i32
+  %5 = shl nuw nsw i32 %4, 7
+  %6 = getelementptr inbounds i32* %.013, i64 1
+  store i32 %5, i32* %.013, align 4
+  %7 = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %7, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
+
diff --git a/test/Transforms/LoopVectorize/X86/struct-store.ll b/test/Transforms/LoopVectorize/X86/struct-store.ll
new file mode 100644
index 000000000000..a995e43a5ab1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/struct-store.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux-gnu -S
+
+; Make sure we are not crashing on this one.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@glbl = external global [16 x { i64, i64 }], align 16
+
+declare void @fn()
+
+define void @test() {
+entry:
+  br label %loop
+
+loop:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ]
+  %tmp = getelementptr inbounds [16 x { i64, i64 }]* @glbl, i64 0, i64 %indvars.iv
+  store { i64, i64 } { i64 ptrtoint (void ()* @fn to i64), i64 0 }, { i64, i64 }* %tmp, align 16
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 16
+  br i1 %exitcond, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
new file mode 100644
index 000000000000..ef63a145d0c1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+;CHECK: @foo
+;CHECK: load <4 x i32>
+;CHECK-NOT: load <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK-NOT: store <4 x i32>
+;CHECK: ret
+define i32 @foo(i32* nocapture %A) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = add nsw i32 %3, 6
+  store i32 %4, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 100
+  br i1 %exitcond, label %5, label %1
+
+; <label>:5                                       ; preds = %1
+  ret i32 undef
+}
+
+;CHECK: @bar
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = add nsw i32 %3, 6
+  store i32 %4, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
new file mode 100644
index 000000000000..2d7b663804f5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Don't unroll when we have register pressure.
+;CHECK: reg_pressure
+;CHECK: load <4 x double>
+;CHECK-NOT: load  <4 x double>
+;CHECK: store <4 x double>
+;CHECK-NOT: store <4 x double>
+;CHECK: ret
+define void @reg_pressure(double* nocapture %A, i32 %n) nounwind uwtable ssp {
+  %1 = sext i32 %n to i64
+  br label %2
+
+; <label>:2                                       ; preds = %2, %0
+  %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
+  %3 = getelementptr inbounds double* %A, i64 %indvars.iv
+  %4 = load double* %3, align 8
+  %5 = fadd double %4, 3.000000e+00
+  %6 = fmul double %4, 2.000000e+00
+  %7 = fadd double %5, %6
+  %8 = fadd double %7, 2.000000e+00
+  %9 = fmul double %8, 5.000000e-01
+  %10 = fadd double %6, %9
+  %11 = fsub double %10, %5
+  %12 = fadd double %4, %11
+  %13 = fdiv double %8, %12
+  %14 = fmul double %13, %8
+  %15 = fmul double %6, %14
+  %16 = fmul double %5, %15
+  %17 = fadd double %16, -3.000000e+00
+  %18 = fsub double %4, %5
+  %19 = fadd double %6, %18
+  %20 = fadd double %13, %19
+  %21 = fadd double %20, %17
+  %22 = fadd double %21, 3.000000e+00
+  %23 = fmul double %4, %22
+  store double %23, double* %3, align 8
+  %indvars.iv.next = add i64 %indvars.iv, -1
+  %24 = trunc i64 %indvars.iv to i32
+  %25 = icmp eq i32 %24, 0
+  br i1 %25, label %26, label %2
+
+; <label>:26                                      ; preds = %2
+  ret void
+}
+
+; This is a small loop. Unroll it twice. 
+;CHECK: small_loop
+;CHECK: xor
+;CHECK: xor
+;CHECK: ret
+define void @small_loop(i16* nocapture %A, i64 %n) nounwind uwtable ssp {
+  %1 = icmp eq i64 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.01 = phi i64 [ %5, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i16* %A, i64 %i.01
+  %3 = load i16* %2, align 2
+  %4 = xor i16 %3, 3
+  store i16 %4, i16* %2, align 2
+  %5 = add i64 %i.01, 1
+  %exitcond = icmp eq i64 %5, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
new file mode 100644
index 000000000000..3b3a7875ab36
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s  -loop-vectorize -mcpu=core2 -debug-only=loop-vectorize 2>&1 -S | FileCheck %s
+; REQUIRES: asserts
+; Make sure we use the right select kind when querying select costs.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+
+; CHECK: Checking a loop in "scalarselect"
+define void @scalarselect(i1 %cond) {
+  br label %1
+
+; <label>:1
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+
+; A scalar select has a cost of 1 on core2
+; CHECK: cost of 1 for VF 2 {{.*}}  select i1 %cond, i32 %6, i32 0
+
+  %sel = select i1 %cond, i32 %6, i32 zeroinitializer
+  store i32 %sel, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8
+  ret void
+}
+
+; CHECK: Checking a loop in "vectorselect"
+define void @vectorselect(i1 %cond) {
+  br label %1
+
+; <label>:1
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %8 = icmp ult i64 %indvars.iv, 8
+
+; A vector select has a cost of 4 on core2
+; CHECK: cost of 4 for VF 2 {{.*}}  select i1 %8, i32 %6, i32 0
+
+  %sel = select i1 %8, i32 %6, i32 zeroinitializer
+  store i32 %sel, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %9, label %1
+
+; <label>:9
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
new file mode 100644
index 000000000000..59bb8d0054c5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
@@ -0,0 +1,150 @@
+; RUN: opt -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%0 = type { %0*, %1 }
+%1 = type { i8*, i32 }
+
+@p = global [2048 x [8 x i32*]] zeroinitializer, align 16
+@q = global [2048 x i16] zeroinitializer, align 16
+@r = global [2048 x i16] zeroinitializer, align 16
+
+; Tests for widest type
+; Ensure that we count the pointer store in the first test case. We have a
+; consecutive vector of pointers store, therefore we should count it towards the
+; widest vector count.
+;
+; CHECK: test_consecutive_store
+; CHECK: The Widest type: 64 bits
+define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 {
+  %4 = load %0** %2, align 8
+  %5 = icmp eq %0** %0, %1
+  br i1 %5, label %12, label %6
+
+; <label>:6                                       ; preds = %3
+  br label %7
+
+; <label>:7                                       ; preds = %7, %6
+  %8 = phi %0** [ %0, %6 ], [ %9, %7 ]
+  store %0* %4, %0** %8, align 8
+  %9 = getelementptr inbounds %0** %8, i64 1
+  %10 = icmp eq %0** %9, %1
+  br i1 %10, label %11, label %7
+
+; <label>:11                                      ; preds = %7
+  br label %12
+
+; <label>:12                                      ; preds = %11, %3
+  ret void
+}
+
+; However, if the store of a set of pointers is not to consecutive memory we do
+; NOT count the store towards the widest vector type.
+; In the test case below we add i16 types to store it in an array of pointer,
+; therefore the widest type should be i16.
+; int* p[2048][8];
+; short q[2048];
+;   for (int y = 0; y < 8; ++y)
+;     for (int i = 0; i < 1024; ++i) {
+;       p[i][y] = (int*) (1 + q[i]);
+;     }
+; CHECK: test_nonconsecutive_store
+; CHECK: The Widest type: 16 bits
+define void @test_nonconsecutive_store() nounwind ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %14, %0
+  %2 = phi i64 [ 0, %0 ], [ %15, %14 ]
+  br label %3
+
+; <label>:3                                       ; preds = %3, %1
+  %4 = phi i64 [ 0, %1 ], [ %11, %3 ]
+  %5 = getelementptr inbounds [2048 x i16]* @q, i64 0, i64 %4
+  %6 = load i16* %5, align 2
+  %7 = sext i16 %6 to i64
+  %8 = add i64 %7, 1
+  %9 = inttoptr i64 %8 to i32*
+  %10 = getelementptr inbounds [2048 x [8 x i32*]]* @p, i64 0, i64 %4, i64 %2
+  store i32* %9, i32** %10, align 8
+  %11 = add i64 %4, 1
+  %12 = trunc i64 %11 to i32
+  %13 = icmp ne i32 %12, 1024
+  br i1 %13, label %3, label %14
+
+; <label>:14                                      ; preds = %3
+  %15 = add i64 %2, 1
+  %16 = trunc i64 %15 to i32
+  %17 = icmp ne i32 %16, 8
+  br i1 %17, label %1, label %18
+
+; <label>:18                                      ; preds = %14
+  ret void
+}
+
+
+@ia = global [1024 x i32*] zeroinitializer, align 16
+@ib = global [1024 x i32] zeroinitializer, align 16
+@ic = global [1024 x i8] zeroinitializer, align 16
+@p2 = global [2048 x [8 x i32*]] zeroinitializer, align 16
+@q2 = global [2048 x i16] zeroinitializer, align 16
+
+;; Now we check the same rules for loads. We should take consecutive loads of
+;; pointer types into account.
+; CHECK: test_consecutive_ptr_load
+; CHECK: The Widest type: 64 bits
+define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %2 = phi i64 [ 0, %0 ], [ %10, %1 ]
+  %3 = phi i8 [ 0, %0 ], [ %9, %1 ]
+  %4 = getelementptr inbounds [1024 x i32*]* @ia, i32 0, i64 %2
+  %5 = load i32** %4, align 4
+  %6 = ptrtoint i32* %5 to i64
+  %7 = trunc i64 %6 to i8
+  %8 = add i8 %3, 1
+  %9 = add i8 %7, %8
+  %10 = add i64 %2, 1
+  %11 = icmp ne i64 %10, 1024
+  br i1 %11, label %1, label %12
+
+; <label>:12                                      ; preds = %1
+  %13 = phi i8 [ %9, %1 ]
+  ret i8 %13
+}
+
+;; However, we should not take unconsecutive loads of pointers into account.
+; CHECK: test_nonconsecutive_ptr_load
+; CHECK: The Widest type: 16 bits
+define void @test_nonconsecutive_ptr_load() nounwind ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %13, %0
+  %2 = phi i64 [ 0, %0 ], [ %14, %13 ]
+  br label %3
+
+; <label>:3                                       ; preds = %3, %1
+  %4 = phi i64 [ 0, %1 ], [ %10, %3 ]
+  %5 = getelementptr inbounds [2048 x [8 x i32*]]* @p2, i64 0, i64 %4, i64 %2
+  %6 = getelementptr inbounds [2048 x i16]* @q2, i64 0, i64 %4
+  %7 = load i32** %5, align 2
+  %8 = ptrtoint i32* %7 to i64
+  %9 = trunc i64 %8 to i16
+  store i16 %9, i16* %6, align 8
+  %10 = add i64 %4, 1
+  %11 = trunc i64 %10 to i32
+  %12 = icmp ne i32 %11, 1024
+  br i1 %12, label %3, label %13
+
+; <label>:13                                      ; preds = %3
+  %14 = add i64 %2, 1
+  %15 = trunc i64 %14 to i32
+  %16 = icmp ne i32 %15, 8
+  br i1 %16, label %1, label %17
+
+; <label>:17                                      ; preds = %13
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
new file mode 100644
index 000000000000..431e422c2fbe
--- /dev/null
+++ b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: fc
+;CHECK: load <4 x i16>
+;CHECK-NEXT: shufflevector <4 x i16>
+;CHECK: select <4 x i1>
+;CHECK: store <4 x i16>
+;CHECK: ret
+define void @fc(i16* nocapture %p, i32 %n, i32 %size) nounwind uwtable ssp {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %cond.end, %entry
+  %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %cond.end ]
+  %p.addr.0 = phi i16* [ %p, %entry ], [ %incdec.ptr, %cond.end ]
+  %incdec.ptr = getelementptr inbounds i16* %p.addr.0, i64 -1
+  %0 = load i16* %incdec.ptr, align 2, !tbaa !0
+  %conv = zext i16 %0 to i32
+  %cmp = icmp ult i32 %conv, %size
+  br i1 %cmp, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %do.body
+  %sub = sub i32 %conv, %size
+  %phitmp = trunc i32 %sub to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %do.body, %cond.true
+  %cond = phi i16 [ %phitmp, %cond.true ], [ 0, %do.body ]
+  store i16 %cond, i16* %incdec.ptr, align 2, !tbaa !0
+  %dec = add i32 %n.addr.0, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %do.end, label %do.body
+
+do.end:                                           ; preds = %cond.end
+  ret void
+}
+
+;CHECK: example1
+;CHECK: load <4 x i32>
+;CHECK-NEXT: shufflevector <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define void @example1(i32* nocapture %a, i32 %n, i32 %wsize) nounwind uwtable ssp {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ]
+  %p.0 = phi i32* [ %a, %entry ], [ %incdec.ptr, %do.body ]
+  %incdec.ptr = getelementptr inbounds i32* %p.0, i64 -1
+  %0 = load i32* %incdec.ptr, align 4, !tbaa !3
+  %cmp = icmp slt i32 %0, %wsize
+  %sub = sub nsw i32 %0, %wsize
+  %cond = select i1 %cmp, i32 0, i32 %sub
+  store i32 %cond, i32* %incdec.ptr, align 4, !tbaa !3
+  %dec = add nsw i32 %n.addr.0, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %do.end, label %do.body
+
+do.end:                                           ; preds = %do.body
+  ret void
+}
+
+!0 = metadata !{metadata !"short", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LoopVectorize/calloc.ll b/test/Transforms/LoopVectorize/calloc.ll
new file mode 100644
index 000000000000..08c84eff5dbf
--- /dev/null
+++ b/test/Transforms/LoopVectorize/calloc.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+;CHECK: hexit
+;CHECK: zext <4 x i8>
+;CHECK: ret
+
+define noalias i8* @hexit(i8* nocapture %bytes, i64 %length) nounwind uwtable ssp {
+entry:
+  %shl = shl i64 %length, 1
+  %add28 = or i64 %shl, 1
+  %call = tail call i8* @calloc(i64 1, i64 %add28) nounwind
+  %cmp29 = icmp eq i64 %shl, 0
+  br i1 %cmp29, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = shl i64 %length, 1
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %i.030 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %shr = lshr i64 %i.030, 1
+  %arrayidx = getelementptr inbounds i8* %bytes, i64 %shr
+  %1 = load i8* %arrayidx, align 1, !tbaa !0
+  %conv = zext i8 %1 to i32
+  %and = shl i64 %i.030, 2
+  %neg = and i64 %and, 4
+  %and3 = xor i64 %neg, 4
+  %sh_prom = trunc i64 %and3 to i32
+  %shl4 = shl i32 15, %sh_prom
+  %and5 = and i32 %conv, %shl4
+  %shr11 = lshr i32 %and5, %sh_prom
+  %conv13 = and i32 %shr11, 254
+  %cmp15 = icmp ugt i32 %conv13, 9
+  %cond = select i1 %cmp15, i32 87, i32 48
+  %add17 = add nsw i32 %cond, %shr11
+  %conv18 = trunc i32 %add17 to i8
+  %arrayidx19 = getelementptr inbounds i8* %call, i64 %i.030
+  store i8 %conv18, i8* %arrayidx19, align 1, !tbaa !0
+  %inc = add i64 %i.030, 1
+  %exitcond = icmp eq i64 %inc, %0
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret i8* %call
+}
+
+declare noalias i8* @calloc(i64, i64) nounwind
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll
new file mode 100644
index 000000000000..2aa29ed2c820
--- /dev/null
+++ b/test/Transforms/LoopVectorize/cast-induction.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+; rdar://problem/12848162
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example12
+;CHECK: trunc i64
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example12() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %3 = trunc i64 %indvars.iv to i32
+  store i32 %3, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %4, label %1
+
+; <label>:4                                       ; preds = %1
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll
index 26902eba9e29..da0fb05fe843 100644
--- a/test/Transforms/LoopVectorize/cpp-new-array.ll
+++ b/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -1,10 +1,10 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
 ;CHECK: @cpp_new_arrays
-;CHECK: insertelement <4 x i32>
+;CHECK: sext i32
 ;CHECK: load <4 x float>
 ;CHECK: fadd <4 x float>
 ;CHECK: ret i32
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
new file mode 100644
index 000000000000..a2ea9511bb22
--- /dev/null
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine | FileCheck %s
+; Make sure we vectorize with debugging turned on.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@A = global [1024 x i32] zeroinitializer, align 16
+@B = global [1024 x i32] zeroinitializer, align 16
+@C = global [1024 x i32] zeroinitializer, align 16
+
+; CHECK: @test
+define i32 @test() #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9), !dbg !18
+  br label %for.body, !dbg !18
+
+for.body:
+  ;CHECK: load <4 x i32>
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv, !dbg !19
+  %0 = load i32* %arrayidx, align 4, !dbg !19, !tbaa !21
+  %arrayidx2 = getelementptr inbounds [1024 x i32]* @C, i64 0, i64 %indvars.iv, !dbg !19
+  %1 = load i32* %arrayidx2, align 4, !dbg !19, !tbaa !21
+  %add = add nsw i32 %1, %0, !dbg !19
+  %arrayidx4 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19
+  store i32 %add, i32* %arrayidx4, align 4, !dbg !19, !tbaa !21
+  %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !18
+  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9), !dbg !18
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !18
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024, !dbg !18
+  br i1 %exitcond, label %for.body, label %for.end, !dbg !18
+
+for.end:
+  ret i32 0, !dbg !24
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test", metadata !"/path/to/somewhere", metadata !"clang", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, metadata !""}
+!1 = metadata !{i32 0}
+!2 = metadata !{metadata !3}
+!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"test", metadata !"test", metadata !"test", metadata !4, i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5}
+!4 = metadata !{i32 786473, metadata !"test", metadata !"/path/to/somewhere", null}
+!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0}
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786688, metadata !10, metadata !"i", metadata !4, i32 6, metadata !7, i32 0, i32 0}
+!10 = metadata !{i32 786443, metadata !3, i32 6, i32 0, metadata !4, i32 0}
+!11 = metadata !{metadata !12, metadata !16, metadata !17}
+!12 = metadata !{i32 786484, i32 0, null, metadata !"A", metadata !"A", metadata !"", metadata !4, i32 1, metadata !13, i32 0, i32 1, [1024 x i32]* @A, null}
+!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, i32 0}
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 786465, i64 0, i64 1024}
+!16 = metadata !{i32 786484, i32 0, null, metadata !"B", metadata !"B", metadata !"", metadata !4, i32 2, metadata !13, i32 0, i32 1, [1024 x i32]* @B, null}
+!17 = metadata !{i32 786484, i32 0, null, metadata !"C", metadata !"C", metadata !"", metadata !4, i32 3, metadata !13, i32 0, i32 1, [1024 x i32]* @C, null} 
+!18 = metadata !{i32 6, i32 0, metadata !10, null}
+!19 = metadata !{i32 7, i32 0, metadata !20, null}
+!20 = metadata !{i32 786443, metadata !10, i32 6, i32 0, metadata !4, i32 1}
+!21 = metadata !{metadata !"int", metadata !22}
+!22 = metadata !{metadata !"omnipotent char", metadata !23}
+!23 = metadata !{metadata !"Simple C/C++ TBAA"}
+!24 = metadata !{i32 9, i32 0, metadata !3, null}
diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll
index 2f22a764572f..656912e178f9 100644
--- a/test/Transforms/LoopVectorize/flags.ll
+++ b/test/Transforms/LoopVectorize/flags.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
new file mode 100644
index 000000000000..565684cccb9a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/float-reduction.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+;CHECK: @foo
+;CHECK: fadd <4 x float>
+;CHECK: ret
+define float @foo(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %add = fadd fast float %sum.04, %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 200
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret float %add
+}
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index fce29d240487..f335557c0019 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -24,6 +25,20 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example1
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
 define void @example1() nounwind uwtable ssp {
   br label %1
 
@@ -48,6 +63,12 @@ define void @example1() nounwind uwtable ssp {
 ;CHECK: @example2
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example2
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
 define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph5, label %.preheader
@@ -89,10 +110,15 @@ define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
   ret void
 }
 
-; We can't vectorize this loop because it has non constant loop bounds.
 ;CHECK: @example3
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example3
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: ret void
 define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
   %1 = icmp eq i32 %n, 0
   br i1 %1, label %._crit_edge, label %.lr.ph
@@ -116,6 +142,12 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
 ;CHECK: @example4
 ;CHECK: load <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example4
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: ret void
 define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
   %1 = add nsw i32 %n, -1
   %2 = icmp eq i32 %n, 0
@@ -176,6 +208,12 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
 ;CHECK: @example8
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
+;UNROLL: @example8
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
 define void @example8(i32 %x) nounwind uwtable ssp {
   br label %.preheader
 
@@ -330,7 +368,7 @@ define void @example11() nounwind uwtable ssp {
 }
 
 ;CHECK: @example12
-;CHECK: trunc <4 x i64>
+;CHECK: trunc i64
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
 define void @example12() nounwind uwtable ssp {
@@ -391,9 +429,9 @@ define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %o
   ret void
 }
 
-; Can't vectorize because of reductions.
+; Can vectorize.
 ;CHECK: @example14
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
 ;CHECK: ret void
 define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocapture %out) nounwind uwtable ssp {
 .preheader3:
@@ -537,9 +575,9 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
   ret void
 }
 
-; Can't vectorize because the src and dst pointers are not disjoint.
 ;CHECK: @example21
-;CHECK-NOT: <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: shufflevector {{.*}} <i32 3, i32 2, i32 1, i32 0>
 ;CHECK: ret i32
 define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
   %1 = icmp sgt i32 %n, 0
@@ -565,9 +603,8 @@ define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
   ret i32 %a.0.lcssa
 }
 
-; Can't vectorize because there are multiple PHIs.
 ;CHECK: @example23
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
 ;CHECK: ret void
 define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp {
   br label %1
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
new file mode 100644
index 000000000000..121da8ba7e16
--- /dev/null
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -0,0 +1,1078 @@
+; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+
+%struct.anon = type { [100 x i32], i32, [100 x i32] }
+%struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] }
+
+@Foo = common global %struct.anon zeroinitializer, align 4
+@Bar = common global %struct.anon.0 zeroinitializer, align 4
+
+@PB = external global i32*
+@PA = external global i32*
+
+
+;; === First, the tests that should always vectorize, wither statically or by adding run-time checks ===
+
+
+; /// Different objects, positive induction, constant distance
+; int noAlias01 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.B[i] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias01
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias01(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+  store i32 %add, i32* %arrayidx1, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx2, align 4
+  ret i32 %7
+}
+
+; /// Different objects, positive induction with widening slide
+; int noAlias02 (int a) {
+;   int i;
+;   for (i=0; i<SIZE-10; i++)
+;     Foo.A[i] = Foo.B[i+10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias02
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias02(i32 %a) {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 90
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %add = add nsw i32 %1, 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add1 = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+  store i32 %add1, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Different objects, positive induction with shortening slide
+; int noAlias03 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i+10] = Foo.B[i] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias03
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias03(i32 %a) {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %add1 = add nsw i32 %4, 10
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Pointer access, positive stride, run-time check added
+; int noAlias04 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     *(PA+i) = *(PB+i) + a;
+;   return *(PA+a);
+; }
+; CHECK: define i32 @noAlias04
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+;
+; TODO: This test vectorizes (with run-time check) on real targets with -O3)
+; Check why it's not being vectorized even when forcing vectorization
+
+define i32 @noAlias04(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32** @PB, align 4
+  %2 = load i32* %i, align 4
+  %add.ptr = getelementptr inbounds i32* %1, i32 %2
+  %3 = load i32* %add.ptr, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32** @PA, align 4
+  %6 = load i32* %i, align 4
+  %add.ptr1 = getelementptr inbounds i32* %5, i32 %6
+  store i32 %add, i32* %add.ptr1, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32** @PA, align 4
+  %9 = load i32* %a.addr, align 4
+  %add.ptr2 = getelementptr inbounds i32* %8, i32 %9
+  %10 = load i32* %add.ptr2, align 4
+  ret i32 %10
+}
+
+; /// Different objects, positive induction, multi-array
+; int noAlias05 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][i] = Bar.B[N][i] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias05
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias05(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %N = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %2 = load i32* %N, align 4
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
+  %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
+  %3 = load i32* %arrayidx1, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %6 = load i32* %N, align 4
+  %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+  %arrayidx3 = getelementptr inbounds [100 x i32]* %arrayidx2, i32 0, i32 %5
+  store i32 %add, i32* %arrayidx3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx4 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx5 = getelementptr inbounds [100 x i32]* %arrayidx4, i32 0, i32 %8
+  %10 = load i32* %arrayidx5, align 4
+  ret i32 %10
+}
+
+; /// Same objects, positive induction, multi-array, different sub-elements
+; int noAlias06 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][i] = Bar.A[N+1][i] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias06
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias06(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %N = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %2 = load i32* %N, align 4
+  %add = add nsw i32 %2, 1
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
+  %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
+  %3 = load i32* %arrayidx1, align 4
+  %4 = load i32* %a.addr, align 4
+  %add2 = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %6 = load i32* %N, align 4
+  %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+  %arrayidx4 = getelementptr inbounds [100 x i32]* %arrayidx3, i32 0, i32 %5
+  store i32 %add2, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %8
+  %10 = load i32* %arrayidx6, align 4
+  ret i32 %10
+}
+
+; /// Different objects, negative induction, constant distance
+; int noAlias07 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias07
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias07(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 1
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+; /// Different objects, negative induction, shortening slide
+; int noAlias08 (int a) {
+;   int i;
+;   for (i=0; i<SIZE-10; i++)
+;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias08
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias08(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 90
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 1
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+; /// Different objects, negative induction, widening slide
+; int noAlias09 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias09
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias09(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 10
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+; /// Pointer access, negative stride, run-time check added
+; int noAlias10 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a;
+;   return *(PA+a);
+; }
+; CHECK: define i32 @noAlias10
+; CHECK-NOT: sub nsw <4 x i32>
+; CHECK: ret
+;
+; TODO: This test vectorizes (with run-time check) on real targets with -O3)
+; Check why it's not being vectorized even when forcing vectorization
+
+define i32 @noAlias10(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32** @PB, align 4
+  %add.ptr = getelementptr inbounds i32* %1, i32 100
+  %2 = load i32* %i, align 4
+  %idx.neg = sub i32 0, %2
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
+  %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
+  %3 = load i32* %add.ptr2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32** @PA, align 4
+  %add.ptr3 = getelementptr inbounds i32* %5, i32 100
+  %6 = load i32* %i, align 4
+  %idx.neg4 = sub i32 0, %6
+  %add.ptr5 = getelementptr inbounds i32* %add.ptr3, i32 %idx.neg4
+  %add.ptr6 = getelementptr inbounds i32* %add.ptr5, i32 -1
+  store i32 %add, i32* %add.ptr6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32** @PA, align 4
+  %9 = load i32* %a.addr, align 4
+  %add.ptr7 = getelementptr inbounds i32* %8, i32 %9
+  %10 = load i32* %add.ptr7, align 4
+  ret i32 %10
+}
+
+; /// Different objects, negative induction, multi-array
+; int noAlias11 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias11
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias11(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %N = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %2 = load i32* %N, align 4
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
+  %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
+  %3 = load i32* %arrayidx2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %sub3 = sub nsw i32 100, %5
+  %sub4 = sub nsw i32 %sub3, 1
+  %6 = load i32* %N, align 4
+  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+  %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %sub4
+  store i32 %add, i32* %arrayidx6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx7 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx8 = getelementptr inbounds [100 x i32]* %arrayidx7, i32 0, i32 %8
+  %10 = load i32* %arrayidx8, align 4
+  ret i32 %10
+}
+
+; /// Same objects, negative induction, multi-array, different sub-elements
+; int noAlias12 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias12
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias12(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %N = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %2 = load i32* %N, align 4
+  %add = add nsw i32 %2, 1
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
+  %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
+  %3 = load i32* %arrayidx2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add3 = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %sub4 = sub nsw i32 100, %5
+  %sub5 = sub nsw i32 %sub4, 1
+  %6 = load i32* %N, align 4
+  %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+  %arrayidx7 = getelementptr inbounds [100 x i32]* %arrayidx6, i32 0, i32 %sub5
+  store i32 %add3, i32* %arrayidx7, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx8 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx9 = getelementptr inbounds [100 x i32]* %arrayidx8, i32 0, i32 %8
+  %10 = load i32* %arrayidx9, align 4
+  ret i32 %10
+}
+
+; /// Same objects, positive induction, constant distance, just enough for vector size
+; int noAlias13 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.A[i+4] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias13
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias13(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %add = add nsw i32 %1, 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add1 = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+  store i32 %add1, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Same objects, negative induction, constant distance, just enough for vector size
+; int noAlias14 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias14
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias14(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 5
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 1
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+
+;; === Now, the tests that we could vectorize with induction changes or run-time checks ===
+
+
+; /// Different objects, swapped induction, alias at the end
+; int mayAlias01 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mayAlias01
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mayAlias01(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Different objects, swapped induction, alias at the beginning
+; int mayAlias02 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-1] = Foo.B[i] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mayAlias02
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mayAlias02(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %4
+  %sub1 = sub nsw i32 %sub, 1
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Pointer access, run-time check added
+; int mayAlias03 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     *(PA+i) = *(PB+SIZE-i-1) + a;
+;   return *(PA+a);
+; }
+; CHECK: define i32 @mayAlias03
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mayAlias03(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32** @PB, align 4
+  %add.ptr = getelementptr inbounds i32* %1, i32 100
+  %2 = load i32* %i, align 4
+  %idx.neg = sub i32 0, %2
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
+  %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
+  %3 = load i32* %add.ptr2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32** @PA, align 4
+  %6 = load i32* %i, align 4
+  %add.ptr3 = getelementptr inbounds i32* %5, i32 %6
+  store i32 %add, i32* %add.ptr3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32** @PA, align 4
+  %9 = load i32* %a.addr, align 4
+  %add.ptr4 = getelementptr inbounds i32* %8, i32 %9
+  %10 = load i32* %add.ptr4, align 4
+  ret i32 %10
+}
+
+
+;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour at all) ===
+
+
+; int mustAlias01 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias01
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias01(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %add2 = add nsw i32 %4, 10
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
+  store i32 %add, i32* %arrayidx3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx4, align 4
+  ret i32 %7
+}
+
+; int mustAlias02 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.B[SIZE-i-10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias02
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias02(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; int mustAlias03 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias03
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias03(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %add2 = add nsw i32 %4, 10
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
+  store i32 %add, i32* %arrayidx3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx4, align 4
+  ret i32 %7
+}
diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll
new file mode 100644
index 000000000000..7759b7085a1b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/i8-induction.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global i8 0, align 1
+@b = common global i8 0, align 1
+
+define void @f() nounwind uwtable ssp {
+scalar.ph:
+  store i8 0, i8* inttoptr (i64 1 to i8*), align 1, !tbaa !0
+  %0 = load i8* @a, align 1, !tbaa !0
+  br label %for.body
+
+for.body:
+  %mul16 = phi i8 [ 0, %scalar.ph ], [ %mul, %for.body ]              ; <------- i8 induction var.
+  %c.015 = phi i8 [ undef, %scalar.ph ], [ %conv8, %for.body ]
+  %conv2 = sext i8 %c.015 to i32
+  %tobool = icmp ne i8 %c.015, 0
+  %.sink = select i1 %tobool, i8 %c.015, i8 %0
+  %mul = mul i8 %mul16, %.sink
+  %add = add nsw i32 %conv2, 1
+  %conv8 = trunc i32 %add to i8
+  %sext = shl i32 %add, 24
+  %phitmp14 = icmp slt i32 %sext, 268435456
+  br i1 %phitmp14, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  store i8 %mul, i8* @b, align 1, !tbaa !0
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
+
diff --git a/test/Transforms/LoopVectorize/if-conv-crash.ll b/test/Transforms/LoopVectorize/if-conv-crash.ll
new file mode 100644
index 000000000000..3283456aa3c3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conv-crash.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define fastcc void @DD_dump() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %lor.lhs.false, label %if.end25
+
+lor.lhs.false:                                    ; preds = %entry
+  br i1 undef, label %if.end21, label %if.else
+
+if.else:                                          ; preds = %lor.lhs.false
+  br i1 undef, label %num_q.exit, label %while.body.i.preheader
+
+while.body.i.preheader:                           ; preds = %if.else
+  br label %while.body.i
+
+while.body.i:                                     ; preds = %if.end.i, %while.body.i.preheader
+  switch i8 undef, label %if.end.i [
+    i8 39, label %if.then.i
+    i8 92, label %if.then.i
+  ]
+
+if.then.i:                                        ; preds = %while.body.i, %while.body.i
+  br label %if.end.i
+
+if.end.i:                                         ; preds = %if.then.i, %while.body.i
+  br i1 undef, label %num_q.exit, label %while.body.i
+
+num_q.exit:                                       ; preds = %if.end.i, %if.else
+  unreachable
+
+if.end21:                                         ; preds = %lor.lhs.false
+  unreachable
+
+if.end25:                                         ; preds = %entry
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
new file mode 100644
index 000000000000..3a2d82e15d63
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+;CHECK: @reduction_func
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+entry:
+  %cmp10 = icmp sgt i32 %n, 0
+  br i1 %cmp10, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 30
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %add = add i32 %sum.011, 2
+  %add4 = add i32 %add, %0
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %sum.1 = phi i32 [ %add4, %if.then ], [ %sum.011, %for.body ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ 4, %for.inc ]
+  ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
new file mode 100644
index 000000000000..6e7c03a556c4
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; This is the loop in this example:
+;
+;int function0(int *a, int *b, int start, int end) {
+;
+;  for (int i=start; i<end; ++i) {
+;    unsigned k = a[i];
+;
+;    if (a[i] > b[i])   <------ notice the IF inside the loop.
+;      k = k * 5 + 3;
+;
+;    a[i] = k;  <---- K is a phi node that becomes vector-select.
+;  }
+;}
+
+;CHECK: @function0
+;CHECK: load <4 x i32>
+;CHECK: icmp sgt <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
+entry:
+  %cmp16 = icmp slt i32 %start, %end
+  br i1 %cmp16, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = sext i32 %start to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %if.end ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx4, align 4
+  %cmp5 = icmp sgt i32 %1, %2
+  br i1 %cmp5, label %if.then, label %if.end
+
+if.then:
+  %mul = mul i32 %1, 5
+  %add = add i32 %mul, 3
+  br label %if.end
+
+if.end:
+  %k.0 = phi i32 [ %add, %if.then ], [ %1, %for.body ]
+  store i32 %k.0, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %3 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %3, %end
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i32 undef
+}
+
+
+
+; int func(int *A, int n) {
+;   unsigned sum = 0;
+;   for (int i = 0; i < n; ++i)
+;     if (A[i] > 30)
+;       sum += A[i] + 2;
+;
+;   return sum;
+; }
+
+;CHECK: @reduction_func
+;CHECK: load <4 x i32>
+;CHECK: icmp sgt <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+entry:
+  %cmp10 = icmp sgt i32 %n, 0
+  br i1 %cmp10, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 30
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %add = add i32 %sum.011, 2
+  %add4 = add i32 %add, %0
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %sum.1 = phi i32 [ %add4, %if.then ], [ %sum.011, %for.body ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.1, %for.inc ]
+  ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
index 71ea7689fc04..3fa6b19ca928 100644
--- a/test/Transforms/LoopVectorize/increment.ll
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index b31bceb50df6..96595cdc16bc 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -6,8 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 @array = common global [1024 x i32] zeroinitializer, align 16
 
 ;CHECK: @array_at_plus_one
-;CHECK: add <4 x i64>
-;CHECK: trunc <4 x i64>
+;CHECK: trunc i64
 ;CHECK: add i64 %index, 12
 ;CHECK: ret i32
 define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
new file mode 100644
index 000000000000..e79d78de67c5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -0,0 +1,935 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: @sqrt_f32
+;CHECK: llvm.sqrt.v4f32
+;CHECK: ret void
+define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.sqrt.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
+;CHECK: @sqrt_f64
+;CHECK: llvm.sqrt.v4f64
+;CHECK: ret void
+define void @sqrt_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.sqrt.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+;CHECK: @sin_f32
+;CHECK: llvm.sin.v4f32
+;CHECK: ret void
+define void @sin_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.sin.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.sin.f32(float) nounwind readnone
+
+;CHECK: @sin_f64
+;CHECK: llvm.sin.v4f64
+;CHECK: ret void
+define void @sin_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.sin.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.sin.f64(double) nounwind readnone
+
+;CHECK: @cos_f32
+;CHECK: llvm.cos.v4f32
+;CHECK: ret void
+define void @cos_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.cos.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.cos.f32(float) nounwind readnone
+
+;CHECK: @cos_f64
+;CHECK: llvm.cos.v4f64
+;CHECK: ret void
+define void @cos_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.cos.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.cos.f64(double) nounwind readnone
+
+;CHECK: @exp_f32
+;CHECK: llvm.exp.v4f32
+;CHECK: ret void
+define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.exp.f32(float) nounwind readnone
+
+;CHECK: @exp_f64
+;CHECK: llvm.exp.v4f64
+;CHECK: ret void
+define void @exp_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.exp.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.exp.f64(double) nounwind readnone
+
+;CHECK: @exp2_f32
+;CHECK: llvm.exp2.v4f32
+;CHECK: ret void
+define void @exp2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.exp2.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.exp2.f32(float) nounwind readnone
+
+;CHECK: @exp2_f64
+;CHECK: llvm.exp2.v4f64
+;CHECK: ret void
+define void @exp2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.exp2.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.exp2.f64(double) nounwind readnone
+
+;CHECK: @log_f32
+;CHECK: llvm.log.v4f32
+;CHECK: ret void
+define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.log.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.log.f32(float) nounwind readnone
+
+;CHECK: @log_f64
+;CHECK: llvm.log.v4f64
+;CHECK: ret void
+define void @log_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.log.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.log.f64(double) nounwind readnone
+
+;CHECK: @log10_f32
+;CHECK: llvm.log10.v4f32
+;CHECK: ret void
+define void @log10_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.log10.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.log10.f32(float) nounwind readnone
+
+;CHECK: @log10_f64
+;CHECK: llvm.log10.v4f64
+;CHECK: ret void
+define void @log10_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.log10.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.log10.f64(double) nounwind readnone
+
+;CHECK: @log2_f32
+;CHECK: llvm.log2.v4f32
+;CHECK: ret void
+define void @log2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.log2.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.log2.f32(float) nounwind readnone
+
+;CHECK: @log2_f64
+;CHECK: llvm.log2.v4f64
+;CHECK: ret void
+define void @log2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.log2.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.log2.f64(double) nounwind readnone
+
+;CHECK: @fabs_f32
+;CHECK: llvm.fabs.v4f32
+;CHECK: ret void
+define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.fabs.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.fabs.f32(float) nounwind readnone
+
+define void @fabs_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.fabs(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.fabs(double) nounwind readnone
+
+;CHECK: @floor_f32
+;CHECK: llvm.floor.v4f32
+;CHECK: ret void
+define void @floor_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.floor.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.floor.f32(float) nounwind readnone
+
+;CHECK: @floor_f64
+;CHECK: llvm.floor.v4f64
+;CHECK: ret void
+define void @floor_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.floor.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.floor.f64(double) nounwind readnone
+
+;CHECK: @ceil_f32
+;CHECK: llvm.ceil.v4f32
+;CHECK: ret void
+define void @ceil_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.ceil.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.ceil.f32(float) nounwind readnone
+
+;CHECK: @ceil_f64
+;CHECK: llvm.ceil.v4f64
+;CHECK: ret void
+define void @ceil_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.ceil.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.ceil.f64(double) nounwind readnone
+
+;CHECK: @trunc_f32
+;CHECK: llvm.trunc.v4f32
+;CHECK: ret void
+define void @trunc_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.trunc.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.trunc.f32(float) nounwind readnone
+
+;CHECK: @trunc_f64
+;CHECK: llvm.trunc.v4f64
+;CHECK: ret void
+define void @trunc_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.trunc.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.trunc.f64(double) nounwind readnone
+
+;CHECK: @rint_f32
+;CHECK: llvm.rint.v4f32
+;CHECK: ret void
+define void @rint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.rint.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.rint.f32(float) nounwind readnone
+
+;CHECK: @rint_f64
+;CHECK: llvm.rint.v4f64
+;CHECK: ret void
+define void @rint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.rint.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.rint.f64(double) nounwind readnone
+
+;CHECK: @nearbyint_f32
+;CHECK: llvm.nearbyint.v4f32
+;CHECK: ret void
+define void @nearbyint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.nearbyint.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.nearbyint.f32(float) nounwind readnone
+
+;CHECK: @nearbyint_f64
+;CHECK: llvm.nearbyint.v4f64
+;CHECK: ret void
+define void @nearbyint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.nearbyint.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.nearbyint.f64(double) nounwind readnone
+
+;CHECK: @fma_f32
+;CHECK: llvm.fma.v4f32
+;CHECK: ret void
+define void @fma_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
+entry:
+  %cmp12 = icmp sgt i32 %n, 0
+  br i1 %cmp12, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %2 = load float* %arrayidx4, align 4, !tbaa !0
+  %3 = tail call float @llvm.fma.f32(float %0, float %2, float %1)
+  %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %3, float* %arrayidx6, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+
+;CHECK: @fma_f64
+;CHECK: llvm.fma.v4f64
+;CHECK: ret void
+define void @fma_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
+entry:
+  %cmp12 = icmp sgt i32 %n, 0
+  br i1 %cmp12, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
+  %1 = load double* %arrayidx2, align 8, !tbaa !3
+  %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
+  %2 = load double* %arrayidx4, align 8, !tbaa !3
+  %3 = tail call double @llvm.fma.f64(double %0, double %2, double %1)
+  %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %3, double* %arrayidx6, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+
+;CHECK: @fmuladd_f32
+;CHECK: llvm.fmuladd.v4f32
+;CHECK: ret void
+define void @fmuladd_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
+entry:
+  %cmp12 = icmp sgt i32 %n, 0
+  br i1 %cmp12, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %2 = load float* %arrayidx4, align 4, !tbaa !0
+  %3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1)
+  %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %3, float* %arrayidx6, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+
+;CHECK: @fmuladd_f64
+;CHECK: llvm.fmuladd.v4f64
+;CHECK: ret void
+define void @fmuladd_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
+entry:
+  %cmp12 = icmp sgt i32 %n, 0
+  br i1 %cmp12, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
+  %1 = load double* %arrayidx2, align 8, !tbaa !3
+  %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
+  %2 = load double* %arrayidx4, align 8, !tbaa !3
+  %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1)
+  %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %3, double* %arrayidx6, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
+
+;CHECK: @pow_f32
+;CHECK: llvm.pow.v4f32
+;CHECK: ret void
+define void @pow_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %call = tail call float @llvm.pow.f32(float %0, float %1) nounwind readnone
+  %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx4, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.pow.f32(float, float) nounwind readnone
+
+;CHECK: @pow_f64
+;CHECK: llvm.pow.v4f64
+;CHECK: ret void
+define void @pow_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %arrayidx2 = getelementptr inbounds double* %z, i64 %indvars.iv
+  %1 = load double* %arrayidx2, align 8, !tbaa !3
+  %call = tail call double @llvm.pow.f64(double %0, double %1) nounwind readnone
+  %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx4, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; CHECK: fabs_libm
+; CHECK:  call <4 x float> @llvm.fabs.v4f32
+; CHECK: ret void
+define void @fabs_libm(float* nocapture %x) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %call = tail call float @fabsf(float %0) nounwind readnone
+  store float %call, float* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare float @fabsf(float) nounwind readnone
+
+declare double @llvm.pow.f64(double, double) nounwind readnone
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"double", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LoopVectorize/lcssa-crash.ll b/test/Transforms/LoopVectorize/lcssa-crash.ll
new file mode 100644
index 000000000000..06b3b08aa0e3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/lcssa-crash.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%type1 = type { %type2 }
+%type2 = type { [0 x i8*], i8**, i32, i32, i32 }
+
+define void @test() nounwind uwtable align 2 {
+  br label %for.body.lr.ph.i.i.i
+
+for.body.lr.ph.i.i.i:
+  br label %for.body.i.i.i
+
+for.body.i.i.i:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.i.i.i ], [ 0, %for.body.lr.ph.i.i.i ]
+  br label %for.inc.i.i.i
+
+for.inc.i.i.i:
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, undef
+  br i1 %exitcond, label %for.body.i.i.i, label %for.end.i.i.i
+
+for.end.i.i.i:
+  %lcssa = phi %type1* [ undef, %for.inc.i.i.i ]
+  unreachable
+}
+
diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll
new file mode 100644
index 000000000000..45aa8c7cd9be
--- /dev/null
+++ b/test/Transforms/LoopVectorize/no_int_induction.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+; int __attribute__((noinline)) sum_array(int *A, int n) {
+;  return std::accumulate(A, A + n, 0);
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @sum_array
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: ret i32
+define i32 @sum_array(i32* %A, i32 %n) nounwind uwtable readonly noinline ssp {
+  %1 = sext i32 %n to i64
+  %2 = getelementptr inbounds i32* %A, i64 %1
+  %3 = icmp eq i32 %n, 0
+  br i1 %3, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+.lr.ph.i:                                         ; preds = %0, %.lr.ph.i
+  %.03.i = phi i32* [ %6, %.lr.ph.i ], [ %A, %0 ]
+  %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ]
+  %4 = load i32* %.03.i, align 4
+  %5 = add nsw i32 %4, %.012.i
+  %6 = getelementptr inbounds i32* %.03.i, i64 1
+  %7 = icmp eq i32* %6, %2
+  br i1 %7, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %.lr.ph.i, %0
+  %.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ]
+  ret i32 %.01.lcssa.i
+}
diff --git a/test/Transforms/LoopVectorize/nofloat.ll b/test/Transforms/LoopVectorize/nofloat.ll
new file mode 100644
index 000000000000..de23bf02b63a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/nofloat.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+; Make sure that we don't vectorize functions with 'noimplicitfloat' attributes.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example12
+;CHECK-NOT: store <4 x i32>
+;CHECK: ret void
+define void @example12() noimplicitfloat { ;           <--------- "noimplicitfloat" attribute here!
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %3 = trunc i64 %indvars.iv to i32
+  store i32 %3, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %4, label %1
+
+; <label>:4                                       ; preds = %1
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
index 1a6c15ed96c4..8262a18f1807 100644
--- a/test/Transforms/LoopVectorize/non-const-n.ll
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/nsw-crash.ll b/test/Transforms/LoopVectorize/nsw-crash.ll
new file mode 100644
index 000000000000..e5fad14d0dda
--- /dev/null
+++ b/test/Transforms/LoopVectorize/nsw-crash.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+define void @test() {
+entry:
+  br i1 undef, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:
+  br label %while.body
+
+while.body:
+  %it.sroa.0.091 = phi i32* [ undef, %while.body.lr.ph ], [ %incdec.ptr.i, %while.body ]
+  %incdec.ptr.i = getelementptr inbounds i32* %it.sroa.0.091, i64 1
+  %inc32 = add i32 undef, 1                                        ; <------------- Make sure we don't set NSW flags to the undef.
+  %cmp.i11 = icmp eq i32* %incdec.ptr.i, undef
+  br i1 %cmp.i11, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
+
+
diff --git a/test/Transforms/LoopVectorize/phi-hang.ll b/test/Transforms/LoopVectorize/phi-hang.ll
new file mode 100644
index 000000000000..b80d45995dc3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/phi-hang.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -loop-vectorize < %s
+
+; PR15384
+define void @test1(i32 %arg) {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb5, %bb
+  %tmp = phi i32 [ 1, %bb ], [ %tmp7, %bb5 ]
+  %tmp2 = phi i32 [ %arg, %bb ], [ %tmp9, %bb5 ]
+  br i1 true, label %bb5, label %bb3
+
+bb3:                                              ; preds = %bb1
+  br label %bb4
+
+bb4:                                              ; preds = %bb3
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb1
+  %tmp6 = phi i32 [ 0, %bb4 ], [ %tmp, %bb1 ]
+  %tmp7 = phi i32 [ 0, %bb4 ], [ %tmp6, %bb1 ]
+  %tmp8 = phi i32 [ 0, %bb4 ], [ %tmp, %bb1 ]
+  %tmp9 = add nsw i32 %tmp2, 1
+  %tmp10 = icmp eq i32 %tmp9, 0
+  br i1 %tmp10, label %bb11, label %bb1
+
+bb11:                                             ; preds = %bb5
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/ptr_loops.ll b/test/Transforms/LoopVectorize/ptr_loops.ll
new file mode 100644
index 000000000000..25599f8f4c3c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ptr_loops.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@A = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
+@B = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
+
+;CHECK:_Z5test1v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test1v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 18), %0 ], [ %4, %1 ]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 0), %0 ], [ %5, %1 ]
+  %2 = load i32* %b.01, align 4
+  %3 = shl nsw i32 %2, 1
+  store i32 %3, i32* %p.02, align 4
+  %4 = getelementptr inbounds i32* %p.02, i64 -1
+  %5 = getelementptr inbounds i32* %b.01, i64 1
+  %6 = icmp eq i32* %4, getelementptr ([36 x i32]* @A, i64 128102389400760775, i64 3)
+  br i1 %6, label %7, label %1
+
+; <label>:7                                       ; preds = %1
+  ret i32 0
+}
+
+;CHECK:_Z5test2v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test2v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 25), %0 ], [ %3, %1 ]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 2), %0 ], [ %4, %1 ]
+  %2 = load i32* %b.01, align 4
+  store i32 %2, i32* %p.02, align 4
+  %3 = getelementptr inbounds i32* %p.02, i64 -1
+  %4 = getelementptr inbounds i32* %b.01, i64 1
+  %5 = icmp eq i32* %4, getelementptr inbounds ([36 x i32]* @A, i64 0, i64 18)
+  br i1 %5, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret i32 0
+}
+
+;CHECK:_Z5test3v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test3v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 29), %0 ], [ %3, %1 ]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 5), %0 ], [ %4, %1 ]
+  %2 = load i32* %b.01, align 4
+  store i32 %2, i32* %p.02, align 4
+  %3 = getelementptr inbounds i32* %p.02, i64 -1
+  %4 = getelementptr inbounds i32* %b.01, i64 1
+  %5 = icmp eq i32* %3, getelementptr ([36 x i32]* @A, i64 128102389400760775, i64 3)
+  br i1 %5, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret i32 0
+}
diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll
index b4d1bac132f0..bfaa6d452bce 100644
--- a/test/Transforms/LoopVectorize/read-only.ll
+++ b/test/Transforms/LoopVectorize/read-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index c1848b35fc6e..08b7b27e4257 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -7,6 +7,11 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
@@ -37,6 +42,11 @@ define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
@@ -67,6 +77,11 @@ define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocap
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: mul nsw <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
@@ -95,6 +110,11 @@ define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 
 ;CHECK: @reduction_mul
 ;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
@@ -124,6 +144,11 @@ define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 ;CHECK: @start_at_non_zero
 ;CHECK: phi <4 x i32>
 ;CHECK: <i32 120, i32 0, i32 0, i32 0>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp {
 entry:
@@ -152,6 +177,11 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK: @reduction_and
 ;CHECK: and <4 x i32>
 ;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: and <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: and <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
 entry:
@@ -179,6 +209,11 @@ for.end:                                          ; preds = %for.body, %entry
 
 ;CHECK: @reduction_or
 ;CHECK: or <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: or <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: or <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
 entry:
@@ -206,6 +241,11 @@ for.end:                                          ; preds = %for.body, %entry
 
 ;CHECK: @reduction_xor
 ;CHECK: xor <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: xor <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: xor <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
 define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
 entry:
@@ -230,3 +270,56 @@ for.end:                                          ; preds = %for.body, %entry
   %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
   ret i32 %result.0.lcssa
 }
+
+; In this code the subtracted variable is on the RHS and this is not an induction variable.
+;CHECK: @reduction_sub_rhs
+;CHECK-NOT: phi <4 x i32>
+;CHECK-NOT: sub nsw <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_sub_rhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %sub = sub nsw i32 %0, %x.05
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]
+  ret i32 %x.0.lcssa
+}
+
+
+; In this test the reduction variable is on the LHS and we can vectorize it.
+;CHECK: @reduction_sub_lhs
+;CHECK: phi <4 x i32>
+;CHECK: sub nsw <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_sub_lhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %sub = sub nsw i32 %x.05, %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]
+  ret i32 %x.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index 23933cf7c7db..86098a6e7db2 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
@@ -9,6 +9,10 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;     a[i] = b[i] * 3;
 ; }
 
+;CHECK: for.body.preheader:
+;CHECK: br i1 %cmp.zero, label %middle.block, label %vector.memcheck
+;CHECK: vector.memcheck:
+;CHECK: br i1 %found.conflict, label %middle.block, label %vector.ph
 ;CHECK: load <4 x float>
 define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
 entry:
diff --git a/test/Transforms/LoopVectorize/same-base-access.ll b/test/Transforms/LoopVectorize/same-base-access.ll
new file mode 100644
index 000000000000..15738936457a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/same-base-access.ll
@@ -0,0 +1,110 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; This is kernel11 from "LivermoreLoops". We can't vectorize it because we
+; access both x[k] and x[k-1].
+;
+; void kernel11(double *x, double *y, int n) {
+;   for ( int k=1 ; k<n ; k++ )
+;     x[k] = x[k-1] + y[k];
+; }
+
+; CHECK: @kernel11
+; CHECK-NOT: <4 x double>
+; CHECK: ret
+define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
+  %1 = alloca double*, align 8
+  %2 = alloca double*, align 8
+  %3 = alloca i32, align 4
+  %k = alloca i32, align 4
+  store double* %x, double** %1, align 8
+  store double* %y, double** %2, align 8
+  store i32 %n, i32* %3, align 4
+  store i32 1, i32* %k, align 4
+  br label %4
+
+; <label>:4                                       ; preds = %25, %0
+  %5 = load i32* %k, align 4
+  %6 = load i32* %3, align 4
+  %7 = icmp slt i32 %5, %6
+  br i1 %7, label %8, label %28
+
+; <label>:8                                       ; preds = %4
+  %9 = load i32* %k, align 4
+  %10 = sub nsw i32 %9, 1
+  %11 = sext i32 %10 to i64
+  %12 = load double** %1, align 8
+  %13 = getelementptr inbounds double* %12, i64 %11
+  %14 = load double* %13, align 8
+  %15 = load i32* %k, align 4
+  %16 = sext i32 %15 to i64
+  %17 = load double** %2, align 8
+  %18 = getelementptr inbounds double* %17, i64 %16
+  %19 = load double* %18, align 8
+  %20 = fadd double %14, %19
+  %21 = load i32* %k, align 4
+  %22 = sext i32 %21 to i64
+  %23 = load double** %1, align 8
+  %24 = getelementptr inbounds double* %23, i64 %22
+  store double %20, double* %24, align 8
+  br label %25
+
+; <label>:25                                      ; preds = %8
+  %26 = load i32* %k, align 4
+  %27 = add nsw i32 %26, 1
+  store i32 %27, i32* %k, align 4
+  br label %4
+
+; <label>:28                                      ; preds = %4
+  ret i32 0
+}
+
+
+
+; We don't vectorize this function because A[i*7] is scalarized, and the
+; different scalars can in theory wrap around and overwrite other scalar
+; elements. At the moment we only allow read/write access to arrays
+; that are consecutive.
+; 
+; void foo(int *a) {
+;   for (int i=0; i<256; ++i) {
+;     int x = a[i*7];
+;     if (x>3)
+;       x = x*x+x*4;
+;     a[i*7] = x+3;
+;   }
+; }
+
+; CHECK: @func2
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+define i32 @func2(i32* nocapture %a) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %7, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %7 ]
+  %2 = mul nsw i64 %indvars.iv, 7
+  %3 = getelementptr inbounds i32* %a, i64 %2
+  %4 = load i32* %3, align 4
+  %5 = icmp sgt i32 %4, 3
+  br i1 %5, label %6, label %7
+
+; <label>:6                                       ; preds = %1
+  %tmp = add i32 %4, 4
+  %tmp1 = mul i32 %tmp, %4
+  br label %7
+
+; <label>:7                                       ; preds = %6, %1
+  %x.0 = phi i32 [ %tmp1, %6 ], [ %4, %1 ]
+  %8 = add nsw i32 %x.0, 3
+  store i32 %8, i32* %3, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %9, label %1
+
+; <label>:9                                       ; preds = %7
+  ret i32 0
+}
diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll
index e537bde31bb0..7a14d247c9b4 100644
--- a/test/Transforms/LoopVectorize/scalar-select.ll
+++ b/test/Transforms/LoopVectorize/scalar-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/simple-unroll.ll b/test/Transforms/LoopVectorize/simple-unroll.ll
new file mode 100644
index 000000000000..7e2dd5fc0fcf
--- /dev/null
+++ b/test/Transforms/LoopVectorize/simple-unroll.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; This is the loop.
+;  for (i=0; i<n; i++){
+;    a[i] += i;
+;  }
+;CHECK: @inc
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @inc(i32 %n) nounwind uwtable noinline ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = trunc i64 %indvars.iv to i32
+  %5 = add nsw i32 %3, %4
+  store i32 %5, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll
index 4a6e4b231dfe..fa83dba3d367 100644
--- a/test/Transforms/LoopVectorize/small-loop.ll
+++ b/test/Transforms/LoopVectorize/small-loop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll
index 5aa3bc034d0b..998001c3187b 100644
--- a/test/Transforms/LoopVectorize/start-non-zero.ll
+++ b/test/Transforms/LoopVectorize/start-non-zero.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll
new file mode 100644
index 000000000000..de65d0d14870
--- /dev/null
+++ b/test/Transforms/LoopVectorize/struct_access.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct.coordinate = type { i32, i32 }
+
+; Make sure that we don't generate a wide load when accessing the struct.
+; struct coordinate {
+;  int x;
+;  int y;
+; };
+;
+;
+; int foo(struct coordinate *A, int n) {
+;
+;   int sum = 0;
+;   for (int i = 0; i < n; ++i)
+;     sum += A[i].x;
+;
+;   return sum;
+; }
+
+;CHECK: @foo
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret
+define i32 @foo(%struct.coordinate* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %x = getelementptr inbounds %struct.coordinate* %A, i64 %indvars.iv, i32 0
+  %0 = load i32* %x, align 4, !tbaa !0
+  %add = add nsw i32 %0, %sum.05
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll
new file mode 100644
index 000000000000..ac1694802a32
--- /dev/null
+++ b/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;
+; We want to make sure that we are vectorizeing the scalar loop only once
+; even if the pass manager runs the vectorizer multiple times due to inlining.
+
+
+; This test checks that we add metadata to vectorized loops
+; CHECK: _Z4foo1Pii
+; CHECK: <4 x i32>
+; CHECK: llvm.vectorizer.already_vectorized
+; CHECK: ret
+
+; This test comes from the loop:
+;
+;int foo (int *A, int n) {
+;  return std::accumulate(A, A + n, 0);
+;}
+define i32 @_Z4foo1Pii(i32* %A, i32 %n) #0 {
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext
+  %cmp3.i = icmp eq i32 %n, 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
+  %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
+  %add.i = add nsw i32 %0, %__init.addr.05.i
+  %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+; This test checks that we don't vectorize loops that are marked with the "already vectorized" metadata.
+; CHECK: _Z4foo2Pii
+; CHECK-NOT: <4 x i32>
+; CHECK: llvm.vectorizer.already_vectorized
+; CHECK: ret
+define i32 @_Z4foo2Pii(i32* %A, i32 %n) #0 {
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext
+  %cmp3.i = icmp eq i32 %n, 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
+  %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
+  %add.i = add nsw i32 %0, %__init.addr.05.i
+  %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i, !llvm.vectorizer.already_vectorized !3
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{}
+
diff --git a/test/Transforms/LoopVectorize/write-only.ll b/test/Transforms/LoopVectorize/write-only.ll
index eb027604134f..54cbe8df46b0 100644
--- a/test/Transforms/LoopVectorize/write-only.ll
+++ b/test/Transforms/LoopVectorize/write-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index 2f1ccb493da8..c0eaaa40154b 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -8,8 +8,10 @@ entry:
   %0 = alloca double                              ; <double*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   call void @llvm.dbg.declare(metadata !{i32* %i_addr}, metadata !0), !dbg !8
-; CHECK: call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !0)
-; CHECK: call void @llvm.dbg.value(metadata !{double %j}, i64 0, metadata !9)
+; CHECK: call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata ![[IVAR:[0-9]*]])
+; CHECK: call void @llvm.dbg.value(metadata !{double %j}, i64 0, metadata ![[JVAR:[0-9]*]])
+; CHECK: ![[IVAR]] = {{.*}} ; [ DW_TAG_arg_variable ] [i]
+; CHECK: ![[JVAR]] = {{.*}} ; [ DW_TAG_arg_variable ] [j]
   store i32 %i, i32* %i_addr
   call void @llvm.dbg.declare(metadata !{double* %j_addr}, metadata !9), !dbg !8
   store double %j, double* %j_addr
@@ -30,16 +32,18 @@ return:                                           ; preds = %entry
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-!0 = metadata !{i32 524545, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"testfunc.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"testfunc.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!llvm.dbg.cu = !{!3}
+
+!0 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"testfunc.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !7, metadata !6}
-!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 2, i32 0, metadata !1, null}
-!9 = metadata !{i32 524545, metadata !1, metadata !"j", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 786689, metadata !1, metadata !"j", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !10 = metadata !{i32 3, i32 0, metadata !11, null}
-!11 = metadata !{i32 524299, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
-
+!11 = metadata !{i32 786443, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{metadata !"testfunc.c", metadata !"/tmp"}
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index 4cb621f61ca2..f6119f8bbd85 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -30,23 +30,24 @@ return:                                           ; preds = %entry
   ret void, !dbg !19
 }
 
-!0 = metadata !{i32 524545, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"bar.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"bar.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"bar.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{null, metadata !6}
-!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 8, i32 0, metadata !1, null}
 !8 = metadata !{i32 9, i32 0, metadata !1, null}
-!9 = metadata !{i32 524545, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 524334, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{i32 786689, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 786478, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{null, metadata !6, metadata !13, metadata !14}
-!13 = metadata !{i32 524324, metadata !2, metadata !"long int", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786468, metadata !2, metadata !"long int", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !15 = metadata !{i32 4, i32 0, metadata !10, metadata !8}
-!16 = metadata !{i32 524545, metadata !10, metadata !"y", metadata !2, i32 4, metadata !13} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 524545, metadata !10, metadata !"z", metadata !2, i32 4, metadata !14} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 786689, metadata !10, metadata !"y", metadata !2, i32 4, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 786689, metadata !10, metadata !"z", metadata !2, i32 4, metadata !14, i32 0, null} ; [ DW_TAG_arg_variable ]
 !18 = metadata !{i32 5, i32 0, metadata !10, metadata !8}
 !19 = metadata !{i32 10, i32 0, metadata !1, null}
+!20 = metadata !{metadata !"bar.c", metadata !"/tmp/"}
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 3fa16288c219..582a57b5d39c 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -70,20 +70,20 @@ define void @test4(i8 *%P) {
   %A = alloca %1
   %a = bitcast %1* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
-  call void @test4a(i8* byval align 1 %a)
+  call void @test4a(i8* align 1 byval %a)
   ret void
 ; CHECK: @test4
 ; CHECK-NEXT: call void @test4a(
 }
 
-declare void @test4a(i8* byval align 1)
+declare void @test4a(i8* align 1 byval)
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 %struct.S = type { i128, [4 x i8]}
 
 @sS = external global %struct.S, align 16
 
-declare void @test5a(%struct.S* byval align 16) nounwind ssp
+declare void @test5a(%struct.S* align 16 byval) nounwind ssp
 
 
 ; rdar://8713376 - This memcpy can't be eliminated.
@@ -94,7 +94,7 @@ entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast (%struct.S* @sS to i8*), i64 32, i32 16, i1 false)
   %a = getelementptr %struct.S* %y, i64 0, i32 1, i64 0
   store i8 4, i8* %a
-  call void @test5a(%struct.S* byval align 16 %y)
+  call void @test5a(%struct.S* align 16 byval %y)
   ret i32 0
   ; CHECK: @test5(
   ; CHECK: store i8 4
@@ -114,19 +114,19 @@ define void @test6(i8 *%P) {
 ; isn't itself 8 byte aligned.
 %struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
 
-define i32 @test7(%struct.p* nocapture byval align 8 %q) nounwind ssp {
+define i32 @test7(%struct.p* nocapture align 8 byval %q) nounwind ssp {
 entry:
   %agg.tmp = alloca %struct.p, align 4
   %tmp = bitcast %struct.p* %agg.tmp to i8*
   %tmp1 = bitcast %struct.p* %q to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
-  %call = call i32 @g(%struct.p* byval align 8 %agg.tmp) nounwind
+  %call = call i32 @g(%struct.p* align 8 byval %agg.tmp) nounwind
   ret i32 %call
 ; CHECK: @test7
-; CHECK: call i32 @g(%struct.p* byval align 8 %q) nounwind
+; CHECK: call i32 @g(%struct.p* byval align 8 %q) [[NUW:#[0-9]+]]
 }
 
-declare i32 @g(%struct.p* byval align 8)
+declare i32 @g(%struct.p* align 8 byval)
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
@@ -152,7 +152,7 @@ declare noalias i8* @malloc(i32)
 ; rdar://11341081
 %struct.big = type { [50 x i32] }
 
-define void @test9() nounwind uwtable ssp {
+define void @test9() nounwind ssp uwtable {
 entry:
 ; CHECK: test9
 ; CHECK: f1
@@ -170,3 +170,7 @@ entry:
 
 declare void @f1(%struct.big* sret)
 declare void @f2(%struct.big*)
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind ssp }
+; CHECK: attributes #2 = { nounwind ssp uwtable }
diff --git a/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
index e3e52b401af5..19cd6a5171da 100644
--- a/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
+++ b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mergefunc %s -disable-output
+; RUN: opt -mergefunc -disable-output < %s
 ; This used to crash.
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
diff --git a/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll b/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll
new file mode 100644
index 000000000000..3f6a5ba157de
--- /dev/null
+++ b/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll
@@ -0,0 +1,36 @@
+; RUN: opt -mergefunc -disable-output < %s
+; This used to trigger a ConstantExpr::getBitCast assertion.
+
+define void @t1() unnamed_addr uwtable ssp align 2 {
+entry:
+  switch i32 undef, label %sw.bb12 [
+    i32 127, label %sw.bb
+    i32 126, label %sw.bb4
+  ]
+
+sw.bb:                                            ; preds = %entry
+  unreachable
+
+sw.bb4:                                           ; preds = %entry
+  unreachable
+
+sw.bb12:                                          ; preds = %entry
+  ret void
+}
+
+define void @t2() unnamed_addr uwtable ssp align 2 {
+entry:
+  switch i32 undef, label %sw.bb8 [
+    i32 4, label %sw.bb
+    i32 3, label %sw.bb4
+  ]
+
+sw.bb:                                            ; preds = %entry
+  unreachable
+
+sw.bb4:                                           ; preds = %entry
+  ret void
+
+sw.bb8:                                           ; preds = %entry
+  unreachable
+}
diff --git a/test/Transforms/MergeFunc/phi-speculation1.ll b/test/Transforms/MergeFunc/phi-speculation1.ll
index fd0baffb3108..548e5102be10 100644
--- a/test/Transforms/MergeFunc/phi-speculation1.ll
+++ b/test/Transforms/MergeFunc/phi-speculation1.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt < %s -mergefunc -stats -disable-output 2>&1 | not grep "functions merged"
 
 define i32 @foo1(i32 %x) {
diff --git a/test/Transforms/MergeFunc/phi-speculation2.ll b/test/Transforms/MergeFunc/phi-speculation2.ll
index eec8b5c5a90a..d42a465d0c65 100644
--- a/test/Transforms/MergeFunc/phi-speculation2.ll
+++ b/test/Transforms/MergeFunc/phi-speculation2.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt < %s -mergefunc -stats -disable-output 2>&1 | grep "functions merged"
 
 define i32 @foo1(i32 %x) {
diff --git a/test/Transforms/MergeFunc/vector.ll b/test/Transforms/MergeFunc/vector.ll
index 4af079f8cdf7..dba5fa349aba 100644
--- a/test/Transforms/MergeFunc/vector.ll
+++ b/test/Transforms/MergeFunc/vector.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt -mergefunc -stats -disable-output < %s 2>&1 | grep "functions merged"
 
 ; This test is checks whether we can merge
diff --git a/test/Transforms/MergeFunc/vectors-and-arrays.ll b/test/Transforms/MergeFunc/vectors-and-arrays.ll
index dc64a0858ba8..22747224a193 100644
--- a/test/Transforms/MergeFunc/vectors-and-arrays.ll
+++ b/test/Transforms/MergeFunc/vectors-and-arrays.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt -mergefunc < %s -disable-output -stats | not grep merged
 ; This used to crash with an assert.
 
diff --git a/test/Transforms/MetaRenamer/metarenamer.ll b/test/Transforms/MetaRenamer/metarenamer.ll
index ad41bcf50f19..4020e1045081 100644
--- a/test/Transforms/MetaRenamer/metarenamer.ll
+++ b/test/Transforms/MetaRenamer/metarenamer.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -metarenamer -S | FileCheck %s
+; RUN: opt -metarenamer -S < %s | FileCheck %s
 
 ; CHECK: target triple {{.*}}
 ; CHECK-NOT: {{^x*}}xxx{{^x*}}
diff --git a/test/Transforms/ObjCARC/apelim.ll b/test/Transforms/ObjCARC/apelim.ll
index 8c7b5b1e654f..4541b3f2fdf3 100644
--- a/test/Transforms/ObjCARC/apelim.ll
+++ b/test/Transforms/ObjCARC/apelim.ll
@@ -38,8 +38,8 @@ entry:
 }
 
 ; CHECK: define internal void @_GLOBAL__I_y()
-; CHECK: %0 = call i8* @objc_autoreleasePoolPush() nounwind
-; CHECK: call void @objc_autoreleasePoolPop(i8* %0) nounwind
+; CHECK: %0 = call i8* @objc_autoreleasePoolPush() [[NUW:#[0-9]+]]
+; CHECK: call void @objc_autoreleasePoolPop(i8* %0) [[NUW]]
 ; CHECK: }
 define internal void @_GLOBAL__I_y() {
 entry:
@@ -51,3 +51,5 @@ entry:
 
 declare i8* @objc_autoreleasePoolPush()
 declare void @objc_autoreleasePoolPop(i8*)
+
+; CHECK: attributes #0 = { nounwind }
diff --git a/test/Transforms/ObjCARC/arc-annotations.ll b/test/Transforms/ObjCARC/arc-annotations.ll
new file mode 100644
index 000000000000..4c56b4a3def9
--- /dev/null
+++ b/test/Transforms/ObjCARC/arc-annotations.ll
@@ -0,0 +1,307 @@
+; This file consists of various tests which ensure that the objc-arc-annotations
+; are working correctly. In the future, I will use this in other lit tests to
+; check the data flow analysis of ARC.
+
+; REQUIRES: asserts
+; RUN: opt -S -objc-arc -enable-objc-arc-annotations < %s | FileCheck %s
+
+declare i8* @objc_retain(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_autorelease(i8*)
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare void @objc_autoreleasePoolPop(i8*)
+declare i8* @objc_autoreleasePoolPush()
+declare i8* @objc_retainBlock(i8*)
+
+declare i8* @objc_retainedObject(i8*)
+declare i8* @objc_unretainedObject(i8*)
+declare i8* @objc_unretainedPointer(i8*)
+
+declare void @use_pointer(i8*)
+declare void @callee()
+declare void @callee_fnptr(void ()*)
+declare void @invokee()
+declare i8* @returner()
+
+; Simple retain+release pair deletion, with some intervening control
+; flow and harmless instructions.
+
+; CHECK: define void @test0(
+; CHECK: entry:
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK:   %0 = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !0, !llvm.arc.annotation.topdown !1
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: t:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK:   store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !2
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: f:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK:   store i32 7, i32* %x, !llvm.arc.annotation.bottomup !2
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: return:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
+; CHECK:   call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !3, !llvm.arc.annotation.topdown !4
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: }
+define void @test0(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  br label %return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind
+  ret void
+}
+
+; Like test0 but the release isn't always executed when the retain is,
+; so the optimization is not safe.
+
+; TODO: Make the objc_release's argument be %0.
+
+; CHECK: define void @test1(
+; CHECK: entry:
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK:   %0 = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !5, !llvm.arc.annotation.topdown !6
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: t:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK:   store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !7
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: f:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK:   call void @callee(), !llvm.arc.annotation.topdown !8
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease)
+; CHECK: return:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
+; CHECK:   call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !9
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: alt_return:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: }
+define void @test1(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  call void @callee()
+  br i1 %q, label %return, label %alt_return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind
+  ret void
+
+alt_return:
+  ret void
+}
+
+; Don't do partial elimination into two different CFG diamonds.
+
+; CHECK: define void @test1b(
+; CHECK: entry:
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK:   %0 = tail call i8* @objc_retain(i8* %x) #0, !llvm.arc.annotation.bottomup !10, !llvm.arc.annotation.topdown !11
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: if.then:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_CanRelease)
+; CHECK:   tail call void @callee(), !llvm.arc.annotation.bottomup !12, !llvm.arc.annotation.topdown !13
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease)
+; CHECK: if.end:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_CanRelease)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease)
+; CHECK: if.then3:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_CanRelease)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK:   tail call void @use_pointer(i8* %x), !llvm.arc.annotation.bottomup !14, !llvm.arc.annotation.topdown !15
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_MovableRelease)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Use)
+; CHECK: if.end5:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_MovableRelease)
+; CHECK:   tail call void @objc_release(i8* %x) #0, !clang.imprecise_release !16, !llvm.arc.annotation.bottomup !17
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: }
+define void @test1b(i8* %x, i1 %p, i1 %q) {
+entry:
+  tail call i8* @objc_retain(i8* %x) nounwind
+  br i1 %p, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @callee()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  br i1 %q, label %if.then3, label %if.end5
+
+if.then3:                                         ; preds = %if.end
+  tail call void @use_pointer(i8* %x)
+  br label %if.end5
+
+if.end5:                                          ; preds = %if.then3, %if.end
+  tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Like test0 but the pointer is passed to an intervening call,
+; so the optimization is not safe.
+
+; CHECK: define void @test2(
+; CHECK: entry:
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK:   %e = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !18, !llvm.arc.annotation.topdown !19
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_CanRelease)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: t:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK:   store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !20
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: f:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_CanRelease)
+; CHECK:   call void @use_pointer(i8* %e), !llvm.arc.annotation.bottomup !21, !llvm.arc.annotation.topdown !22
+; CHECK:   store float 3.000000e+00, float* %d, !llvm.arc.annotation.bottomup !20, !llvm.arc.annotation.topdown !23
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Use)
+; CHECK: return:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Use)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
+; CHECK:   call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !24, !llvm.arc.annotation.topdown !25
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: }
+define void @test2(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %e = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  call void @use_pointer(i8* %e)
+  %d = bitcast i32* %x to float*
+  store float 3.0, float* %d
+  br label %return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind
+  ret void
+}
+
+; Like test0 but the release is in a loop,
+; so the optimization is not safe.
+
+; TODO: For now, assume this can't happen.
+
+; CHECK: define void @test3(
+; CHECK: entry:
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK:   tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !26, !llvm.arc.annotation.topdown !27
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: loop:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
+; CHECK:   call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !28, !llvm.arc.annotation.topdown !29
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: return:
+; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
+; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: }
+define void @test3(i32* %x, i1* %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br label %loop
+
+loop:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind
+  %j = load volatile i1* %q
+  br i1 %j, label %loop, label %return
+
+return:
+  ret void
+}
+
+!0 = metadata !{}
+
+; CHECK: !0 = metadata !{metadata !"(test0,%x)", metadata !"S_Use", metadata !"S_None"}
+; CHECK: !1 = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: !2 = metadata !{metadata !"(test0,%x)", metadata !"S_Release", metadata !"S_Use"}
+; CHECK: !3 = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Release"}
+; CHECK: !4 = metadata !{metadata !"(test0,%x)", metadata !"S_Retain", metadata !"S_None"}
+; CHECK: !5 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_None"}
+; CHECK: !6 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: !7 = metadata !{metadata !"(test1,%x)", metadata !"S_Release", metadata !"S_Use"}
+; CHECK: !8 = metadata !{metadata !"(test1,%x)", metadata !"S_Retain", metadata !"S_CanRelease"}
+; CHECK: !9 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_Release"}
+; CHECK: !10 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_None"}
+; CHECK: !11 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: !12 = metadata !{metadata !"(test1b,%x)", metadata !"S_Use", metadata !"S_CanRelease"}
+; CHECK: !13 = metadata !{metadata !"(test1b,%x)", metadata !"S_Retain", metadata !"S_CanRelease"}
+; CHECK: !14 = metadata !{metadata !"(test1b,%x)", metadata !"S_MovableRelease", metadata !"S_Use"}
+; CHECK: !15 = metadata !{metadata !"(test1b,%x)", metadata !"S_CanRelease", metadata !"S_Use"}
+; CHECK: !16 = metadata !{}
+; CHECK: !17 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_MovableRelease"}
+; CHECK: !18 = metadata !{metadata !"(test2,%x)", metadata !"S_CanRelease", metadata !"S_None"}
+; CHECK: !19 = metadata !{metadata !"(test2,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: !20 = metadata !{metadata !"(test2,%x)", metadata !"S_Release", metadata !"S_Use"}
+; CHECK: !21 = metadata !{metadata !"(test2,%x)", metadata !"S_Use", metadata !"S_CanRelease"}
+; CHECK: !22 = metadata !{metadata !"(test2,%x)", metadata !"S_Retain", metadata !"S_CanRelease"}
+; CHECK: !23 = metadata !{metadata !"(test2,%x)", metadata !"S_CanRelease", metadata !"S_Use"}
+; CHECK: !24 = metadata !{metadata !"(test2,%x)", metadata !"S_None", metadata !"S_Release"}
+; CHECK: !25 = metadata !{metadata !"(test2,%x)", metadata !"S_Use", metadata !"S_None"}
+; CHECK: !26 = metadata !{metadata !"(test3,%x)", metadata !"S_Release", metadata !"S_None"}
+; CHECK: !27 = metadata !{metadata !"(test3,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: !28 = metadata !{metadata !"(test3,%x)", metadata !"S_None", metadata !"S_Release"}
+; CHECK: !29 = metadata !{metadata !"(test3,%x)", metadata !"S_Retain", metadata !"S_None"}
+
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 7b64b1be7c62..828a8a701127 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -92,10 +92,10 @@ alt_return:
 
 ; CHECK: define void @test1b(
 ; CHECK: entry:
-; CHECK:   tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK:   tail call i8* @objc_retain(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK-NOT: @objc_
 ; CHECK: if.end5:
-; CHECK:   tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+; CHECK:   tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test1b(i8* %x, i1 %p, i1 %q) {
@@ -404,8 +404,8 @@ entry:
 ; a stack argument.
 
 ; CHECK: define void @test11(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_autorelease(i8* %0) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
 ; CHECK: }
 define void @test11(i8* %x) nounwind {
 entry:
@@ -428,11 +428,13 @@ entry:
   ret void
 }
 
-; Same as test11 but the value is returned. Do an RV optimization.
+; Same as test11 but the value is returned. Do not perform an RV optimization
+; since if the frontend emitted code for an __autoreleasing variable, we may
+; want it to be in the autorelease pool.
 
 ; CHECK: define i8* @test11b(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
 ; CHECK: }
 define i8* @test11b(i8* %x) nounwind {
 entry:
@@ -462,10 +464,10 @@ entry:
 ; Trivial retain,autorelease pair. Don't delete!
 
 ; CHECK: define void @test13(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK: @use_pointer(i8* %x)
-; CHECK: tail call i8* @objc_autorelease(i8* %x) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %x) [[NUW]]
 ; CHECK: }
 define void @test13(i8* %x, i64 %n) {
 entry:
@@ -716,7 +718,7 @@ entry:
 ; Bitcast insertion
 
 ; CHECK: define void @test20(
-; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %tmp) [[NUW]]
 ; CHECK-NEXT: invoke
 define void @test20(double* %self) {
 if.then12:
@@ -795,10 +797,10 @@ entry:
   ret void
 }
 
-; Don't optimize objc_retainBlock.
+; Don't optimize objc_retainBlock, but do strength reduce it.
 
 ; CHECK: define void @test23b
-; CHECK: @objc_retainBlock
+; CHECK: @objc_retain
 ; CHECK: @objc_release
 ; CHECK: }
 define void @test23b(i8* %p) {
@@ -980,7 +982,7 @@ done:
 ; CHECK: call i8* @objc_retain(
 ; CHECK: call void @callee()
 ; CHECK: store
-; CHECK: call void @objc_release(i8* %p) nounwind, !clang.imprecise_release
+; CHECK: call void @objc_release(i8* %p) [[NUW]], !clang.imprecise_release
 ; CHECK: done:
 ; CHECK-NOT: @objc_
 ; CHECK: }
@@ -1450,9 +1452,9 @@ define void @test45(i8** %pp, i8** %qq) {
 ; Don't delete retain and autorelease here.
 
 ; CHECK: define void @test46(
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK: true:
-; CHECK: tail call i8* @objc_autorelease(i8* %p) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
 define void @test46(i8* %p, i1 %a) {
 entry:
   call i8* @objc_retain(i8* %p)
@@ -1565,7 +1567,7 @@ define void @test53(void ()** %zz, i8** %pp) {
 
 ; CHECK: define void @test54(
 ; CHECK: call i8* @returner()
-; CHECK-NEXT: call void @objc_release(i8* %t) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: call void @objc_release(i8* %t) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT: ret void
 define void @test54() {
   %t = call i8* @returner()
@@ -1595,10 +1597,10 @@ entry:
 ; CHECK: define void @test56(
 ; CHECK-NOT: @objc
 ; CHECK: if.then:
-; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT: tail call void @use_pointer(i8* %x)
 ; CHECK-NEXT: tail call void @use_pointer(i8* %x)
-; CHECK-NEXT: tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT: br label %if.end
 ; CHECK-NOT: @objc
 ; CHECK: }
@@ -1630,10 +1632,10 @@ if.end:                                           ; preds = %entry, %if.then
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
-; CHECK-NEXT:   call void @objc_release(i8* %x) nounwind
+; CHECK-NEXT:   call void @objc_release(i8* %x) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test57(i8* %x) nounwind {
@@ -1673,10 +1675,10 @@ entry:
 
 ; CHECK:      define void @test59(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
-; CHECK-NEXT:   call void @objc_release(i8* %x) nounwind
+; CHECK-NEXT:   call void @objc_release(i8* %x) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test59(i8* %x) nounwind {
@@ -1875,8 +1877,8 @@ return:                                           ; preds = %if.then, %entry
 ; rdar://11931823
 
 ; CHECK: define void @test66(
-; CHECK:   %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
-; CHECK:   tail call void @objc_release(i8* %cond) nounwind
+; CHECK:   %tmp7 = tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %cond) [[NUW]]
 ; CHECK: }
 define void @test66(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
 entry:
@@ -2224,3 +2226,6 @@ end:                                              ; preds = %if.end125, %if.end1
 !0 = metadata !{}
 
 declare i32 @__gxx_personality_v0(...)
+
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/cfg-hazards.ll b/test/Transforms/ObjCARC/cfg-hazards.ll
index 15194237c4c5..899298b5967e 100644
--- a/test/Transforms/ObjCARC/cfg-hazards.ll
+++ b/test/Transforms/ObjCARC/cfg-hazards.ll
@@ -86,9 +86,9 @@ for.end:                                          ; preds = %for.body
 
 ; Delete nested retain+release pairs around loops.
 
-;      CHECK: define void @test3(i8* %a) nounwind {
+;      CHECK: define void @test3(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -112,9 +112,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test4(i8* %a) nounwind {
+;      CHECK: define void @test4(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -142,9 +142,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test5(i8* %a) nounwind {
+;      CHECK: define void @test5(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   call void @callee()
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -176,9 +176,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test6(i8* %a) nounwind {
+;      CHECK: define void @test6(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -209,9 +209,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test7(i8* %a) nounwind {
+;      CHECK: define void @test7(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   call void @callee()
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -242,9 +242,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test8(i8* %a) nounwind {
+;      CHECK: define void @test8(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -274,7 +274,7 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test9(i8* %a) nounwind {
+;      CHECK: define void @test9(i8* %a) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -303,7 +303,7 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test10(i8* %a) nounwind {
+;      CHECK: define void @test10(i8* %a) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -332,7 +332,7 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test11(i8* %a) nounwind {
+;      CHECK: define void @test11(i8* %a) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -362,15 +362,15 @@ exit:
 
 ; Don't delete anything if they're not balanced.
 
-;      CHECK: define void @test12(i8* %a) nounwind {
+;      CHECK: define void @test12(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %outer = tail call i8* @objc_retain(i8* %a) nounwind
-; CHECK-NEXT:   %inner = tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   %outer = tail call i8* @objc_retain(i8* %a) [[NUW]]
+; CHECK-NEXT:   %inner = tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
-; CHECK-NEXT: call void @objc_release(i8* %a) nounwind
-; CHECK-NEXT: call void @objc_release(i8* %a) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: call void @objc_release(i8* %a) [[NUW]]
+; CHECK-NEXT: call void @objc_release(i8* %a) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test12(i8* %a) nounwind {
@@ -394,4 +394,6 @@ exit:
   ret void
 }
 
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll b/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll
new file mode 100644
index 000000000000..4215b5c36465
--- /dev/null
+++ b/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll
@@ -0,0 +1,16 @@
+; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
+
+; This file makes sure that clang.arc.used is removed even if no other ARC
+; interesting calls are in the module.
+
+declare void @clang.arc.use(...) nounwind
+
+; Kill calls to @clang.arc.use(...)
+; CHECK: define void @test0(
+; CHECK-NOT: clang.arc.use
+; CHECK: }
+define void @test0(i8* %a, i8* %b) {
+  call void (...)* @clang.arc.use(i8* %a, i8* %b) nounwind
+  ret void
+}
+
diff --git a/test/Transforms/ObjCARC/contract-marker.ll b/test/Transforms/ObjCARC/contract-marker.ll
index 01d978a0e21d..01fd1e71436e 100644
--- a/test/Transforms/ObjCARC/contract-marker.ll
+++ b/test/Transforms/ObjCARC/contract-marker.ll
@@ -3,7 +3,7 @@
 ; CHECK:      %call = tail call i32* @qux()
 ; CHECK-NEXT: %tcall = bitcast i32* %call to i8*
 ; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
-; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) [[NUW:#[0-9]+]]
 
 define void @foo() {
 entry:
@@ -21,3 +21,5 @@ declare void @bar(i8*)
 !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
 
 !0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index 2922f816d589..6999237300e7 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -10,7 +10,7 @@ declare void @use_pointer(i8*)
 
 ; CHECK: define void @test0(
 ; CHECK: entry:
-; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) nounwind
+; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret void
 define void @test0(i8* %p) {
 entry:
@@ -25,10 +25,10 @@ entry:
 
 ;      CHECK: define void @test1(i8* %p) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT:   %tmp = load volatile i8** @x, align 8
 ; CHECK-NEXT:   store i8* %0, i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test1(i8* %p) {
@@ -44,10 +44,10 @@ entry:
 
 ;      CHECK: define void @test2(i8* %p) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT:   %tmp = load i8** @x, align 8
 ; CHECK-NEXT:   store volatile i8* %0, i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test2(i8* %p) {
@@ -64,11 +64,11 @@ entry:
 
 ; CHECK:      define void @test3(i8* %newValue) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) [[NUW]]
 ; CHECK-NEXT:   %x1 = load i8** @x, align 8
 ; CHECK-NEXT:   store i8* %x0, i8** @x, align 8
 ; CHECK-NEXT:   tail call void @use_pointer(i8* %x1), !clang.arc.no_objc_arc_exceptions !0
-; CHECK-NEXT:   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT:   tail call void @objc_release(i8* %x1) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test3(i8* %newValue) {
@@ -85,11 +85,11 @@ entry:
 
 ; CHECK:      define i1 @test4(i8* %newValue, i8* %foo) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) [[NUW]]
 ; CHECK-NEXT:   %x1 = load i8** @x, align 8
 ; CHECK-NEXT:   store i8* %x0, i8** @x, align 8
 ; CHECK-NEXT:   %t = icmp eq i8* %x1, %foo
-; CHECK-NEXT:   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT:   tail call void @objc_release(i8* %x1) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT:   ret i1 %t
 ; CHECK-NEXT: }
 define i1 @test4(i8* %newValue, i8* %foo) {
@@ -106,7 +106,7 @@ entry:
 
 ; CHECK: define i1 @test5(i8* %newValue, i8* %foo) {
 ; CHECK: %t = icmp eq i8* %x1, %foo
-; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) nounwind
+; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) [[NUW]]
 define i1 @test5(i8* %newValue, i8* %foo) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
@@ -121,7 +121,7 @@ entry:
 
 ; CHECK: define i1 @test6(i8* %newValue, i8* %foo) {
 ; CHECK: %t = icmp eq i8* %x1, %foo
-; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) nounwind
+; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) [[NUW]]
 define i1 @test6(i8* %newValue, i8* %foo) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
@@ -136,9 +136,9 @@ entry:
 
 ;      CHECK: define void @test7(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT:   %tmp = load i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test7(i8* %p) {
@@ -155,7 +155,7 @@ entry:
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %tmp = load i8** @x, align 8
 ; CHECK-NEXT:   store i8* %p, i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test8(i8* %p) {
@@ -167,3 +167,5 @@ entry:
 }
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract-testcases.ll b/test/Transforms/ObjCARC/contract-testcases.ll
index 1510ed00e691..85b03be275ec 100644
--- a/test/Transforms/ObjCARC/contract-testcases.ll
+++ b/test/Transforms/ObjCARC/contract-testcases.ll
@@ -69,7 +69,7 @@ bb7:                                              ; preds = %bb6, %bb6, %bb5
 ; CHECK: define void @_Z6doTestP8NSString() {
 ; CHECK: invoke.cont:                                      ; preds = %entry
 ; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
-; CHECK-NEXT: %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+; CHECK-NEXT: %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) [[NUW:#[0-9]+]]
 define void @_Z6doTestP8NSString() {
 entry:
   %call = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* ()*)()
@@ -88,3 +88,6 @@ lpad:                                             ; preds = %entry
 !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
 
 !0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
+
+; CHECK: attributes #0 = { optsize }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll
index c48f8a534fad..0b60683d9995 100644
--- a/test/Transforms/ObjCARC/contract.ll
+++ b/test/Transforms/ObjCARC/contract.ll
@@ -34,12 +34,12 @@ entry:
 ; Merge objc_retain and objc_autorelease into objc_retainAutorelease.
 
 ; CHECK: define void @test2(
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK: }
 define void @test2(i8* %x) nounwind {
 entry:
   %0 = tail call i8* @objc_retain(i8* %x) nounwind
-  tail call i8* @objc_autorelease(i8* %0) nounwind
+  call i8* @objc_autorelease(i8* %0) nounwind
   call void @use_pointer(i8* %x)
   ret void
 }
@@ -47,7 +47,7 @@ entry:
 ; Same as test2 but the value is returned. Do an RV optimization.
 
 ; CHECK: define i8* @test2b(
-; CHECK: tail call i8* @objc_retainAutoreleaseReturnValue(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutoreleaseReturnValue(i8* %x) [[NUW]]
 ; CHECK: }
 define i8* @test2b(i8* %x) nounwind {
 entry:
@@ -59,14 +59,14 @@ entry:
 ; Merge a retain,autorelease pair around a call.
 
 ; CHECK: define void @test3(
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) [[NUW]]
 ; CHECK: @use_pointer(i8* %0)
 ; CHECK: }
 define void @test3(i8* %x, i64 %n) {
 entry:
   tail call i8* @objc_retain(i8* %x) nounwind
   call void @use_pointer(i8* %x)
-  tail call i8* @objc_autorelease(i8* %x) nounwind
+  call i8* @objc_autorelease(i8* %x) nounwind
   ret void
 }
 
@@ -75,7 +75,7 @@ entry:
 
 ; CHECK: define void @test4(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: @objc_retainAutorelease(i8* %x) nounwind
+; CHECK-NEXT: @objc_retainAutorelease(i8* %x) [[NUW]]
 ; CHECK-NEXT: @use_pointer
 ; CHECK-NEXT: @objc_release
 ; CHECK-NEXT: ret void
@@ -84,7 +84,7 @@ define void @test4(i8* %x, i64 %n) {
 entry:
   tail call i8* @objc_retain(i8* %x) nounwind
   call void @use_pointer(i8* %x)
-  tail call i8* @objc_autorelease(i8* %x) nounwind
+  call i8* @objc_autorelease(i8* %x) nounwind
   tail call void @objc_release(i8* %x) nounwind
   ret void
 }
@@ -92,9 +92,9 @@ entry:
 ; Don't merge retain and autorelease if they're not control-equivalent.
 
 ; CHECK: define void @test5(
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK: true:
-; CHECK: tail call i8* @objc_autorelease(i8* %0) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
 ; CHECK: }
 define void @test5(i8* %p, i1 %a) {
 entry:
@@ -102,7 +102,7 @@ entry:
   br i1 %a, label %true, label %false
 
 true:
-  tail call i8* @objc_autorelease(i8* %p) nounwind
+  call i8* @objc_autorelease(i8* %p) nounwind
   call void @use_pointer(i8* %p)
   ret void
 
@@ -119,8 +119,8 @@ false:
 ; Those entrypoints don't exist yet though.
 
 ; CHECK: define i8* @test6(
-; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p) nounwind
-; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %1) nounwind
+; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p) [[NUW]]
+; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %1) [[NUW]]
 ; CHECK: }
 define i8* @test6() {
   %p = call i8* @returner()
@@ -161,3 +161,16 @@ return:                                           ; preds = %if.then, %entry
   %retval = phi i8* [ %c, %if.then ], [ null, %entry ]
   ret i8* %retval
 }
+
+; Kill calls to @clang.arc.use(...)
+; CHECK: define void @test9(
+; CHECK-NOT: clang.arc.use
+; CHECK: }
+define void @test9(i8* %a, i8* %b) {
+  call void (...)* @clang.arc.use(i8* %a, i8* %b) nounwind
+  ret void
+}
+
+declare void @clang.arc.use(...) nounwind
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
new file mode 100644
index 000000000000..05257d1d5cf8
--- /dev/null
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -0,0 +1,174 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+; rdar://11744105
+; bugzilla://14584
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%0 = type opaque
+%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* }
+%struct._objc_cache = type opaque
+%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* }
+%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] }
+%struct._objc_method = type { i8*, i8*, i8* }
+%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] }
+%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32, i8** }
+%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
+%struct._prop_t = type { i8*, i8* }
+%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] }
+%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 }
+%struct.NSConstantString = type { i32*, i32, i8*, i64 }
+
+@"OBJC_CLASS_$_NSObject" = external global %struct._class_t
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_" = internal global %struct._class_t* @"OBJC_CLASS_$_NSObject", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"new\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str = linker_private unnamed_addr constant [11 x i8] c"Failed: %@\00", align 1
+@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i64 10 }, section "__DATA,__cfstring"
+@"OBJC_CLASS_$_NSException" = external global %struct._class_t
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_1" = internal global %struct._class_t* @"OBJC_CLASS_$_NSException", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@.str2 = linker_private unnamed_addr constant [4 x i8] c"Foo\00", align 1
+@_unnamed_cfstring_3 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([4 x i8]* @.str2, i32 0, i32 0), i64 3 }, section "__DATA,__cfstring"
+@"\01L_OBJC_METH_VAR_NAME_4" = internal global [14 x i8] c"raise:format:\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_5" = internal global i8* getelementptr inbounds ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@llvm.used = appending global [6 x i8*] [i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_" to i8*), i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1" to i8*), i8* getelementptr inbounds ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_5" to i8*)], section "llvm.metadata"
+
+define i32 @main() uwtable ssp {
+entry:
+  %tmp = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", align 8, !dbg !37
+  %tmp1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !dbg !37, !invariant.load !38
+  %tmp2 = bitcast %struct._class_t* %tmp to i8*, !dbg !37
+; CHECK: call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1)
+  %call = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1), !dbg !37, !clang.arc.no_objc_arc_exceptions !38
+  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !12), !dbg !37
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+  %tmp3 = call i8* @objc_retain(i8* %call) nounwind, !dbg !39
+  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !25), !dbg !39
+  invoke fastcc void @ThrowFunc(i8* %call)
+          to label %eh.cont unwind label %lpad, !dbg !40, !clang.arc.no_objc_arc_exceptions !38
+
+eh.cont:                                          ; preds = %entry
+; CHECK: call void @objc_release(i8* %call)
+  call void @objc_release(i8* %call) nounwind, !dbg !42, !clang.imprecise_release !38
+  br label %if.end, !dbg !43
+
+lpad:                                             ; preds = %entry
+  %tmp4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null, !dbg !40
+  %tmp5 = extractvalue { i8*, i32 } %tmp4, 0, !dbg !40
+  %exn.adjusted = call i8* @objc_begin_catch(i8* %tmp5) nounwind, !dbg !44
+  call void @llvm.dbg.value(metadata !45, i64 0, metadata !21), !dbg !46
+  call void @objc_end_catch(), !dbg !49, !clang.arc.no_objc_arc_exceptions !38
+; CHECK: call void @objc_release(i8* %call)
+  call void @objc_release(i8* %call) nounwind, !dbg !42, !clang.imprecise_release !38
+  call void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !50, !clang.arc.no_objc_arc_exceptions !38
+  br label %if.end, !dbg !52
+
+if.end:                                           ; preds = %lpad, %eh.cont
+  call void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !53, !clang.arc.no_objc_arc_exceptions !38
+; CHECK: call void @objc_release(i8* %call)
+  call void @objc_release(i8* %call) nounwind, !dbg !54, !clang.imprecise_release !38
+  ret i32 0, !dbg !54
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+
+declare i8* @objc_retain(i8*) nonlazybind
+
+declare i8* @objc_begin_catch(i8*)
+
+declare void @objc_end_catch()
+
+declare void @objc_exception_rethrow()
+
+define internal fastcc void @ThrowFunc(i8* %obj) uwtable noinline ssp {
+entry:
+  %tmp = call i8* @objc_retain(i8* %obj) nounwind
+  call void @llvm.dbg.value(metadata !{i8* %obj}, i64 0, metadata !32), !dbg !55
+  %tmp1 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1", align 8, !dbg !56
+  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_5", align 8, !dbg !56, !invariant.load !38
+  %tmp3 = bitcast %struct._class_t* %tmp1 to i8*, !dbg !56
+  call void (i8*, i8*, %0*, %0*, ...)* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*, %0*, ...)*)(i8* %tmp3, i8* %tmp2, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*), %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*)), !dbg !56, !clang.arc.no_objc_arc_exceptions !38
+  call void @objc_release(i8* %obj) nounwind, !dbg !58, !clang.imprecise_release !38
+  ret void, !dbg !58
+}
+
+declare i32 @__objc_personality_v0(...)
+
+declare void @objc_release(i8*) nonlazybind
+
+declare void @NSLog(i8*, ...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+; CHECK: attributes #0 = { ssp uwtable }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { nonlazybind }
+; CHECK: attributes #3 = { noinline ssp uwtable }
+; CHECK: attributes [[NUW]] = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!33, !34, !35, !36}
+
+!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997", metadata !"clang version 3.3 ", i1 true, i1 true, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !27}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !10, i32 10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main]
+!6 = metadata !{i32 786473, metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !11}
+!11 = metadata !{metadata !12, metadata !21, metadata !25}
+!12 = metadata !{i32 786688, metadata !13, metadata !"obj", metadata !6, i32 11, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj] [line 11]
+!13 = metadata !{i32 786443, metadata !5, i32 10, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!14 = metadata !{i32 786454, null, metadata !"id", metadata !6, i32 11, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ]
+!15 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!16 = metadata !{i32 786451, null, metadata !"objc_object", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786445, metadata !16, metadata !"isa", metadata !6, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!19 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!20 = metadata !{i32 786451, null, metadata !"objc_class", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!21 = metadata !{i32 786688, metadata !22, metadata !"ok", metadata !6, i32 13, metadata !23, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ok] [line 13]
+!22 = metadata !{i32 786443, metadata !13, i32 12, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!23 = metadata !{i32 786454, null, metadata !"BOOL", metadata !6, i32 62, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char]
+!24 = metadata !{i32 786468, null, metadata !"signed char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!25 = metadata !{i32 786688, metadata !26, metadata !"obj2", metadata !6, i32 15, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj2] [line 15]
+!26 = metadata !{i32 786443, metadata !22, i32 14, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!27 = metadata !{i32 786478, i32 0, metadata !6, metadata !"ThrowFunc", metadata !"ThrowFunc", metadata !"", metadata !6, i32 4, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i8*)* @ThrowFunc, null, null, metadata !30, i32 5} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc]
+!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = metadata !{null, metadata !14}
+!30 = metadata !{metadata !31}
+!31 = metadata !{metadata !32}
+!32 = metadata !{i32 786689, metadata !27, metadata !"obj", metadata !6, i32 16777220, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [obj] [line 4]
+!33 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!34 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!35 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!36 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!37 = metadata !{i32 11, i32 0, metadata !13, null}
+!38 = metadata !{}
+!39 = metadata !{i32 15, i32 0, metadata !26, null}
+!40 = metadata !{i32 17, i32 0, metadata !41, null}
+!41 = metadata !{i32 786443, metadata !26, i32 16, i32 0, metadata !6, i32 3} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!42 = metadata !{i32 22, i32 0, metadata !26, null}
+!43 = metadata !{i32 23, i32 0, metadata !22, null}
+!44 = metadata !{i32 19, i32 0, metadata !41, null}
+!45 = metadata !{i8 0}
+!46 = metadata !{i32 20, i32 0, metadata !47, null}
+!47 = metadata !{i32 786443, metadata !48, i32 19, i32 0, metadata !6, i32 5} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!48 = metadata !{i32 786443, metadata !26, i32 19, i32 0, metadata !6, i32 4} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!49 = metadata !{i32 21, i32 0, metadata !47, null}
+!50 = metadata !{i32 24, i32 0, metadata !51, null}
+!51 = metadata !{i32 786443, metadata !22, i32 23, i32 0, metadata !6, i32 6} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!52 = metadata !{i32 25, i32 0, metadata !51, null}
+!53 = metadata !{i32 27, i32 0, metadata !13, null}
+!54 = metadata !{i32 28, i32 0, metadata !13, null}
+!55 = metadata !{i32 4, i32 0, metadata !27, null}
+!56 = metadata !{i32 6, i32 0, metadata !57, null}
+!57 = metadata !{i32 786443, metadata !27, i32 5, i32 0, metadata !6, i32 7} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!58 = metadata !{i32 7, i32 0, metadata !57, null}
diff --git a/test/Transforms/ObjCARC/escape.ll b/test/Transforms/ObjCARC/escape.ll
index 3f694cf1d5a4..8f252a0d343a 100644
--- a/test/Transforms/ObjCARC/escape.ll
+++ b/test/Transforms/ObjCARC/escape.ll
@@ -10,8 +10,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; with the objc_storeWeak call.
 
 ; CHECK: define void @test0(
-; CHECK: %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) [[NUW:#[0-9]+]], !clang.arc.copy_on_escape !0
+; CHECK: call void @objc_release(i8* %tmp7) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0() nounwind {
 entry:
@@ -129,3 +129,6 @@ declare i8* @not_really_objc_storeWeak(i8**, i8*)
 declare void @objc_release(i8*)
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind ssp }
diff --git a/test/Transforms/ObjCARC/gvn.ll b/test/Transforms/ObjCARC/gvn.ll
index 6917b02e0324..3648866de01a 100644
--- a/test/Transforms/ObjCARC/gvn.ll
+++ b/test/Transforms/ObjCARC/gvn.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -objc-arc -gvn < %s | FileCheck %s
+; RUN: opt -S -basicaa -objc-arc-aa -gvn < %s | FileCheck %s
 
 @x = common global i8* null, align 8
 
diff --git a/test/Transforms/ObjCARC/intrinsic-use.ll b/test/Transforms/ObjCARC/intrinsic-use.ll
new file mode 100644
index 000000000000..9c7b81a95d23
--- /dev/null
+++ b/test/Transforms/ObjCARC/intrinsic-use.ll
@@ -0,0 +1,63 @@
+; RUN: opt -basicaa -objc-arc -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+declare i8* @objc_retain(i8*)
+declare i8* @objc_retainAutorelease(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_autorelease(i8*)
+
+declare void @clang.arc.use(...)
+
+declare void @test0_helper(i8*, i8**)
+
+; Ensure that we honor clang.arc.use as a use and don't miscompile
+; the reduced test case from <rdar://13195034>.
+;
+; FIXME: the fact that we re-order retains w.r.t. @clang.arc.use could
+; be problematic if we get run twice, e.g. under LTO.
+;
+; CHECK:      define void @test0(
+; CHECK:        @objc_retain(i8* %x)
+; CHECK-NEXT:   store i8* %y, i8** %temp0
+; CHECK-NEXT:   @objc_retain(i8* %y)
+; CHECK-NEXT:   call void @test0_helper
+; CHECK-NEXT:   [[VAL1:%.*]] = load i8** %temp0
+; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* %y)
+; CHECK-NEXT:   @objc_retain(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_release(i8* %y)
+; CHECK-NEXT:   store i8* [[VAL1]], i8** %temp1
+; CHECK-NEXT:   call void @test0_helper
+; CHECK-NEXT:   [[VAL2:%.*]] = load i8** %temp1
+; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_retain(i8* [[VAL2]])
+; CHECK-NEXT:   @objc_release(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_autorelease(i8* %x)
+; CHECK-NEXT:   store i8* %x, i8** %out
+; CHECK-NEXT:   @objc_release(i8* [[VAL2]])
+; CHECK-NEXT:   ret void
+define void @test0(i8** %out, i8* %x, i8* %y) {
+entry:
+  %temp0 = alloca i8*, align 8
+  %temp1 = alloca i8*, align 8
+  %0 = call i8* @objc_retain(i8* %x) nounwind
+  %1 = call i8* @objc_retain(i8* %y) nounwind
+  store i8* %y, i8** %temp0
+  call void @test0_helper(i8* %x, i8** %temp0)
+  %val1 = load i8** %temp0
+  %2 = call i8* @objc_retain(i8* %val1) nounwind
+  call void (...)* @clang.arc.use(i8* %y) nounwind
+  call void @objc_release(i8* %y) nounwind
+  store i8* %val1, i8** %temp1
+  call void @test0_helper(i8* %x, i8** %temp1)
+  %val2 = load i8** %temp1
+  %3 = call i8* @objc_retain(i8* %val2) nounwind
+  call void (...)* @clang.arc.use(i8* %val1) nounwind
+  call void @objc_release(i8* %val1) nounwind
+  %4 = call i8* @objc_retain(i8* %x) nounwind
+  %5 = call i8* @objc_autorelease(i8* %x) nounwind
+  store i8* %x, i8** %out
+  call void @objc_release(i8* %val2) nounwind
+  call void @objc_release(i8* %x) nounwind
+  ret void
+}
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
index 1a58e34940e1..f528b4ac35bc 100644
--- a/test/Transforms/ObjCARC/invoke.ll
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -12,10 +12,10 @@ declare i8* @returner()
 
 ; CHECK: define void @test0(
 ; CHECK: invoke.cont:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW:#[0-9]+]], !clang.imprecise_release !0
 ; CHECK:   ret void
 ; CHECK: lpad:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
 ; CHECK:   ret void
 define void @test0(i8* %zipFile) {
 entry:
@@ -39,11 +39,11 @@ lpad:                                             ; preds = %entry
 
 ; CHECK: define void @test1(
 ; CHECK: invoke.cont:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
 ; CHECK:   call void @callee()
 ; CHECK:   br label %done
 ; CHECK: lpad:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
 ; CHECK:   call void @callee()
 ; CHECK:   br label %done
 ; CHECK: done:
@@ -108,7 +108,7 @@ finally.rethrow:                                  ; preds = %invoke.cont, %entry
 
 ; CHECK: define void @test3(
 ; CHECK: if.end:
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test3(i8* %p, i1 %b) {
 entry:
@@ -140,10 +140,10 @@ if.end:
 ; CHECK: lpad:
 ; CHECK-NEXT: %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
 ; CHECK-NEXT: cleanup
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 ; CHECK: if.end:
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test4(i8* %p, i1 %b) {
 entry:
@@ -215,4 +215,6 @@ if.end:
 declare i32 @__gxx_personality_v0(...)
 declare i32 @__objc_personality_v0(...)
 
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll b/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
index 170d0a99c98b..5d058257c6ed 100644
--- a/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
+++ b/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
@@ -4,7 +4,7 @@
 ; and various scary looking things and fold it into an objc_retainAutorelease.
 
 ; CHECK: bb57:
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %tmp71x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %tmp71x) [[NUW:#[0-9]+]]
 ; CHECK: bb99:
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -212,10 +212,12 @@ bb99:                                             ; preds = %bb57
   br label %bb104
 
 bb104:                                            ; preds = %bb99, %bb57
-  %tmp105 = tail call i8* @objc_autorelease(i8* %tmp72) nounwind
+  %tmp105 = call i8* @objc_autorelease(i8* %tmp72) nounwind
   %tmp106 = bitcast i8* %tmp105 to %14*
   tail call void @objc_release(i8* %tmp85) nounwind
   %tmp107 = bitcast %18* %tmp47 to i8*
   tail call void @objc_release(i8* %tmp107) nounwind
   ret %14* %tmp106
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 32be03ec6ae0..ca9c58bcb3e3 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -770,9 +770,9 @@ forcoll.empty:
 @__block_d_tmp5 = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
 
 ; CHECK: define void @test11(
-; CHECK: tail call i8* @objc_retain(i8* %call) nounwind
-; CHECK: tail call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+; CHECK: tail call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+; CHECK: tail call i8* @objc_retain(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test11() {
 entry:
@@ -820,3 +820,6 @@ entry:
   call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
   ret void
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nonlazybind }
diff --git a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
index 9728f6e0d94f..58b5bbe9c7e9 100644
--- a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
+++ b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
@@ -59,11 +59,12 @@ lpad:                                             ; preds = %entry
   resume { i8*, i32 } %t8
 }
 
-; There is no !clang.arc.no_objc_arc_exceptions
-; metadata here, so the optimizer shouldn't eliminate anything.
+; There is no !clang.arc.no_objc_arc_exceptions metadata here, so the optimizer
+; shouldn't eliminate anything, but *CAN* strength reduce the objc_retainBlock
+; to an objc_retain.
 
 ; CHECK: define void @test0_no_metadata(
-; CHECK: call i8* @objc_retainBlock(
+; CHECK: call i8* @objc_retain(
 ; CHECK: invoke
 ; CHECK: call void @objc_release(
 ; CHECK: }
diff --git a/test/Transforms/ObjCARC/pr12270.ll b/test/Transforms/ObjCARC/pr12270.ll
index 1faae5f68705..bdff0d7b4d58 100644
--- a/test/Transforms/ObjCARC/pr12270.ll
+++ b/test/Transforms/ObjCARC/pr12270.ll
@@ -1,4 +1,4 @@
-; RUN: opt -disable-output -objc-arc-contract %s
+; RUN: opt -disable-output -objc-arc-contract < %s
 ; test that we don't crash on unreachable code
 %2 = type opaque
 
diff --git a/test/Transforms/ObjCARC/retain-block-alloca.ll b/test/Transforms/ObjCARC/retain-block-alloca.ll
index 01f208704c7b..f40be238baf3 100644
--- a/test/Transforms/ObjCARC/retain-block-alloca.ll
+++ b/test/Transforms/ObjCARC/retain-block-alloca.ll
@@ -9,7 +9,7 @@
 @"\01L_OBJC_SELECTOR_REFERENCES_" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
 
 ; CHECK: define void @test(
-; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) nounwind
+; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) [[NUW:#[0-9]+]]
 ; CHECK: @objc_msgSend
 ; CHECK-NEXT: @objc_release(i8* %3)
 define void @test(%0* %array) uwtable {
@@ -87,4 +87,8 @@ declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
 
 declare void @objc_release(i8*)
 
+; CHECK: attributes #0 = { uwtable }
+; CHECK: attributes #1 = { nonlazybind }
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
new file mode 100644
index 000000000000..2c1ddce32836
--- /dev/null
+++ b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
@@ -0,0 +1,127 @@
+; RUN: opt -S -objc-arc < %s | FileCheck %s
+
+declare i8* @objc_retain(i8*) nonlazybind
+declare void @objc_release(i8*) nonlazybind
+declare i8* @objc_retainBlock(i8*)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Use by an instruction which copies the value is an escape if the             ;
+; result is an escape. The current instructions with this property are:        ;
+;                                                                              ;
+; 1. BitCast.                                                                  ;
+; 2. GEP.                                                                      ;
+; 3. PhiNode.                                                                  ;
+; 4. SelectInst.                                                               ;
+;                                                                              ;
+; Make sure that such instructions do not confuse the optimizer into removing  ;
+; an objc_retainBlock that is needed.                                          ;
+;                                                                              ;
+; rdar://13273675. (With extra test cases to handle bitcast, phi, and select.  ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define void @bitcasttest(i8* %storage, void (...)* %block)  {
+; CHECK: define void @bitcasttest
+entry:
+  %t1 = bitcast void (...)* %block to i8*
+; CHECK-NOT: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %storage to void (...)**
+  %t5 = bitcast i8* %t3 to void (...)*
+  store void (...)* %t5, void (...)** %t4, align 8
+; CHECK-NOT: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+}
+
+define void @geptest(void (...)** %storage_array, void (...)* %block)  {
+; CHECK: define void @geptest
+entry:
+  %t1 = bitcast void (...)* %block to i8*
+; CHECK-NOT: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %t3 to void (...)*
+  
+  %storage = getelementptr inbounds void (...)** %storage_array, i64 0
+  
+  store void (...)* %t4, void (...)** %storage, align 8
+; CHECK-NOT: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+}
+
+define void @selecttest(void (...)** %store1, void (...)** %store2,
+                        void (...)* %block) {
+; CHECK: define void @selecttest
+entry:
+  %t1 = bitcast void (...)* %block to i8*
+; CHECK-NOT: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %t3 to void (...)*
+  %store = select i1 undef, void (...)** %store1, void (...)** %store2
+  store void (...)* %t4, void (...)** %store, align 8
+; CHECK-NOT: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+}
+
+define void @phinodetest(void (...)** %storage1,
+                         void (...)** %storage2,
+                         void (...)* %block) {
+; CHECK: define void @phinodetest
+entry:
+  %t1 = bitcast void (...)* %block to i8*
+; CHECK-NOT: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %t3 to void (...)*
+  br i1 undef, label %store1_set, label %store2_set
+
+store1_set:
+  br label %end
+
+store2_set:
+  br label %end
+
+end:
+  %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set]
+  store void (...)* %t4, void (...)** %storage, align 8
+; CHECK-NOT: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This test makes sure that we do not hang clang when visiting a use ;
+; cycle caused by phi nodes during objc-arc analysis. *NOTE* This    ;
+; test case looks a little convoluted since it was produced by	     ;
+; bugpoint.							     ;
+; 								     ;
+; bugzilla://14551						     ;
+; rdar://12851911						     ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define void @phinode_use_cycle(i8* %block) uwtable optsize ssp {
+; CHECK: define void @phinode_use_cycle(i8* %block)
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %if.then, %for.body, %entry
+  %block.05 = phi void (...)* [ null, %entry ], [ %1, %if.then ], [ %block.05, %for.body ]
+  br i1 undef, label %for.body, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %0 = call i8* @objc_retainBlock(i8* %block), !clang.arc.copy_on_escape !0
+  %1 = bitcast i8* %0 to void (...)*
+  %2 = bitcast void (...)* %block.05 to i8*
+  call void @objc_release(i8* %2) nounwind, !clang.imprecise_release !0
+  br label %for.body
+}
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-side-effects.ll b/test/Transforms/ObjCARC/retain-block-side-effects.ll
index e84d48f86912..7fa73cbfef15 100644
--- a/test/Transforms/ObjCARC/retain-block-side-effects.ll
+++ b/test/Transforms/ObjCARC/retain-block-side-effects.ll
@@ -4,7 +4,7 @@
 ; objc_retainBlock stores into %repeater so the load from after the
 ; call isn't forwardable from the store before the call.
 
-; CHECK: %tmp16 = call i8* @objc_retainBlock(i8* %tmp15) nounwind
+; CHECK: %tmp16 = call i8* @objc_retainBlock(i8* %tmp15) [[NUW:#[0-9]+]]
 ; CHECK: %tmp17 = bitcast i8* %tmp16 to void ()*
 ; CHECK: %tmp18 = load %struct.__block_byref_repeater** %byref.forwarding, align 8
 ; CHECK: %repeater12 = getelementptr inbounds %struct.__block_byref_repeater* %tmp18, i64 0, i32 6
@@ -37,3 +37,6 @@ entry:
 }
 
 declare i8* @objc_retainBlock(i8*)
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll
index b3b62d300008..1bb3f0276adf 100644
--- a/test/Transforms/ObjCARC/retain-block.ll
+++ b/test/Transforms/ObjCARC/retain-block.ll
@@ -28,8 +28,8 @@ entry:
 ; optimization possible.
 
 ; CHECK: define void @test0_no_metadata(i8* %tmp) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW:#[0-9]+]]
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0_no_metadata(i8* %tmp) {
 entry:
@@ -43,8 +43,8 @@ entry:
 ; optimization possible.
 
 ; CHECK: define void @test0_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0_escape(i8* %tmp, i8** %z) {
 entry:
@@ -58,8 +58,8 @@ entry:
 ; Same as test0_escape, but there's no intervening call.
 
 ; CHECK: define void @test0_just_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0_just_escape(i8* %tmp, i8** %z) {
 entry:
@@ -73,9 +73,9 @@ entry:
 
 ; CHECK: define void @test1(i8* %tmp) {
 ; CHECK-NOT: @objc
-; CHECK: tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %tmp) [[NUW]]
 ; CHECK-NOT: @objc
-; CHECK: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test1(i8* %tmp) {
@@ -95,10 +95,10 @@ entry:
 
 ; CHECK: define void @test1_no_metadata(i8* %tmp) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]]
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test1_no_metadata(i8* %tmp) {
@@ -118,11 +118,11 @@ entry:
 
 ; CHECK: define void @test1_escape(i8* %tmp, i8** %z) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
 ; CHECK-NEXT: store i8* %tmp2, i8** %z
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test1_escape(i8* %tmp, i8** %z) {
@@ -136,3 +136,5 @@ entry:
   tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
   ret void
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll
index f876e51592b6..165829f7c01f 100644
--- a/test/Transforms/ObjCARC/retain-not-declared.ll
+++ b/test/Transforms/ObjCARC/retain-not-declared.ll
@@ -13,7 +13,7 @@ declare void @objc_release(i8*)
 
 ; CHECK:      define i8* @test0(i8* %p) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleaseReturnValue(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleaseReturnValue(i8* %p) [[NUW:#[0-9]+]]
 ; CHECK-NEXT:   ret i8* %0
 ; CHECK-NEXT: }
 
@@ -21,8 +21,8 @@ define i8* @test0(i8* %p) {
 entry:
   %call = tail call i8* @objc_unretainedObject(i8* %p)
   %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
-  %1 = tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
-  ret i8* %1
+  %1 = tail call i8* @objc_autoreleaseReturnValue(i8* %call) nounwind
+  ret i8* %call
 }
 
 ; Properly create the @objc_retain declaration when it doesn't already exist.
@@ -65,3 +65,5 @@ lpad100:                                          ; preds = %invoke.cont93
 declare i32 @__gxx_personality_v0(...)
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/rle-s2l.ll b/test/Transforms/ObjCARC/rle-s2l.ll
index 8f8d5c0d3825..2865c94dc88c 100644
--- a/test/Transforms/ObjCARC/rle-s2l.ll
+++ b/test/Transforms/ObjCARC/rle-s2l.ll
@@ -57,7 +57,7 @@ define void @test2(i8** %p) {
 
 ; CHECK:      define void @test3(i8** %p) {
 ; CHECK-NEXT:   %x = call i8* @objc_loadWeak(i8** %p)
-; CHECK-NEXT:   call void @use_pointer(i8* %x) readonly
+; CHECK-NEXT:   call void @use_pointer(i8* %x) [[RO:#[0-9]+]]
 ; CHECK-NEXT:   %1 = tail call i8* @objc_retain(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   ret void
@@ -74,7 +74,7 @@ define void @test3(i8** %p) {
 
 ; CHECK:      define void @test4(i8** %p) {
 ; CHECK-NEXT:   %x = call i8* @objc_loadWeak(i8** %p)
-; CHECK-NEXT:   call void @use_pointer(i8* %x) readonly
+; CHECK-NEXT:   call void @use_pointer(i8* %x) [[RO]]
 ; CHECK-NEXT:   call void @callee()
 ; CHECK-NEXT:   %y = call i8* @objc_loadWeak(i8** %p)
 ; CHECK-NEXT:   call void @use_pointer(i8* %y)
@@ -133,3 +133,6 @@ define void @test7(i8** %p, i8* %n, i8** %q, i8* %m) {
   call void @use_pointer(i8* %y)
   ret void
 }
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes [[RO]] = { readonly }
diff --git a/test/Transforms/ObjCARC/rv.ll b/test/Transforms/ObjCARC/rv.ll
index 9353a19f71a4..589c60f9f3aa 100644
--- a/test/Transforms/ObjCARC/rv.ll
+++ b/test/Transforms/ObjCARC/rv.ll
@@ -29,7 +29,7 @@ declare i8* @returner()
 ; CHECK:      define void @test0(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %x = call i8* @returner
-; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %x) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK: t:
 ; CHECK-NOT: @objc_
 ; CHECK: return:
@@ -121,7 +121,7 @@ define i8* @test7() {
   %p = call i8* @returner()
   call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
   %t = call i8* @objc_autoreleaseReturnValue(i8* %p)
-  call void @use_pointer(i8* %t)
+  call void @use_pointer(i8* %p)
   ret i8* %t
 }
 
@@ -133,7 +133,7 @@ define i8* @test7b() {
   call void @use_pointer(i8* %p)
   call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
   %t = call i8* @objc_autoreleaseReturnValue(i8* %p)
-  ret i8* %t
+  ret i8* %p
 }
 
 ; Turn objc_retain into objc_retainAutoreleasedReturnValue if its operand
@@ -150,17 +150,17 @@ define void @test8() {
 ; Don't apply the RV optimization to autorelease if there's no retain.
 
 ; CHECK: define i8* @test9(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
 define i8* @test9(i8* %p) {
   call i8* @objc_autorelease(i8* %p)
   ret i8* %p
 }
 
-; Apply the RV optimization.
+; Do not apply the RV optimization.
 
 ; CHECK: define i8* @test10(i8* %p)
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
-; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
+; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret i8* %p
 define i8* @test10(i8* %p) {
   %1 = call i8* @objc_retain(i8* %p)
@@ -174,7 +174,7 @@ define i8* @test10(i8* %p) {
 ; CHECK: define i8* @test11(i8* %p)
 ; CHECK: tail call i8* @objc_retain(i8* %p)
 ; CHECK-NEXT: call void @use_pointer(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
 ; CHECK-NEXT: ret i8* %p
 define i8* @test11(i8* %p) {
   %1 = call i8* @objc_retain(i8* %p)
@@ -201,7 +201,7 @@ define i8* @test12(i8* %p) {
 
 ; CHECK: define i8* @test13(
 ; CHECK: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
 ; CHECK: ret i8* %p
 define i8* @test13() {
   %p = call i8* @returner()
@@ -215,7 +215,7 @@ define i8* @test13() {
 ; argument is not a return value.
 
 ; CHECK: define void @test14(
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test14(i8* %p) {
   call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
@@ -227,7 +227,7 @@ define void @test14(i8* %p) {
 
 ; CHECK: define void @test15(
 ; CHECK-NEXT: %y = call i8* @returner()
-; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test15() {
   %y = call i8* @returner()
@@ -240,7 +240,7 @@ define void @test15() {
 
 ; CHECK: define void @test16(
 ; CHECK-NEXT: %y = call i8* @returner()
-; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test16() {
   %y = call i8* @returner()
@@ -252,7 +252,7 @@ define void @test16() {
 ; argument is not a return value.
 
 ; CHECK: define void @test17(
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test17(i8* %y) {
   call i8* @objc_retain(i8* %y)
@@ -265,7 +265,7 @@ define void @test17(i8* %y) {
 ; CHECK: define void @test18(
 ; CHECK-NEXT: %y = call i8* @returner()
 ; CHECK-NEXT: call void @callee()
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test18() {
   %y = call i8* @returner()
@@ -323,7 +323,7 @@ define i8* @test22(i8* %p) {
 ; Convert autoreleaseRV to autorelease.
 
 ; CHECK: define void @test23(
-; CHECK: tail call i8* @objc_autorelease(i8* %p) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
 define void @test23(i8* %p) {
   store i8 0, i8* %p
   call i8* @objc_autoreleaseReturnValue(i8* %p)
@@ -340,3 +340,5 @@ define {}* @test24(i8* %p) {
   %s = bitcast i8* %p to {}*
   ret {}* %s
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/split-backedge.ll b/test/Transforms/ObjCARC/split-backedge.ll
index 08e2dce1f551..5ac278a45d50 100644
--- a/test/Transforms/ObjCARC/split-backedge.ll
+++ b/test/Transforms/ObjCARC/split-backedge.ll
@@ -4,12 +4,12 @@
 ; rdar://11256239
 
 ; CHECK: define void @test0
-; CHECK: call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call i8* @objc_retain(i8* %cond) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %cond) nounwind
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW]]
+; CHECK: call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %cond) [[NUW]]
 define void @test0() {
 entry:
   br label %while.body
@@ -46,3 +46,5 @@ declare i8* @objc_retain(i8*)
 declare void @use_pointer(i8*)
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
new file mode 100644
index 000000000000..26cd67727e6a
--- /dev/null
+++ b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
@@ -0,0 +1,74 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+declare i8* @objc_release(i8* %x)
+declare i8* @objc_retain(i8* %x)
+declare i8* @objc_autorelease(i8* %x)
+declare i8* @objc_autoreleaseReturnValue(i8* %x)
+declare i8* @objc_retainAutoreleasedReturnValue(i8* %x)
+
+; Never tail call objc_autorelease.
+define i8* @test0(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x)
+  %tmp0 = call i8* @objc_autorelease(i8* %x)
+  ; CHECK: %tmp1 = call i8* @objc_autorelease(i8* %x)
+  %tmp1 = tail call i8* @objc_autorelease(i8* %x)
+
+  ret i8* %x
+}
+
+; Always tail call autoreleaseReturnValue.
+define i8* @test1(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  %tmp0 = call i8* @objc_autoreleaseReturnValue(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  ret i8* %x
+}
+
+; Always tail call objc_retain.
+define i8* @test2(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = tail call i8* @objc_retain(i8* %x)
+  %tmp0 = call i8* @objc_retain(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %x)
+  %tmp1 = tail call i8* @objc_retain(i8* %x)
+  ret i8* %x
+}
+
+define i8* @tmp(i8* %x) {
+  ret i8* %x
+}
+
+; Always tail call objc_retainAutoreleasedReturnValue.
+define i8* @test3(i8* %x) {
+entry:
+  %y = call i8* @tmp(i8* %x)
+  ; CHECK: %tmp0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
+  %tmp0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
+  %z = call i8* @tmp(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+  %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+  ret i8* %x
+}
+
+; By itself, we should never change whether or not objc_release is tail called.
+define i8* @test4(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = call i8* @objc_release(i8* %x)
+  %tmp0 = call i8* @objc_release(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_release(i8* %x)
+  %tmp1 = tail call i8* @objc_release(i8* %x)
+  ret i8* %x
+}
+
+; If we convert a tail called @objc_autoreleaseReturnValue to an
+; @objc_autorelease, ensure that the tail call is removed.
+define i8* @test5(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x)
+  %tmp0 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  ret i8* %tmp0
+}
+
diff --git a/test/Transforms/ObjCARC/weak-copies.ll b/test/Transforms/ObjCARC/weak-copies.ll
index e1a94bb4749a..5dab4e049e22 100644
--- a/test/Transforms/ObjCARC/weak-copies.ll
+++ b/test/Transforms/ObjCARC/weak-copies.ll
@@ -19,7 +19,7 @@ target triple = "x86_64-apple-darwin11.0.0"
 ; CHECK:      define void @foo() {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %call = call i8* @bar()
-; CHECK-NEXT:   call void @use(i8* %call) nounwind
+; CHECK-NEXT:   call void @use(i8* %call) [[NUW:#[0-9]+]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @foo() {
@@ -39,7 +39,7 @@ entry:
 
 ; Eliminate unnecessary weak pointer copies in a block initialization.
 
-; CHECK:      define void @qux(i8* %me) nounwind {
+; CHECK:      define void @qux(i8* %me) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %block = alloca %1, align 8
 ; CHECK-NOT:    alloca
@@ -84,4 +84,6 @@ declare i8* @objc_loadWeak(i8**)
 declare void @use(i8*) nounwind
 declare void @objc_destroyWeak(i8**)
 
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll b/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
index 8859da8de106..53d98e02ec88 100644
--- a/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
+++ b/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O2 %s -S -o - | FileCheck %s
+; RUN: opt -O2 -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin11.1"
diff --git a/test/Transforms/PhaseOrdering/PR6627.ll b/test/Transforms/PhaseOrdering/PR6627.ll
index ef9947f103a8..58b762a7af49 100644
--- a/test/Transforms/PhaseOrdering/PR6627.ll
+++ b/test/Transforms/PhaseOrdering/PR6627.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O3 -S %s | FileCheck %s
+; RUN: opt -O3 -S < %s | FileCheck %s
 ; XFAIL: *
 
 declare i32 @doo(...)
diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll
index 88ebca0a9c3d..8fbe8c58f451 100644
--- a/test/Transforms/PhaseOrdering/basic.ll
+++ b/test/Transforms/PhaseOrdering/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O3 -S %s | FileCheck %s
+; RUN: opt -O3 -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.6.7"
diff --git a/test/Transforms/PhaseOrdering/gdce.ll b/test/Transforms/PhaseOrdering/gdce.ll
index 273e47e97cb4..95f06757a788 100644
--- a/test/Transforms/PhaseOrdering/gdce.ll
+++ b/test/Transforms/PhaseOrdering/gdce.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O2 -S %s | FileCheck %s
+; RUN: opt -O2 -S < %s | FileCheck %s
 
 ; Run global DCE to eliminate unused ctor and dtor.
 ; rdar://9142819
diff --git a/test/Transforms/PhaseOrdering/scev.ll b/test/Transforms/PhaseOrdering/scev.ll
index c73128082216..39adb6b73d3a 100644
--- a/test/Transforms/PhaseOrdering/scev.ll
+++ b/test/Transforms/PhaseOrdering/scev.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O3 -S -analyze -scalar-evolution %s | FileCheck %s
+; RUN: opt -O3 -S -analyze -scalar-evolution < %s | FileCheck %s
 ;
 ; This file contains phase ordering tests for scalar evolution.
 ; Test that the standard passes don't obfuscate the IR so scalar evolution can't
diff --git a/test/Transforms/Reassociate/crash.ll b/test/Transforms/Reassociate/crash.ll
index e29b5dc9c0ce..770f97371d7e 100644
--- a/test/Transforms/Reassociate/crash.ll
+++ b/test/Transforms/Reassociate/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -reassociate -disable-output %s
+; RUN: opt -reassociate -disable-output < %s
 
 
 ; rdar://7507855
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
new file mode 100644
index 000000000000..d371a9b5b68f
--- /dev/null
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -0,0 +1,166 @@
+;RUN: opt -S -reassociate < %s | FileCheck %s
+
+; ==========================================================================
+;
+;   Xor reassociation general cases
+;  
+; ==========================================================================
+
+; (x | c1) ^ (x | c2) => (x & c3) ^ c3, where c3 = c1^c2
+;   
+define i32 @xor1(i32 %x) {
+  %or = or i32 %x, 123
+  %or1 = or i32 %x, 456
+  %xor = xor i32 %or, %or1
+  ret i32 %xor
+
+;CHECK: @xor1
+;CHECK: %and.ra = and i32 %x, 435
+;CHECK: %xor = xor i32 %and.ra, 435
+}
+
+; Test rule : (x & c1) ^ (x & c2) = (x & (c1^c2))
+; Real testing case : (x & 123) ^ y ^ (x & 345) => (x & 435) ^ y
+define i32 @xor2(i32 %x, i32 %y) {
+  %and = and i32 %x, 123
+  %xor = xor i32 %and, %y
+  %and1 = and i32 %x, 456
+  %xor2 = xor i32 %xor, %and1
+  ret i32 %xor2
+
+;CHECK: @xor2
+;CHECK: %and.ra = and i32 %x, 435
+;CHECK: %xor2 = xor i32 %and.ra, %y
+}
+
+; Test rule: (x | c1) ^ (x & c2) = (x & c3) ^ c1, where c3 = ~c1 ^ c2
+;  c3 = ~c1 ^ c2
+define i32 @xor3(i32 %x, i32 %y) {
+  %or = or i32 %x, 123
+  %xor = xor i32 %or, %y
+  %and = and i32 %x, 456
+  %xor1 = xor i32 %xor, %and
+  ret i32 %xor1
+
+;CHECK: @xor3
+;CHECK: %and.ra = and i32 %x, -436
+;CHECK: %xor = xor i32 %y, 123
+;CHECK: %xor1 = xor i32 %xor, %and.ra
+}
+
+; Test rule: (x | c1) ^ c2 = (x & ~c1) ^ (c1 ^ c2)
+define i32 @xor4(i32 %x, i32 %y) {
+  %and = and i32 %x, -124
+  %xor = xor i32 %y, 435
+  %xor1 = xor i32 %xor, %and
+  ret i32 %xor1
+; CHECK: @xor4
+; CHECK: %and = and i32 %x, -124
+; CHECK: %xor = xor i32 %y, 435
+; CHECK: %xor1 = xor i32 %xor, %and
+}
+
+; ==========================================================================
+;
+;  Xor reassociation special cases
+;  
+; ==========================================================================
+
+; Special case1: 
+;  (x | c1) ^ (x & ~c1) = c1
+define i32 @xor_special1(i32 %x, i32 %y) {
+  %or = or i32 %x, 123
+  %xor = xor i32 %or, %y
+  %and = and i32 %x, -124
+  %xor1 = xor i32 %xor, %and
+  ret i32 %xor1
+; CHECK: @xor_special1
+; CHECK: %xor1 = xor i32 %y, 123
+; CHECK: ret i32 %xor1
+}
+
+; Special case1: 
+;  (x | c1) ^ (x & c1) = x ^ c1
+define i32 @xor_special2(i32 %x, i32 %y) {
+  %or = or i32 %x, 123
+  %xor = xor i32 %or, %y
+  %and = and i32 %x, 123
+  %xor1 = xor i32 %xor, %and
+  ret i32 %xor1
+; CHECK: @xor_special2
+; CHECK: %xor = xor i32 %y, 123
+; CHECK: %xor1 = xor i32 %xor, %x
+; CHECK: ret i32 %xor1
+}
+
+; (x | c1) ^ (x | c1) => 0
+define i32 @xor_special3(i32 %x) {
+  %or = or i32 %x, 123
+  %or1 = or i32 %x, 123
+  %xor = xor i32 %or, %or1
+  ret i32 %xor
+;CHECK: @xor_special3
+;CHECK: ret i32 0
+}
+
+; (x & c1) ^ (x & c1) => 0
+define i32 @xor_special4(i32 %x) {
+  %or = and i32 %x, 123
+  %or1 = and i32 123, %x
+  %xor = xor i32 %or, %or1
+  ret i32 %xor
+;CHECK: @xor_special4
+;CHECK: ret i32 0
+}
+
+; ==========================================================================
+;
+;  Xor reassociation curtail code size
+;  
+; ==========================================================================
+
+; (x | c1) ^ (x | c2) => (x & c3) ^ c3
+; is enabled if one of operands has multiple uses
+;   
+define i32 @xor_ra_size1(i32 %x) {
+  %or = or i32 %x, 123
+  %or1 = or i32 %x, 456
+  %xor = xor i32 %or, %or1
+
+  %add = add i32 %xor, %or
+  ret i32 %add
+;CHECK: @xor_ra_size1
+;CHECK: %xor = xor i32 %and.ra, 435
+}
+
+; (x | c1) ^ (x | c2) => (x & c3) ^ c3
+; is disenabled if bothf operands has multiple uses.
+;   
+define i32 @xor_ra_size2(i32 %x) {
+  %or = or i32 %x, 123
+  %or1 = or i32 %x, 456
+  %xor = xor i32 %or, %or1
+
+  %add = add i32 %xor, %or
+  %add2 = add i32 %add, %or1
+  ret i32 %add2
+
+;CHECK: @xor_ra_size2
+;CHECK: %or1 = or i32 %x, 456
+;CHECK: %xor = xor i32 %or, %or1
+}
+
+
+; ==========================================================================
+;
+;  Xor reassociation bugs
+;  
+; ==========================================================================
+
+@xor_bug1_data = external global <{}>, align 4
+define void @xor_bug1() {
+  %1 = ptrtoint i32* undef to i64
+  %2 = xor i64 %1, ptrtoint (<{}>* @xor_bug1_data to i64)
+  %3 = and i64 undef, %2
+  ret void
+}
diff --git a/test/Transforms/Reg2Mem/crash.ll b/test/Transforms/Reg2Mem/crash.ll
new file mode 100644
index 000000000000..02fed94b8527
--- /dev/null
+++ b/test/Transforms/Reg2Mem/crash.ll
@@ -0,0 +1,88 @@
+; RUN: opt -reg2mem -disable-output < %s
+; PR14782
+
+declare void @f1()
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare void @f2()
+
+declare void @f3()
+
+declare void @f4_()
+
+declare void @_Z12xxxdtsP10xxxpq()
+
+define hidden void @_ZN12xxxyzIi9xxxwLi29ELi0EE4f3NewES0_i() ssp align 2 {
+bb:
+  invoke void @f4_()
+          to label %bb1 unwind label %.thread
+
+.thread:                                          ; preds = %bb
+  %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %bb13
+
+bb1:                                              ; preds = %bb
+  invoke void @f1()
+          to label %.noexc unwind label %bb10
+
+.noexc:                                           ; preds = %bb1
+  invoke void @f4_()
+          to label %bb6 unwind label %bb2
+
+bb2:                                              ; preds = %.noexc
+  %tmp3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  invoke void @f3()
+          to label %.body unwind label %bb4
+
+bb4:                                              ; preds = %bb2
+  %tmp5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  unreachable
+
+bb6:                                              ; preds = %.noexc
+  invoke void @_Z12xxxdtsP10xxxpq()
+          to label %_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit unwind label %bb10
+
+_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit:  ; preds = %bb6
+  invoke void @f2()
+          to label %bb7 unwind label %bb8
+
+bb7:                                              ; preds = %_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit
+  ret void
+
+bb8:                                              ; preds = %_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit
+  %tmp9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %_ZN10xxxpqdlev.exit
+
+bb10:                                             ; preds = %bb6, %bb1
+  %.1 = phi i1 [ true, %bb1 ], [ false, %bb6 ]
+  %tmp11 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %.body
+
+.body:                                            ; preds = %bb10, %bb2
+  %.1.lpad-body = phi i1 [ %.1, %bb10 ], [ true, %bb2 ]
+  invoke void @f2()
+          to label %bb12 unwind label %bb14
+
+bb12:                                             ; preds = %.body
+  br i1 %.1.lpad-body, label %bb13, label %_ZN10xxxpqdlev.exit
+
+bb13:                                             ; preds = %bb12, %.thread
+  invoke void @xxx_MemFree()
+          to label %_ZN10xxxpqdlev.exit unwind label %bb14
+
+_ZN10xxxpqdlev.exit:                              ; preds = %bb13, %bb12, %bb8
+  resume { i8*, i32 } undef
+
+bb14:                                             ; preds = %bb13, %.body
+  %tmp15 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  unreachable
+}
+
+declare void @xxx_MemFree()
diff --git a/test/Transforms/Reg2Mem/lit.local.cfg b/test/Transforms/Reg2Mem/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/Reg2Mem/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/crash.ll b/test/Transforms/SCCP/crash.ll
index 2f6da1d726a0..88528902d721 100644
--- a/test/Transforms/SCCP/crash.ll
+++ b/test/Transforms/SCCP/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -sccp -S
+; RUN: opt -sccp -S < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin10.0"
 
diff --git a/test/Transforms/SCCP/ipsccp-addr-taken.ll b/test/Transforms/SCCP/ipsccp-addr-taken.ll
index c6572fa5d141..b49da97ab2c0 100644
--- a/test/Transforms/SCCP/ipsccp-addr-taken.ll
+++ b/test/Transforms/SCCP/ipsccp-addr-taken.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -ipsccp -S | FileCheck %s
+; RUN: opt -ipsccp -S < %s | FileCheck %s
 ; PR7876
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/SCCP/retvalue-undef.ll b/test/Transforms/SCCP/retvalue-undef.ll
index 389561f8a112..5a4ba113b7c0 100644
--- a/test/Transforms/SCCP/retvalue-undef.ll
+++ b/test/Transforms/SCCP/retvalue-undef.ll
@@ -1,4 +1,4 @@
-; RUN: opt -ipsccp -S %s | FileCheck %s
+; RUN: opt -ipsccp -S < %s | FileCheck %s
 ; PR6414
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/SCCP/undef-resolve.ll b/test/Transforms/SCCP/undef-resolve.ll
index a3dddb799a6a..a1a600c9607a 100644
--- a/test/Transforms/SCCP/undef-resolve.ll
+++ b/test/Transforms/SCCP/undef-resolve.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -sccp -S | FileCheck %s
+; RUN: opt -sccp -S < %s | FileCheck %s
 
 
 ; PR6940
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 9fe926ee2cc1..30dd21774343 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -500,14 +500,27 @@ entry:
 
 define i64 @test9() {
 ; Ensure we can handle loads off the end of an alloca even when wrapped in
-; weird bit casts and types. The result is undef, but this shouldn't crash
-; anything.
+; weird bit casts and types. This is valid IR due to the alignment and masking
+; off the bits past the end of the alloca.
+;
 ; CHECK: @test9
 ; CHECK-NOT: alloca
-; CHECK: ret i64 undef
+; CHECK:      %[[b2:.*]] = zext i8 26 to i64
+; CHECK-NEXT: %[[s2:.*]] = shl i64 %[[b2]], 16
+; CHECK-NEXT: %[[m2:.*]] = and i64 undef, -16711681
+; CHECK-NEXT: %[[i2:.*]] = or i64 %[[m2]], %[[s2]]
+; CHECK-NEXT: %[[b1:.*]] = zext i8 0 to i64
+; CHECK-NEXT: %[[s1:.*]] = shl i64 %[[b1]], 8
+; CHECK-NEXT: %[[m1:.*]] = and i64 %[[i2]], -65281
+; CHECK-NEXT: %[[i1:.*]] = or i64 %[[m1]], %[[s1]]
+; CHECK-NEXT: %[[b0:.*]] = zext i8 0 to i64
+; CHECK-NEXT: %[[m0:.*]] = and i64 %[[i1]], -256
+; CHECK-NEXT: %[[i0:.*]] = or i64 %[[m0]], %[[b0]]
+; CHECK-NEXT: %[[result:.*]] = and i64 %[[i0]], 16777215
+; CHECK-NEXT: ret i64 %[[result]]
 
 entry:
-  %a = alloca { [3 x i8] }
+  %a = alloca { [3 x i8] }, align 8
   %gep1 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 0
   store i8 0, i8* %gep1, align 1
   %gep2 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 1
@@ -516,7 +529,8 @@ entry:
   store i8 26, i8* %gep3, align 1
   %cast = bitcast { [3 x i8] }* %a to { i64 }*
   %elt = getelementptr inbounds { i64 }* %cast, i32 0, i32 0
-  %result = load i64* %elt
+  %load = load i64* %elt
+  %result = and i64 %load, 16777215
   ret i64 %result
 }
 
@@ -575,8 +589,8 @@ entry:
   store i8 0, i8* %a2ptr
   %aiptr = bitcast [3 x i8]* %a to i24*
   %ai = load i24* %aiptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 ; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
 ; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16
 ; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535
@@ -597,8 +611,8 @@ entry:
   %b1 = load i8* %b1ptr
   %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
   %b2 = load i8* %b2ptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 ; CHECK:      %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8
 ; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
 ; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
@@ -617,11 +631,12 @@ define i32 @test13() {
 ; Ensure we don't crash and handle undefined loads that straddle the end of the
 ; allocation.
 ; CHECK: @test13
-; CHECK: %[[ret:.*]] = zext i16 undef to i32
-; CHECK: ret i32 %[[ret]]
+; CHECK:      %[[value:.*]] = zext i8 0 to i16
+; CHECK-NEXT: %[[ret:.*]] = zext i16 %[[value]] to i32
+; CHECK-NEXT: ret i32 %[[ret]]
 
 entry:
-  %a = alloca [3 x i8]
+  %a = alloca [3 x i8], align 2
   %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
   store i8 0, i8* %b0ptr
   %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
@@ -1160,19 +1175,71 @@ define void @PR14548(i1 %x) {
 entry:
   %a = alloca <{ i1 }>, align 8
   %b = alloca <{ i1 }>, align 8
-; Nothing of interest is simplified here.
-; CHECK: alloca
-; CHECK: alloca
+; CHECK:      %[[a:.*]] = alloca i8, align 8
 
   %b.i1 = bitcast <{ i1 }>* %b to i1*
   store i1 %x, i1* %b.i1, align 8
   %b.i8 = bitcast <{ i1 }>* %b to i8*
   %foo = load i8* %b.i8, align 1
+; CHECK-NEXT: {{.*}} = zext i1 %x to i8
+; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8
+; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8
+; CHECK-NEXT: {{.*}} = load i8* %[[a]], align 8
 
   %a.i8 = bitcast <{ i1 }>* %a to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind
   %bar = load i8* %a.i8, align 1
   %a.i1 = getelementptr inbounds <{ i1 }>* %a, i32 0, i32 0
   %baz = load i1* %a.i1, align 1
+; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1*
+; CHECK-NEXT: {{.*}} = load i1* %[[a_cast]], align 8
+
   ret void
 }
+
+define <3 x i8> @PR14572.1(i32 %x) {
+; Ensure that a split integer store which is wider than the type size of the
+; alloca (relying on the alloc size padding) doesn't trigger an assert.
+; CHECK: @PR14572.1
+
+entry:
+  %a = alloca <3 x i8>, align 4
+; CHECK-NOT: alloca
+
+  %cast = bitcast <3 x i8>* %a to i32*
+  store i32 %x, i32* %cast, align 1
+  %y = load <3 x i8>* %a, align 4
+  ret <3 x i8> %y
+; CHECK: ret <3 x i8>
+}
+
+define i32 @PR14572.2(<3 x i8> %x) {
+; Ensure that a split integer load which is wider than the type size of the
+; alloca (relying on the alloc size padding) doesn't trigger an assert.
+; CHECK: @PR14572.2
+
+entry:
+  %a = alloca <3 x i8>, align 4
+; CHECK-NOT: alloca
+
+  store <3 x i8> %x, <3 x i8>* %a, align 1
+  %cast = bitcast <3 x i8>* %a to i32*
+  %y = load i32* %cast, align 4
+  ret i32 %y
+; CHECK: ret i32
+}
+
+define i32 @PR14601(i32 %x) {
+; Don't try to form a promotable integer alloca when there is a variable length
+; memory intrinsic.
+; CHECK: @PR14601
+
+entry:
+  %a = alloca i32
+; CHECK: alloca
+
+  %a.i8 = bitcast i32* %a to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i32 1, i1 false)
+  %v = load i32* %a
+  ret i32 %v
+}
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll
index 1ac6d25d6341..64a0cc743974 100644
--- a/test/Transforms/SROA/big-endian.ll
+++ b/test/Transforms/SROA/big-endian.ll
@@ -24,8 +24,8 @@ entry:
   store i8 0, i8* %a2ptr
   %aiptr = bitcast [3 x i8]* %a to i24*
   %ai = load i24* %aiptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 ; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
 ; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256
 ; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]]
@@ -46,8 +46,8 @@ entry:
   %b1 = load i8* %b1ptr
   %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
   %b2 = load i8* %b2ptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 ; CHECK:      %[[shift0:.*]] = lshr i24 %[[insert0]], 16
 ; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8
 ; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
@@ -77,8 +77,8 @@ entry:
   %a2ptr = getelementptr [7 x i8]* %a, i64 0, i32 2
   %a3ptr = getelementptr [7 x i8]* %a, i64 0, i32 3
 
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 
   %a0i16ptr = bitcast i8* %a0ptr to i16*
   store i16 1, i16* %a0i16ptr
@@ -98,8 +98,8 @@ entry:
 ; CHECK-NEXT: %[[mask3:.*]] = and i56 undef, -1099511627776
 ; CHECK-NEXT: %[[insert3:.*]] = or i56 %[[mask3]], %[[ext3]]
 
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
 
   %aiptr = bitcast [7 x i8]* %a to i56*
   %ai = load i56* %aiptr
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index 921016a9c24b..b9931800e7f4 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -396,9 +396,10 @@ define i64 @PR14132(i1 %flag) {
 ; Here we form a PHI-node by promoting the pointer alloca first, and then in
 ; order to promote the other two allocas, we speculate the load of the
 ; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
-; alloca, which is completely bogus. However, we were asserting on trying to
-; rewrite it. Now it is replaced with undef. Eventually we may replace it with
-; unrechable and even the CFG will go away here.
+; alloca. While this is a bit dubious, we were asserting on trying to
+; rewrite it. The trick is that the code using the value may carefully take
+; steps to only use the not-undef bits, and so we need to at least loosely
+; support this..
 entry:
   %a = alloca i64
   %b = alloca i8
@@ -414,13 +415,14 @@ entry:
 if.then:
   store i8* %b, i8** %ptr.cast
   br label %if.end
+; CHECK-NOT: store
+; CHECK: %[[ext:.*]] = zext i8 1 to i64
 
 if.end:
   %tmp = load i64** %ptr
   %result = load i64* %tmp
-; CHECK-NOT: store
 ; CHECK-NOT: load
-; CHECK: %[[result:.*]] = phi i64 [ undef, %if.then ], [ 0, %entry ]
+; CHECK: %[[result:.*]] = phi i64 [ %[[ext]], %if.then ], [ 0, %entry ]
 
   ret i64 %result
 ; CHECK-NEXT: ret i64 %[[result]]
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index ea28f5d1a647..02f6d040cc95 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -36,15 +36,15 @@ entry:
 
 define i32 @test2(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK: @test2
-; FIXME: This should be handled!
 entry:
 	%a = alloca [2 x <4 x i32>]
-; CHECK: alloca <4 x i32>
+; CHECK-NOT: alloca
 
   %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
   store <4 x i32> %x, <4 x i32>* %a.x
   %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
   store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
 
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
   %tmp1 = load i32* %a.tmp1
@@ -54,10 +54,18 @@ entry:
   %a.tmp3.cast = bitcast i32* %a.tmp3 to <2 x i32>*
   %tmp3.vec = load <2 x i32>* %a.tmp3.cast
   %tmp3 = extractelement <2 x i32> %tmp3.vec, i32 0
+; CHECK-NOT: load
+; CHECK:      %[[extract1:.*]] = extractelement <4 x i32> %x, i32 2
+; CHECK-NEXT: %[[extract2:.*]] = extractelement <4 x i32> %y, i32 3
+; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: %[[extract4:.*]] = extractelement <2 x i32> %[[extract3]], i32 0
 
   %tmp4 = add i32 %tmp1, %tmp2
   %tmp5 = add i32 %tmp3, %tmp4
   ret i32 %tmp5
+; CHECK-NEXT: %[[sum1:.*]] = add i32 %[[extract1]], %[[extract2]]
+; CHECK-NEXT: %[[sum2:.*]] = add i32 %[[extract4]], %[[sum1]]
+; CHECK-NEXT: ret i32 %[[sum2]]
 }
 
 define i32 @test3(<4 x i32> %x, <4 x i32> %y) {
@@ -206,6 +214,154 @@ define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
   ret i64 %res
 }
 
+define <4 x i32> @test_subvec_store() {
+; CHECK: @test_subvec_store
+entry:
+  %a = alloca <4 x i32>
+; CHECK-NOT: alloca
+
+  %a.gep0 = getelementptr <4 x i32>* %a, i32 0, i32 0
+  %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
+  store <2 x i32> <i32 0, i32 0>, <2 x i32>* %a.cast0
+; CHECK-NOT: store
+; CHECK:      %[[insert1:.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>, <4 x i32> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+
+  %a.gep1 = getelementptr <4 x i32>* %a, i32 0, i32 1
+  %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
+  store <2 x i32> <i32 1, i32 1>, <2 x i32>* %a.cast1
+; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>, <4 x i32> %[[insert1]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+
+  %a.gep2 = getelementptr <4 x i32>* %a, i32 0, i32 2
+  %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
+  store <2 x i32> <i32 2, i32 2>, <2 x i32>* %a.cast2
+; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x i32> <i32 undef, i32 undef, i32 2, i32 2>, <4 x i32> %[[insert2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+
+  %a.gep3 = getelementptr <4 x i32>* %a, i32 0, i32 3
+  store i32 3, i32* %a.gep3
+; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x i32> %[[insert3]], i32 3, i32 3
+
+  %ret = load <4 x i32>* %a
+
+  ret <4 x i32> %ret
+; CHECK-NEXT: ret <4 x i32> %[[insert4]]
+}
+
+define <4 x i32> @test_subvec_load() {
+; CHECK: @test_subvec_load
+entry:
+  %a = alloca <4 x i32>
+; CHECK-NOT: alloca
+  store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a
+; CHECK-NOT: store
+
+  %a.gep0 = getelementptr <4 x i32>* %a, i32 0, i32 0
+  %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
+  %first = load <2 x i32>* %a.cast0
+; CHECK-NOT: load
+; CHECK:      %[[extract1:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+
+  %a.gep1 = getelementptr <4 x i32>* %a, i32 0, i32 1
+  %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
+  %second = load <2 x i32>* %a.cast1
+; CHECK-NEXT: %[[extract2:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 1, i32 2>
+
+  %a.gep2 = getelementptr <4 x i32>* %a, i32 0, i32 2
+  %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
+  %third = load <2 x i32>* %a.cast2
+; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+  %tmp = shufflevector <2 x i32> %first, <2 x i32> %second, <2 x i32> <i32 0, i32 2>
+  %ret = shufflevector <2 x i32> %tmp, <2 x i32> %third, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: %[[tmp:.*]] = shufflevector <2 x i32> %[[extract1]], <2 x i32> %[[extract2]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: %[[ret:.*]] = shufflevector <2 x i32> %[[tmp]], <2 x i32> %[[extract3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+  ret <4 x i32> %ret
+; CHECK-NEXT: ret <4 x i32> %[[ret]]
+}
+
+declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i32, i1) nounwind
+
+define <4 x float> @test_subvec_memset() {
+; CHECK: @test_subvec_memset
+entry:
+  %a = alloca <4 x float>
+; CHECK-NOT: alloca
+
+  %a.gep0 = getelementptr <4 x float>* %a, i32 0, i32 0
+  %a.cast0 = bitcast float* %a.gep0 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i32 0, i1 false)
+; CHECK-NOT: store
+; CHECK:      %[[insert1:.*]] = shufflevector <4 x float> <float 0.000000e+00, float 0.000000e+00, float undef, float undef>, <4 x float> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+
+  %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1
+  %a.cast1 = bitcast float* %a.gep1 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x float> <float undef, float 0x3820202020000000, float 0x3820202020000000, float undef>, <4 x float> %[[insert1]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+
+  %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2
+  %a.cast2 = bitcast float* %a.gep2 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x float> <float undef, float undef, float 0x3860606060000000, float 0x3860606060000000>, <4 x float> %[[insert2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+
+  %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3
+  %a.cast3 = bitcast float* %a.gep3 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i32 0, i1 false)
+; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x float> %[[insert3]], float 0x38E0E0E0E0000000, i32 3
+
+  %ret = load <4 x float>* %a
+
+  ret <4 x float> %ret
+; CHECK-NEXT: ret <4 x float> %[[insert4]]
+}
+
+define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) {
+; CHECK: @test_subvec_memcpy
+entry:
+  %a = alloca <4 x float>
+; CHECK-NOT: alloca
+
+  %a.gep0 = getelementptr <4 x float>* %a, i32 0, i32 0
+  %a.cast0 = bitcast float* %a.gep0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i32 0, i1 false)
+; CHECK:      %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
+; CHECK-NEXT: %[[x:.*]] = load <2 x float>* %[[xptr]]
+; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: %[[insert_x:.*]] = shufflevector <4 x float> %[[expand_x]], <4 x float> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+
+  %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1
+  %a.cast1 = bitcast float* %a.gep1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
+; CHECK-NEXT: %[[y:.*]] = load <2 x float>* %[[yptr]]
+; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
+; CHECK-NEXT: %[[insert_y:.*]] = shufflevector <4 x float> %[[expand_y]], <4 x float> %[[insert_x]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+
+  %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2
+  %a.cast2 = bitcast float* %a.gep2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
+; CHECK-NEXT: %[[z:.*]] = load <2 x float>* %[[zptr]]
+; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT: %[[insert_z:.*]] = shufflevector <4 x float> %[[expand_z]], <4 x float> %[[insert_y]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+
+  %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3
+  %a.cast3 = bitcast float* %a.gep3 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i32 0, i1 false)
+; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float*
+; CHECK-NEXT: %[[f:.*]] = load float* %[[fptr]]
+; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> %[[insert_z]], float %[[f]], i32 3
+
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>*
+; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]]
+
+  %ret = load <4 x float>* %a
+
+  ret <4 x float> %ret
+; CHECK-NEXT: ret <4 x float> %[[insert_f]]
+}
+
 define i32 @PR14212() {
 ; CHECK: @PR14212
 ; This caused a crash when "splitting" the load of the i32 in order to promote
@@ -222,7 +378,7 @@ entry:
 }
 
 define <2 x i8> @PR14349.1(i32 %x) {
-; CEHCK: @PR14349.1
+; CHECK: @PR14349.1
 ; The first testcase for broken SROA rewriting of split integer loads and
 ; stores due to smaller vector loads and stores. This particular test ensures
 ; that we can rewrite a split store of an integer to a store of a vector.
@@ -244,7 +400,7 @@ entry:
 }
 
 define i32 @PR14349.2(<2 x i8> %x) {
-; CEHCK: @PR14349.2
+; CHECK: @PR14349.2
 ; The first testcase for broken SROA rewriting of split integer loads and
 ; stores due to smaller vector loads and stores. This particular test ensures
 ; that we can rewrite a split load of an integer to a load of a vector.
diff --git a/test/Transforms/SROA/vectors-of-pointers.ll b/test/Transforms/SROA/vectors-of-pointers.ll
new file mode 100644
index 000000000000..7e995b9e4476
--- /dev/null
+++ b/test/Transforms/SROA/vectors-of-pointers.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -sroa
+
+; Make sure we don't crash on this one.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define void @foo() {
+entry:
+  %Args.i = alloca <2 x i32*>, align 16
+  br i1 undef, label %bb0.exit158, label %if.then.i.i.i.i.i138
+
+if.then.i.i.i.i.i138:
+  unreachable
+
+bb0.exit158:
+  br i1 undef, label %bb0.exit257, label %if.then.i.i.i.i.i237
+
+if.then.i.i.i.i.i237:
+  unreachable
+
+bb0.exit257:
+  %0 = load <2 x i32*>* %Args.i, align 16
+  unreachable
+}
diff --git a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
index 0b5e4152c423..3f28cb187f86 100644
--- a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
+++ b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
@@ -1,7 +1,6 @@
 ; Scalar replacement was incorrectly promoting this alloca!!
 ;
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   sed "s/;.*//g" | grep "\["
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 
 define i8* @test() {
 	%A = alloca [30 x i8]		; <[30 x i8]*> [#uses=1]
@@ -10,4 +9,4 @@ define i8* @test() {
 	store i8 0, i8* %B
 	ret i8* %C
 }
-
+; CHECK: alloca [
diff --git a/test/Transforms/ScalarRepl/crash.ll b/test/Transforms/ScalarRepl/crash.ll
index 58c5a3a0527d..8c60dceb8b07 100644
--- a/test/Transforms/ScalarRepl/crash.ll
+++ b/test/Transforms/ScalarRepl/crash.ll
@@ -1,5 +1,5 @@
-; RUN: opt -scalarrepl %s -disable-output
-; RUN: opt -scalarrepl-ssa %s -disable-output
+; RUN: opt -scalarrepl -disable-output < %s
+; RUN: opt -scalarrepl-ssa -disable-output < %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index c1491345351e..7d3bcea8b857 100644
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -40,22 +40,23 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1}
 
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !"clang version 3.0 (trunk 131941)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131941)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590081, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !7 = metadata !{i32 1, i32 11, metadata !1, null}
-!8 = metadata !{i32 590081, metadata !1, metadata !"b", metadata !2, i32 33554433, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 786689, metadata !1, metadata !"b", metadata !2, i32 33554433, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !9 = metadata !{i32 1, i32 18, metadata !1, null}
-!10 = metadata !{i32 590080, metadata !11, metadata !"c", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 589835, metadata !1, i32 1, i32 21, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786688, metadata !11, metadata !"c", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 786443, metadata !1, i32 1, i32 21, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{i32 2, i32 9, metadata !11, null}
 !13 = metadata !{i32 2, i32 14, metadata !11, null}
 !14 = metadata !{i32 3, i32 5, metadata !11, null}
 !15 = metadata !{i32 4, i32 5, metadata !11, null}
 !16 = metadata !{i32 5, i32 5, metadata !11, null}
+!17 = metadata !{metadata !1}
+!18 = metadata !{metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b"}
diff --git a/test/Transforms/ScalarRepl/memcpy-align.ll b/test/Transforms/ScalarRepl/memcpy-align.ll
index a7af208f4f39..6046e1295d9e 100644
--- a/test/Transforms/ScalarRepl/memcpy-align.ll
+++ b/test/Transforms/ScalarRepl/memcpy-align.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -scalarrepl -S | FileCheck %s
+; RUN: opt -scalarrepl -S < %s | FileCheck %s
 ; PR6832
 target datalayout =
 "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
diff --git a/test/Transforms/ScalarRepl/phi-cycle.ll b/test/Transforms/ScalarRepl/phi-cycle.ll
index cb5101c2dd8e..05d9382cec40 100644
--- a/test/Transforms/ScalarRepl/phi-cycle.ll
+++ b/test/Transforms/ScalarRepl/phi-cycle.ll
@@ -67,7 +67,7 @@ while.cond.backedge.i:                            ; preds = %if.end.i, %while.bo
 
 ; CHECK: func.exit:
 ; CHECK-NOT: load
-; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) nounwind
+; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) [[NUW:#[0-9]+]]
 func.exit:                                        ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
   %tmp3 = load i32* %x.i, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
@@ -75,3 +75,6 @@ func.exit:                                        ; preds = %while.body.i.func.e
 }
 
 declare i32 @printf(i8* nocapture, ...) nounwind
+
+; CHECK: attributes #0 = { nounwind uwtable }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll
index ffe0b1dd5f47..5c21c3bd9f34 100644
--- a/test/Transforms/ScalarRepl/phi-select.ll
+++ b/test/Transforms/ScalarRepl/phi-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -scalarrepl -S | FileCheck %s
+; RUN: opt -scalarrepl -S < %s | FileCheck %s
 ; Test promotion of allocas that have phis and select users.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.2"
diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll
index 056526cbd92b..d506cdfbd87a 100644
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ b/test/Transforms/ScalarRepl/volatile.ll
@@ -1,12 +1,13 @@
-; RUN: opt < %s -scalarrepl -S | grep "load volatile"
-; RUN: opt < %s -scalarrepl -S | grep "store volatile"
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 
 define i32 @voltest(i32 %T) {
 	%A = alloca {i32, i32}
 	%B = getelementptr {i32,i32}* %A, i32 0, i32 0
 	store volatile i32 %T, i32* %B
+; CHECK: store volatile
 
 	%C = getelementptr {i32,i32}* %A, i32 0, i32 1
 	%X = load volatile i32* %C
+; CHECK: load volatile
 	ret i32 %X
 }
diff --git a/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll
index 7bffa1a8e0e2..333336de7673 100644
--- a/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll
+++ b/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -simplifycfg -disable-output
+; RUN: opt -simplifycfg -disable-output < %s
 ; END.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll b/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll
index feffb4e4c812..aba08dc073a8 100644
--- a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll
@@ -1,8 +1,6 @@
 ; Basic block #2 should not be merged into BB #3!
 ;
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   grep "br label"
-;
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 declare void @foo()
 
@@ -13,6 +11,7 @@ bb0:
 	br i1 %cond218, label %bb3, label %bb2
 bb2:		; preds = %bb0
 	call void @foo( )
+; CHECK: br label %bb3
 	br label %bb3
 bb3:		; preds = %bb2, %bb0
 	%reg117 = phi i32 [ 110, %bb2 ], [ %reg108, %bb0 ]		; <i32> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll b/test/Transforms/SimplifyCFG/PHINode.ll
index 88f32bc08279..25a242a55997 100644
--- a/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll
+++ b/test/Transforms/SimplifyCFG/PHINode.ll
@@ -1,10 +1,11 @@
 ; -simplifycfg is not folding blocks if there is a PHI node involved.  This 
 ; should be fixed eventually
 
-; RUN: opt < %s -simplifycfg -S | not grep br
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 define i32 @main(i32 %argc) {
 ; <label>:0
+; CHECK-NOT: br label %InlinedFunctionReturnNode
 	br label %InlinedFunctionReturnNode
 InlinedFunctionReturnNode:		; preds = %0
 	%X = phi i32 [ 7, %0 ]		; <i32> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/PR9946.ll b/test/Transforms/SimplifyCFG/PR9946.ll
index 4a61b846052e..c355a8f5cc98 100644
--- a/test/Transforms/SimplifyCFG/PR9946.ll
+++ b/test/Transforms/SimplifyCFG/PR9946.ll
@@ -1,4 +1,4 @@
-; RUN: opt  %s -simplifycfg -disable-output
+; RUN: opt -simplifycfg -disable-output < %s
 
 @foo = external constant i32
 
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index a61867fe89c7..dd2e5d1c3a77 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -44,3 +44,44 @@ join:
   ret i8 %c
 }
 
+define i8* @test4(i1* %dummy, i8* %a, i8* %b) {
+; Test that we don't speculate an arbitrarily large number of unfolded constant
+; expressions.
+; CHECK: @test4
+
+entry:
+  %cond1 = load volatile i1* %dummy
+  br i1 %cond1, label %if, label %end
+
+if:
+  %cond2 = load volatile i1* %dummy
+  br i1 %cond2, label %then, label %end
+
+then:
+  br label %end
+
+end:
+  %x1 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 1 to i8*), %then ]
+  %x2 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 2 to i8*), %then ]
+  %x3 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 3 to i8*), %then ]
+  %x4 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 4 to i8*), %then ]
+  %x5 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 5 to i8*), %then ]
+  %x6 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 6 to i8*), %then ]
+  %x7 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 7 to i8*), %then ]
+  %x8 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 8 to i8*), %then ]
+  %x9 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 9 to i8*), %then ]
+  %x10 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 10 to i8*), %then ]
+; CHECK-NOT: select
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+
+  ret i8* %x10
+}
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 8a59992f5e64..5f70465c64d4 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -777,3 +777,29 @@ return:
 ; CHECK: switch.lookup:
 ; CHECK: getelementptr inbounds [5 x i32]* @switch.table6, i32 0, i32 %switch.tableidx
 }
+
+; Don't create a table with illegal type
+; rdar://12779436
+define i96 @illegaltype(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 42, label %return
+    i32 43, label %sw.bb1
+    i32 44, label %sw.bb2
+    i32 45, label %sw.bb3
+    i32 46, label %sw.bb4
+  ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.default: br label %return
+return:
+  %retval.0 = phi i96 [ 15, %sw.default ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+  ret i96 %retval.0
+
+; CHECK: @illegaltype
+; CHECK-NOT: @switch.table
+; CHECK: switch i32 %c
+}
diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
index 0897c95a6778..0526883fe8f4 100644
--- a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
@@ -1,4 +1,4 @@
-; RUN: opt -simplifycfg -S %s | FileCheck %s
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
 
 %0 = type { i32*, i32* }
 
diff --git a/test/Transforms/SimplifyCFG/select-gep.ll b/test/Transforms/SimplifyCFG/select-gep.ll
index 7654d0271a9a..3e2a6237b275 100644
--- a/test/Transforms/SimplifyCFG/select-gep.ll
+++ b/test/Transforms/SimplifyCFG/select-gep.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -simplifycfg %s | FileCheck %s
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
 
 define i8* @test1(i8* %x, i64 %y) nounwind {
 entry:
diff --git a/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
index 673a62bf035c..9cd709ff8ecf 100644
--- a/test/Transforms/SimplifyCFG/switch-on-const-select.ll
+++ b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -35,7 +35,7 @@ define i32 @bar(i64 %x, i64 %y) nounwind {
 ; CHECK: @bar
 entry:
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.a() nounwind
+; CHECK-NEXT: tail call void @bees.a() [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret i32 0
     %lt = icmp slt i64 %x, %y
     %qux = select i1 %lt, i32 0, i32 2
@@ -61,7 +61,7 @@ define void @bazz(i64 %x, i64 %y) nounwind {
 ; CHECK: @bazz
 entry:
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.b() nounwind
+; CHECK-NEXT: tail call void @bees.b() [[NUW]]
 ; CHECK-NEXT: ret void
     %lt = icmp slt i64 %x, %y
     %qux = select i1 %lt, i32 10, i32 12
@@ -86,7 +86,7 @@ define void @quux(i64 %x, i64 %y) nounwind {
 ; CHECK: @quux
 entry:
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.a() nounwind
+; CHECK-NEXT: tail call void @bees.a() [[NUW]]
 ; CHECK-NEXT: ret void
     %lt = icmp slt i64 %x, %y
     %qux = select i1 %lt, i32 0, i32 0
@@ -136,3 +136,6 @@ bees:
 declare void @llvm.trap() nounwind noreturn
 declare void @bees.a() nounwind
 declare void @bees.b() nounwind
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { noreturn nounwind }
diff --git a/test/Transforms/SimplifyCFG/trivial-throw.ll b/test/Transforms/SimplifyCFG/trivial-throw.ll
new file mode 100644
index 000000000000..ca2b5693e600
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/trivial-throw.ll
@@ -0,0 +1,77 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+; <rdar://problem/13360379>
+
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTS13TestException = linkonce_odr constant [16 x i8] c"13TestException\00"
+@_ZTI13TestException = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([16 x i8]* @_ZTS13TestException, i32 0, i32 0) }
+
+define void @throw(i32 %n) #0 {
+entry:
+  %exception = call i8* @__cxa_allocate_exception(i64 1) #4
+  call void @__cxa_throw(i8* %exception, i8* bitcast ({ i8*, i8* }* @_ZTI13TestException to i8*), i8* null) #2
+  unreachable
+}
+
+define void @func() #0 {
+entry:
+; CHECK: func()
+; CHECK: invoke void @throw
+; CHECK-NOT: call void @throw
+  invoke void @throw(i32 42) #0
+          to label %exit unwind label %lpad
+
+lpad:
+  %tmp0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  resume { i8*, i32 } %tmp0
+
+exit:
+  invoke void @abort() #2
+          to label %invoke.cont unwind label %lpad1
+
+invoke.cont:
+  unreachable
+
+lpad1:
+  %tmp1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast ({ i8*, i8* }* @_ZTI13TestException to i8*)
+  %tmp2 = extractvalue { i8*, i32 } %tmp1, 1
+  %tmp3 = call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI13TestException to i8*)) #4
+  %matches = icmp eq i32 %tmp2, %tmp3
+  br i1 %matches, label %catch, label %eh.resume
+
+catch:
+  ret void
+
+eh.resume:
+  resume { i8*, i32 } %tmp1
+}
+
+define linkonce_odr hidden void @__clang_call_terminate(i8*) #1 {
+  %2 = call i8* @__cxa_begin_catch(i8* %0) #4
+  call void @_ZSt9terminatev() #5
+  unreachable
+}
+
+declare void @abort() #2
+
+declare i32 @llvm.eh.typeid.for(i8*) #3
+
+declare void @__cxa_end_catch()
+
+declare i8* @__cxa_allocate_exception(i64)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @_ZSt9terminatev()
+
+attributes #0 = { ssp uwtable }
+attributes #1 = { noinline noreturn nounwind }
+attributes #2 = { noreturn }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
+attributes #5 = { noreturn nounwind }
diff --git a/test/Transforms/SimplifyCFG/volatile-phioper.ll b/test/Transforms/SimplifyCFG/volatile-phioper.ll
new file mode 100644
index 000000000000..164898897eff
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/volatile-phioper.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+;
+; rdar:13349374
+;
+; SimplifyCFG should not eliminate blocks with volatile stores.
+; Essentially, volatile needs to be backdoor that tells the optimizer
+; it can no longer use language standard as an excuse. The compiler
+; needs to expose the volatile access to the platform.
+;
+; CHECK: @test
+; CHECK: entry:
+; CHECK: @Trace
+; CHECK: while.body:
+; CHECK: store volatile
+; CHECK: end:
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @test(i8** nocapture %PeiServices) #0 {
+entry:
+  %call = tail call i32 (...)* @Trace() #2
+  %tobool = icmp eq i32 %call, 0
+  br i1 %tobool, label %while.body, label %if.then
+
+if.then:                                          ; preds = %entry
+  %call1 = tail call i32 (...)* @Trace() #2
+  br label %while.body
+
+while.body:                                       ; preds = %entry, %if.then, %while.body
+  %Addr.017 = phi i8* [ %incdec.ptr, %while.body ], [ null, %if.then ], [ null, %entry ]
+  %x.016 = phi i8 [ %inc, %while.body ], [ 0, %if.then ], [ 0, %entry ]
+  %inc = add i8 %x.016, 1
+  %incdec.ptr = getelementptr inbounds i8* %Addr.017, i64 1
+  store volatile i8 %x.016, i8* %Addr.017, align 1
+  %0 = ptrtoint i8* %incdec.ptr to i64
+  %1 = trunc i64 %0 to i32
+  %cmp = icmp ult i32 %1, 4096
+  br i1 %cmp, label %while.body, label %end
+
+end:
+  ret void
+}
+declare i32 @Trace(...) #1
+
+attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #1 = { "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #2 = { nounwind }
+
+!0 = metadata !{i32 1039}
diff --git a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll b/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
deleted file mode 100644
index 73eb05b05e34..000000000000
--- a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S > %t
-; RUN: grep noalias %t | count 2
-; RUN: grep nocapture %t | count 3
-; RUN: grep nounwind %t | count 3
-; RUN: grep readonly %t | count 1
-
-declare i8* @fopen(i8*, i8*)
-declare i8 @strlen(i8*)
-declare i32* @realloc(i32*, i32)
-
-; Test deliberately wrong declaration
-declare i32 @strcpy(...)
diff --git a/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll b/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll
deleted file mode 100644
index ac89199b0ec1..000000000000
--- a/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -inline -simplify-libcalls -functionattrs | \
-; RUN:   llvm-dis | grep nocapture | count 2
-; Check that nocapture attributes are added when run after an SCC pass.
-; PR3520
-
-define i32 @use(i8* %x) nounwind readonly {
-entry:
-	%0 = tail call i64 @strlen(i8* %x) nounwind readonly		; <i64> [#uses=1]
-	%1 = trunc i64 %0 to i32		; <i32> [#uses=1]
-	ret i32 %1
-}
-
-declare i64 @strlen(i8*) nounwind readonly
diff --git a/test/Transforms/SimplifyLibCalls/FFS.ll b/test/Transforms/SimplifyLibCalls/FFS.ll
deleted file mode 100644
index 6aecbeacd7e6..000000000000
--- a/test/Transforms/SimplifyLibCalls/FFS.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; Test that FFSOpt works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; CHECK-NOT: call{{.*}}@ffs
-
-@non_const = external global i32		; <i32*> [#uses=1]
-
-declare i32 @ffs(i32)
-
-declare i32 @ffsl(i32)
-
-declare i32 @ffsll(i64)
-
-define i32 @main() {
-	%arg = load i32* @non_const		; <i32> [#uses=1]
-	%val0 = call i32 @ffs( i32 %arg )		; <i32> [#uses=1]
-	%val1 = call i32 @ffs( i32 1 )		; <i32> [#uses=1]
-	%val2 = call i32 @ffs( i32 2048 )		; <i32> [#uses=1]
-	%val3 = call i32 @ffsl( i32 65536 )		; <i32> [#uses=1]
-	%val4 = call i32 @ffsll( i64 1024 )		; <i32> [#uses=1]
-	%val5 = call i32 @ffsll( i64 17179869184 )		; <i32> [#uses=1]
-	%val6 = call i32 @ffsll( i64 1152921504606846976 )		; <i32> [#uses=1]
-	%rslt1 = add i32 %val1, %val2		; <i32> [#uses=1]
-	%rslt2 = add i32 %val3, %val4		; <i32> [#uses=1]
-	%rslt3 = add i32 %val5, %val6		; <i32> [#uses=1]
-	%rslt4 = add i32 %rslt1, %rslt2		; <i32> [#uses=1]
-	%rslt5 = add i32 %rslt4, %rslt3		; <i32> [#uses=2]
-	%rslt6 = add i32 %rslt5, %val0		; <i32> [#uses=0]
-	ret i32 %rslt5
-}
-
-
-; PR4206
-define i32 @a(i64) nounwind {
-        %2 = call i32 @ffsll(i64 %0)            ; <i32> [#uses=1]
-        ret i32 %2
-}
-
-; PR13028
-define i32 @b() nounwind {
-  %ffs = call i32 @ffsll(i64 0)
-  ret i32 %ffs
-; CHECK: @b
-; CHECK-NEXT: ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/FPrintF.ll b/test/Transforms/SimplifyLibCalls/FPrintF.ll
deleted file mode 100644
index 51733e4a1ef6..000000000000
--- a/test/Transforms/SimplifyLibCalls/FPrintF.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; Test that the FPrintFOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep "call.*fprintf"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
-	%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@str = constant [3 x i8] c"%s\00"		; <[3 x i8]*> [#uses=1]
-@chr = constant [3 x i8] c"%c\00"		; <[3 x i8]*> [#uses=1]
-@hello = constant [13 x i8] c"hello world\0A\00"		; <[13 x i8]*> [#uses=1]
-@stdout = external global %struct._IO_FILE*		; <%struct._IO_FILE**> [#uses=3]
-
-declare i32 @fprintf(%struct._IO_FILE*, i8*, ...)
-
-define i32 @foo() {
-entry:
-	%tmp.1 = load %struct._IO_FILE** @stdout		; <%struct._IO_FILE*> [#uses=1]
-	%tmp.0 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp.1, i8* getelementptr ([13 x i8]* @hello, i32 0, i32 0) )		; <i32> [#uses=0]
-	%tmp.4 = load %struct._IO_FILE** @stdout		; <%struct._IO_FILE*> [#uses=1]
-	%tmp.3 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp.4, i8* getelementptr ([3 x i8]* @str, i32 0, i32 0), i8* getelementptr ([13 x i8]* @hello, i32 0, i32 0) )		; <i32> [#uses=0]
-	%tmp.8 = load %struct._IO_FILE** @stdout		; <%struct._IO_FILE*> [#uses=1]
-	%tmp.7 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp.8, i8* getelementptr ([3 x i8]* @chr, i32 0, i32 0), i32 33 )		; <i32> [#uses=0]
-	ret i32 0
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/FPuts.ll b/test/Transforms/SimplifyLibCalls/FPuts.ll
deleted file mode 100644
index aa01aba2656c..000000000000
--- a/test/Transforms/SimplifyLibCalls/FPuts.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; Test that the FPutsOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep "call.*fputs"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
-	%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@stdout = external global %struct._IO_FILE*		; <%struct._IO_FILE**> [#uses=1]
-@empty = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
-@len1 = constant [2 x i8] c"A\00"		; <[2 x i8]*> [#uses=1]
-@long = constant [7 x i8] c"hello\0A\00"		; <[7 x i8]*> [#uses=1]
-
-declare i32 @fputs(i8*, %struct._IO_FILE*)
-
-define i32 @main() {
-entry:
-	%out = load %struct._IO_FILE** @stdout		; <%struct._IO_FILE*> [#uses=3]
-	%s1 = getelementptr [1 x i8]* @empty, i32 0, i32 0		; <i8*> [#uses=1]
-	%s2 = getelementptr [2 x i8]* @len1, i32 0, i32 0		; <i8*> [#uses=1]
-	%s3 = getelementptr [7 x i8]* @long, i32 0, i32 0		; <i8*> [#uses=1]
-	%a = call i32 @fputs( i8* %s1, %struct._IO_FILE* %out )		; <i32> [#uses=0]
-	%b = call i32 @fputs( i8* %s2, %struct._IO_FILE* %out )		; <i32> [#uses=0]
-	%c = call i32 @fputs( i8* %s3, %struct._IO_FILE* %out )		; <i32> [#uses=0]
-	ret i32 0
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/IsDigit.ll b/test/Transforms/SimplifyLibCalls/IsDigit.ll
deleted file mode 100644
index 51a769d9bb3d..000000000000
--- a/test/Transforms/SimplifyLibCalls/IsDigit.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; Test that the IsDigitOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep call
-
-declare i32 @isdigit(i32)
-
-declare i32 @isascii(i32)
-
-define i32 @main() {
-	%val1 = call i32 @isdigit( i32 47 )		; <i32> [#uses=1]
-	%val2 = call i32 @isdigit( i32 48 )		; <i32> [#uses=1]
-	%val3 = call i32 @isdigit( i32 57 )		; <i32> [#uses=1]
-	%val4 = call i32 @isdigit( i32 58 )		; <i32> [#uses=1]
-	%rslt1 = add i32 %val1, %val2		; <i32> [#uses=1]
-	%rslt2 = add i32 %val3, %val4		; <i32> [#uses=1]
-	%sum = add i32 %rslt1, %rslt2		; <i32> [#uses=1]
-	%rslt = call i32 @isdigit( i32 %sum )		; <i32> [#uses=1]
-	%tmp = call i32 @isascii( i32 %rslt )		; <i32> [#uses=1]
-	ret i32 %tmp
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/Printf.ll b/test/Transforms/SimplifyLibCalls/Printf.ll
deleted file mode 100644
index 489c993f2110..000000000000
--- a/test/Transforms/SimplifyLibCalls/Printf.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-@str = internal constant [13 x i8] c"hello world\0A\00"         ; <[13 x i8]*> [#uses=1]
-@str1 = internal constant [2 x i8] c"h\00"              ; <[2 x i8]*> [#uses=1]
-
-; CHECK: private unnamed_addr constant [12 x i8] c"hello world\00"
-
-declare i32 @printf(i8*, ...)
-
-; CHECK: define void @f0
-; CHECK-NOT: printf
-; CHECK: }
-define void @f0() {
-entry:
-        %tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8]* @str, i32 0, i32 0) )         ; <i32> [#uses=0]
-        ret void
-}
-
-; CHECK: define void @f1
-; CHECK-NOT: printf
-; CHECK: }
-define void @f1() {
-entry:
-        %tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([2 x i8]* @str1, i32 0, i32 0) )         ; <i32> [#uses=0]
-        ret void
-}
-
-; Verify that we don't turn this into a putchar call (thus changing the return
-; value).
-;
-; CHECK: define i32 @f2
-; CHECK: printf
-; CHECK: }
-define i32 @f2() {
-  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([2 x i8]* @str1, i32 0, i32 0))
-  ret i32 %call
-}
diff --git a/test/Transforms/SimplifyLibCalls/Puts.ll b/test/Transforms/SimplifyLibCalls/Puts.ll
deleted file mode 100644
index 48431434cc61..000000000000
--- a/test/Transforms/SimplifyLibCalls/Puts.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; Test that the PutsOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "-p:64:64:64"
-
-@.str = private constant [1 x i8] zeroinitializer
-
-declare i32 @puts(i8*)
-
-define void @foo() {
-entry:
-; CHECK: call i32 @putchar(i32 10)
-  %call = call i32 @puts(i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0))
-  ret void
-}
diff --git a/test/Transforms/SimplifyLibCalls/SPrintF.ll b/test/Transforms/SimplifyLibCalls/SPrintF.ll
deleted file mode 100644
index 514a7d9f6eee..000000000000
--- a/test/Transforms/SimplifyLibCalls/SPrintF.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; Test that the SPrintFOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep "call.*sprintf"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [6 x i8] c"hello\00"		; <[6 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
-@null_hello = constant [7 x i8] c"\00hello\00"		; <[7 x i8]*> [#uses=1]
-@fmt1 = constant [3 x i8] c"%s\00"		; <[3 x i8]*> [#uses=1]
-@fmt2 = constant [3 x i8] c"%c\00"		; <[3 x i8]*> [#uses=1]
-
-declare i32 @sprintf(i8*, i8*, ...)
-
-declare i32 @puts(i8*)
-
-define i32 @foo(i8* %p) {
-	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
-	%target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=7]
-	%hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0		; <i8*> [#uses=2]
-	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0		; <i8*> [#uses=1]
-	%nh_p = getelementptr [7 x i8]* @null_hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%fmt1_p = getelementptr [3 x i8]* @fmt1, i32 0, i32 0		; <i8*> [#uses=2]
-	%fmt2_p = getelementptr [3 x i8]* @fmt2, i32 0, i32 0		; <i8*> [#uses=1]
-	store i8 0, i8* %target_p
-	%r1 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %hello_p )		; <i32> [#uses=1]
-	%r2 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %null_p )		; <i32> [#uses=1]
-	%r3 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %nh_p )		; <i32> [#uses=1]
-	%r4 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %fmt1_p, i8* %hello_p )		; <i32> [#uses=1]
-	%r4.1 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %fmt1_p, i8* %p )		; <i32> [#uses=1]
-	%r5 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %fmt2_p, i32 82 )		; <i32> [#uses=1]
-	%r6 = add i32 %r1, %r2		; <i32> [#uses=1]
-	%r7 = add i32 %r3, %r6		; <i32> [#uses=1]
-	%r8 = add i32 %r5, %r7		; <i32> [#uses=1]
-	%r9 = add i32 %r8, %r4		; <i32> [#uses=1]
-	%r10 = add i32 %r9, %r4.1		; <i32> [#uses=1]
-	ret i32 %r10
-}
diff --git a/test/Transforms/SimplifyLibCalls/ToAscii.ll b/test/Transforms/SimplifyLibCalls/ToAscii.ll
deleted file mode 100644
index aef47333b3c3..000000000000
--- a/test/Transforms/SimplifyLibCalls/ToAscii.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; Test that the ToAsciiOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep "call.*toascii"
-
-declare i32 @toascii(i32)
-
-define i32 @main() {
-	%val1 = call i32 @toascii( i32 1 )		; <i32> [#uses=1]
-	%val2 = call i32 @toascii( i32 0 )		; <i32> [#uses=1]
-	%val3 = call i32 @toascii( i32 127 )		; <i32> [#uses=1]
-	%val4 = call i32 @toascii( i32 128 )		; <i32> [#uses=1]
-	%val5 = call i32 @toascii( i32 255 )		; <i32> [#uses=1]
-	%val6 = call i32 @toascii( i32 256 )		; <i32> [#uses=1]
-	%rslt1 = add i32 %val1, %val2		; <i32> [#uses=1]
-	%rslt2 = add i32 %val3, %val4		; <i32> [#uses=1]
-	%rslt3 = add i32 %val5, %val6		; <i32> [#uses=1]
-	%rslt4 = add i32 %rslt1, %rslt2		; <i32> [#uses=1]
-	%rslt5 = add i32 %rslt4, %rslt3		; <i32> [#uses=1]
-	ret i32 %rslt5
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/abs.ll b/test/Transforms/SimplifyLibCalls/abs.ll
deleted file mode 100644
index 3934a5b98f74..000000000000
--- a/test/Transforms/SimplifyLibCalls/abs.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | grep "select i1 %ispos"
-; PR2337
-
-define i32 @test(i32 %x) {
-entry:
-	%call = call i32 @abs( i32 %x )		; <i32> [#uses=1]
-	ret i32 %call
-}
-
-declare i32 @abs(i32)
-
diff --git a/test/Transforms/SimplifyLibCalls/cos.ll b/test/Transforms/SimplifyLibCalls/cos.ll
deleted file mode 100644
index 6a8ce8c3881d..000000000000
--- a/test/Transforms/SimplifyLibCalls/cos.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define double @foo(double %d) nounwind readnone {
-; CHECK: @foo
-    %1 = fsub double -0.000000e+00, %d
-    %2 = call double @cos(double %1) nounwind readnone
-; CHECK: call double @cos(double %d)
-    ret double %2
-}
-
-declare double @cos(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/exp2.ll b/test/Transforms/SimplifyLibCalls/exp2.ll
deleted file mode 100644
index a5927757cf93..000000000000
--- a/test/Transforms/SimplifyLibCalls/exp2.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | grep "call.*ldexp" | count 4
-; rdar://5852514
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
-
-define double @t1(i32 %x) nounwind  {
-entry:
-	%tmp12 = sitofp i32 %x to double		; <double> [#uses=1]
-	%exp2 = tail call double @exp2( double %tmp12 )		; <double> [#uses=1]
-	ret double %exp2
-}
-
-define float @t4(i8 zeroext  %x) nounwind  {
-entry:
-	%tmp12 = uitofp i8 %x to float		; <float> [#uses=1]
-	%tmp3 = tail call float @exp2f( float %tmp12 ) nounwind readonly 		; <float> [#uses=1]
-	ret float %tmp3
-}
-
-declare float @exp2f(float) nounwind readonly 
-
-define double @t3(i16 zeroext  %x) nounwind  {
-entry:
-	%tmp12 = uitofp i16 %x to double		; <double> [#uses=1]
-	%exp2 = tail call double @exp2( double %tmp12 )		; <double> [#uses=1]
-	ret double %exp2
-}
-
-define double @t2(i16 signext  %x) nounwind  {
-entry:
-	%tmp12 = sitofp i16 %x to double		; <double> [#uses=1]
-	%exp2 = tail call double @exp2( double %tmp12 )		; <double> [#uses=1]
-	ret double %exp2
-}
-
-declare double @exp2(double)
-
diff --git a/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
index aecb887beb3a..ad54c3e38f13 100644
--- a/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
+++ b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -simplify-libcalls -instcombine %s | FileCheck %s
+; RUN: opt -S -simplify-libcalls -instcombine < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
diff --git a/test/Transforms/SimplifyLibCalls/floor.ll b/test/Transforms/SimplifyLibCalls/floor.ll
deleted file mode 100644
index 93c62c20023d..000000000000
--- a/test/Transforms/SimplifyLibCalls/floor.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY %s
-
-; DO-SIMPLIFY: call float @floorf(
-; DO-SIMPLIFY: call float @ceilf(
-; DO-SIMPLIFY: call float @roundf(
-; DO-SIMPLIFY: call float @nearbyintf(
-; DO-SIMPLIFY: call float @truncf(
-; DO-SIMPLIFY: call float @fabsf(
-
-; C89-SIMPLIFY: call float @floorf(
-; C89-SIMPLIFY: call float @ceilf(
-; C89-SIMPLIFY: call double @round(
-; C89-SIMPLIFY: call double @nearbyint(
-
-; DONT-SIMPLIFY: call double @floor(
-; DONT-SIMPLIFY: call double @ceil(
-; DONT-SIMPLIFY: call double @round(
-; DONT-SIMPLIFY: call double @nearbyint(
-; DONT-SIMPLIFY: call double @trunc(
-; DONT-SIMPLIFY: call double @fabs(
-
-declare double @floor(double)
-
-declare double @ceil(double)
-
-declare double @round(double)
-
-declare double @nearbyint(double)
-
-declare double @trunc(double)
-
-declare double @fabs(double)
-
-define float @test_floor(float %C) {
-	%D = fpext float %C to double		; <double> [#uses=1]
-        ; --> floorf
-	%E = call double @floor( double %D )		; <double> [#uses=1]
-	%F = fptrunc double %E to float		; <float> [#uses=1]
-	ret float %F
-}
-
-define float @test_ceil(float %C) {
-	%D = fpext float %C to double		; <double> [#uses=1]
-	; --> ceilf
-        %E = call double @ceil( double %D )		; <double> [#uses=1]
-	%F = fptrunc double %E to float		; <float> [#uses=1]
-	ret float %F
-}
-
-define float @test_round(float %C) {
-	%D = fpext float %C to double		; <double> [#uses=1]
-	; --> roundf
-        %E = call double @round( double %D )		; <double> [#uses=1]
-	%F = fptrunc double %E to float		; <float> [#uses=1]
-	ret float %F
-}
-
-define float @test_nearbyint(float %C) {
-	%D = fpext float %C to double		; <double> [#uses=1]
-	; --> nearbyintf
-        %E = call double @nearbyint( double %D )		; <double> [#uses=1]
-	%F = fptrunc double %E to float		; <float> [#uses=1]
-	ret float %F
-}
-
-define float @test_trunc(float %C) {
-	%D = fpext float %C to double
-	; --> truncf
-        %E = call double @trunc(double %D)
-	%F = fptrunc double %E to float
-	ret float %F
-}
-
-define float @test_fabs(float %C) {
-	%D = fpext float %C to double
-	; --> fabsf
-        %E = call double @fabs(double %D)
-	%F = fptrunc double %E to float
-	ret float %F
-}
diff --git a/test/Transforms/SimplifyLibCalls/fwrite.ll b/test/Transforms/SimplifyLibCalls/fwrite.ll
deleted file mode 100644
index f0f3dcaac63e..000000000000
--- a/test/Transforms/SimplifyLibCalls/fwrite.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-%FILE = type { i32 }
-
-@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
-
-define i64 @foo(%FILE* %f) {
-; CHECK: %retval = call i64 @fwrite
-  %retval = call i64 @fwrite(i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0), i64 1, i64 1, %FILE* %f)
-  ret i64 %retval
-}
-
-declare i64 @fwrite(i8*, i64, i64, %FILE *)
diff --git a/test/Transforms/SimplifyLibCalls/iprintf.ll b/test/Transforms/SimplifyLibCalls/iprintf.ll
deleted file mode 100644
index 7f036fe3ab8b..000000000000
--- a/test/Transforms/SimplifyLibCalls/iprintf.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S -o %t
-; RUN: FileCheck < %t %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "xcore-xmos-elf"
-
-@.str = internal constant [4 x i8] c"%f\0A\00"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [4 x i8] c"%d\0A\00"		; <[4 x i8]*> [#uses=1]
-
-; Verify printf with no floating point arguments is transformed to iprintf
-define i32 @f0(i32 %x) nounwind {
-entry:
-; CHECK: define i32 @f0
-; CHECK: @iprintf
-; CHECK: }
-	%0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x)		; <i32> [#uses=0]
-	ret i32 %0
-}
-
-; Verify we don't turn this into an iprintf call
-define void @f1(double %x) nounwind {
-entry:
-; CHECK: define void @f1
-; CHECK: @printf
-; CHECK: }
-	%0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x) nounwind		; <i32> [#uses=0]
-	ret void
-}
-
-; Verify sprintf with no floating point arguments is transformed to siprintf
-define i32 @f2(i8* %p, i32 %x) nounwind {
-entry:
-; CHECK: define i32 @f2
-; CHECK: @siprintf
-; CHECK: }
-	%0 = tail call i32 (i8*, i8*, ...)* @sprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x)
-	ret i32 %0
-}
-
-; Verify we don't turn this into an siprintf call
-define i32 @f3(i8* %p, double %x) nounwind {
-entry:
-; CHECK: define i32 @f3
-; CHECK: @sprintf
-; CHECK: }
-	%0 = tail call i32 (i8*, i8*, ...)* @sprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x)
-	ret i32 %0
-}
-
-; Verify fprintf with no floating point arguments is transformed to fiprintf
-define i32 @f4(i8* %p, i32 %x) nounwind {
-entry:
-; CHECK: define i32 @f4
-; CHECK: @fiprintf
-; CHECK: }
-	%0 = tail call i32 (i8*, i8*, ...)* @fprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x)
-	ret i32 %0
-}
-
-; Verify we don't turn this into an fiprintf call
-define i32 @f5(i8* %p, double %x) nounwind {
-entry:
-; CHECK: define i32 @f5
-; CHECK: @fprintf
-; CHECK: }
-	%0 = tail call i32 (i8*, i8*, ...)* @fprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x)
-	ret i32 %0
-}
-
-declare i32 @printf(i8* nocapture, ...) nounwind
-declare i32 @sprintf(i8* nocapture, i8* nocapture, ...) nounwind
-declare i32 @fprintf(i8* nocapture, i8* nocapture, ...) nounwind
diff --git a/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll b/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll
deleted file mode 100644
index 0480fdda8916..000000000000
--- a/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-; rdar://7251832
-
-; SimplifyLibcalls should optimize pow(x, 0.5) to sqrt plus code to handle
-; special cases. The readonly attribute on the call should be preserved.
-
-; CHECK: define float @foo(float %x) nounwind {
-; CHECK:   %sqrtf = call float @sqrtf(float %x) nounwind readonly
-; CHECK:   %fabsf = call float @fabsf(float %sqrtf) nounwind readonly
-; CHECK:   %1 = fcmp oeq float %x, 0xFFF0000000000000
-; CHECK:   %retval = select i1 %1, float 0x7FF0000000000000, float %fabsf
-; CHECK:   ret float %retval
-
-define float @foo(float %x) nounwind {
-  %retval = call float @powf(float %x, float 0.5)
-  ret float %retval
-}
-
-; CHECK: define double @doo(double %x) nounwind {
-; CHECK:   %sqrt = call double @sqrt(double %x) nounwind readonly
-; CHECK:   %fabs = call double @fabs(double %sqrt) nounwind readonly
-; CHECK:   %1 = fcmp oeq double %x, 0xFFF0000000000000
-; CHECK:   %retval = select i1 %1, double 0x7FF0000000000000, double %fabs
-; CHECK:   ret double %retval
-; CHECK: }
-
-define double @doo(double %x) nounwind {
-  %retval = call double @pow(double %x, double 0.5)
-  ret double %retval
-}
-
-declare float @powf(float, float) nounwind readonly
-declare double @pow(double, double) nounwind readonly
diff --git a/test/Transforms/SimplifyLibCalls/pow2.ll b/test/Transforms/SimplifyLibCalls/pow2.ll
deleted file mode 100644
index f0964e7d6daa..000000000000
--- a/test/Transforms/SimplifyLibCalls/pow2.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; Testcase for calls to the standard C "pow" function
-;
-; RUN: opt < %s -simplify-libcalls -S | not grep "call .pow"
-
-
-declare double @pow(double, double)
-declare float @powf(float, float)
-
-define double @test1(double %X) {
-	%Y = call double @pow( double %X, double 0.000000e+00 )		; <double> [#uses=1]
-	ret double %Y
-}
-
-define double @test2(double %X) {
-	%Y = call double @pow( double %X, double -0.000000e+00 )		; <double> [#uses=1]
-	ret double %Y
-}
-
-define double @test3(double %X) {
-	%Y = call double @pow( double 1.000000e+00, double %X )		; <double> [#uses=1]
-	ret double %Y
-}
-
-define double @test4(double %X) {
-	%Y = call double @pow( double %X, double 2.0)
-	ret double %Y
-}
-
-define float @test4f(float %X) {
-	%Y = call float @powf( float %X, float 2.0)
-	ret float %Y
-}
-
-define float @test5f(float %X) {
-	%Y = call float @powf(float 2.0, float %X)  ;; exp2
-	ret float %Y
-}
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index 3965c3782276..7de5a028054a 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -strip-dead-debug-info -disable-output %s
+; RUN: opt -strip-dead-debug-info -disable-output < %s
 define i32 @foo() nounwind ssp {
 entry:
   ret i32 0, !dbg !8
diff --git a/test/Transforms/StripSymbols/block-address.ll b/test/Transforms/StripSymbols/block-address.ll
index d22c6b1b157c..113d4d94fa40 100644
--- a/test/Transforms/StripSymbols/block-address.ll
+++ b/test/Transforms/StripSymbols/block-address.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -strip -S | FileCheck %s
+; RUN: opt -strip -S < %s | FileCheck %s
 ; PR10286
 
 @main_addrs = constant [2 x i8*] [i8* blockaddress(@f, %FOO), i8* blockaddress(@f, %BAR)]
diff --git a/test/Transforms/TailCallElim/ackermann.ll b/test/Transforms/TailCallElim/ackermann.ll
index 5b5dbcc225c1..83d98b84ea70 100644
--- a/test/Transforms/TailCallElim/ackermann.ll
+++ b/test/Transforms/TailCallElim/ackermann.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; This function contains two tail calls, which should be eliminated
 ; RUN: opt < %s -tailcallelim -stats -disable-output 2>&1 | grep "2 tailcallelim"
 
diff --git a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
index e4f8b483c3c0..97e67b26424d 100644
--- a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
+++ b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -tailcallelim -S | \
-; RUN:    grep "call i32 @foo"
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 declare void @bar(i32*)
 
@@ -7,6 +6,7 @@ define i32 @foo(i32 %N) {
 	%A = alloca i32, i32 %N		; <i32*> [#uses=2]
 	store i32 17, i32* %A
 	call void @bar( i32* %A )
+; CHECK: tail call i32 @foo
 	%X = tail call i32 @foo( i32 %N )		; <i32> [#uses=1]
 	ret i32 %X
 }
diff --git a/test/Transforms/TailCallElim/dup_tail.ll b/test/Transforms/TailCallElim/dup_tail.ll
index 42ac2f9dc4b9..f5b87f27644d 100644
--- a/test/Transforms/TailCallElim/dup_tail.ll
+++ b/test/Transforms/TailCallElim/dup_tail.ll
@@ -1,5 +1,8 @@
+; REQUIRES: asserts
 ; Duplicate the return into if.end to enable TCE.
-; RUN: opt %s -tailcallelim -stats -disable-output 2>&1 | grep "Number of return duplicated"
+; RUN: opt -tailcallelim -stats -disable-output < %s 2>&1 | FileCheck %s
+
+; CHECK: Number of return duplicated
 
 define i32 @fib(i32 %n) nounwind ssp {
 entry:
diff --git a/test/Transforms/TailCallElim/intervening-inst.ll b/test/Transforms/TailCallElim/intervening-inst.ll
index 0c40bd5dc50d..10dffbd69425 100644
--- a/test/Transforms/TailCallElim/intervening-inst.ll
+++ b/test/Transforms/TailCallElim/intervening-inst.ll
@@ -1,5 +1,5 @@
 ; This function contains intervening instructions which should be moved out of the way
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 define i32 @Test(i32 %X) {
 entry:
@@ -10,6 +10,7 @@ then.0:		; preds = %entry
 	ret i32 %tmp.4
 endif.0:		; preds = %entry
 	%tmp.10 = add i32 %X, -1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp.8 = call i32 @Test( i32 %tmp.10 )		; <i32> [#uses=1]
 	%DUMMY = add i32 %X, 1		; <i32> [#uses=0]
 	ret i32 %tmp.8
diff --git a/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll b/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
index a556ddb6eb1d..741f5848bc67 100644
--- a/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
+++ b/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
@@ -1,4 +1,4 @@
-; RUN: opt -tailcallelim %s -S | FileCheck %s
+; RUN: opt -tailcallelim -S < %s | FileCheck %s
 ; PR615
 
 declare void @bar(i32*)
diff --git a/test/Transforms/TailCallElim/nocapture.ll b/test/Transforms/TailCallElim/nocapture.ll
index 87cb9dd427b4..e49d87cc4b59 100644
--- a/test/Transforms/TailCallElim/nocapture.ll
+++ b/test/Transforms/TailCallElim/nocapture.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -tailcallelim -S | FileCheck %s
+; RUN: opt -tailcallelim -S < %s | FileCheck %s
 ; XFAIL: *
 
 declare void @use(i8* nocapture, i8* nocapture)
diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll
index 7f5c36e4a207..53c65dab101b 100644
--- a/test/Transforms/TailCallElim/reorder_load.ll
+++ b/test/Transforms/TailCallElim/reorder_load.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 ; PR4323
 
 ; Several cases where tail call elimination should move the load above the call,
@@ -21,6 +21,7 @@ if:		; preds = %entry
 
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -47,6 +48,7 @@ unwind:		; preds = %else
 
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* @global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -66,6 +68,7 @@ if:		; preds = %entry
 
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* @extern_weak_global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -94,6 +97,7 @@ unwind:		; preds = %else
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%first = load i32* %a_arg		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7)		; <i32> [#uses=1]
 	%second = load i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %second, %tmp8		; <i32> [#uses=1]
diff --git a/test/Transforms/TailCallElim/return_constant.ll b/test/Transforms/TailCallElim/return_constant.ll
index 48e5641bb57a..e99e57e1457d 100644
--- a/test/Transforms/TailCallElim/return_constant.ll
+++ b/test/Transforms/TailCallElim/return_constant.ll
@@ -1,7 +1,7 @@
 ; Though this case seems to be fairly unlikely to occur in the wild, someone
 ; plunked it into the demo script, so maybe they care about it.
 ;
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 define i32 @aaa(i32 %c) {
 entry:
@@ -9,6 +9,7 @@ entry:
 	br i1 %tmp.1, label %return, label %else
 else:		; preds = %entry
 	%tmp.5 = add i32 %c, -1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp.3 = call i32 @aaa( i32 %tmp.5 )		; <i32> [#uses=0]
 	ret i32 0
 return:		; preds = %entry
diff --git a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
index 3d01d1709952..7049e4d588d4 100644
--- a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
+++ b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
@@ -1,11 +1,11 @@
-; RUN: opt < %s -tailcallelim -S | \
-; RUN:    grep "tail call void @foo"
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 
 declare void @foo()
 
 define void @bar() {
-	call void @foo( )
+; CHECK: tail call void @foo()
+	call void @foo()
 	ret void
 }
 
diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
index 7853d7ba06fb..292186020f4f 100644
--- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
+++ b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output 2>&1 | not grep tailduplicate
 ; XFAIL: *
author	Dimitry Andric <dim@FreeBSD.org>	2013-04-08 18:41:23 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2013-04-08 18:41:23 +0000
commit	4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 (patch)
tree	06099edc18d30894081a822b756f117cbe0b8207 /test/Transforms
parent	482e7bddf617ae804dc47133cb07eb4aa81e45de (diff)