49 files changed, 2166 insertions, 424 deletions
diff --git a/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll b/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll
new file mode 100644
index 000000000000..7ce8ab3ac521
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll
@@ -0,0 +1,38 @@
+; RUN: opt -argpromotion -verify -dse -S %s -o - | FileCheck %s
+
+; Fix for PR33641. ArgumentPromotion removed the argument to bar but left the call to
+; dbg.value which still used the removed argument.
+
+%p_t = type i16*
+%fun_t = type void (%p_t)*
+
+define void @foo() {
+  %tmp = alloca %fun_t
+  store %fun_t @bar, %fun_t* %tmp
+  ret void
+}
+
+define internal void @bar(%p_t %p)  {
+  call void @llvm.dbg.value(metadata %p_t %p, i64 0, metadata !4, metadata !5), !dbg !6
+  ret void
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "bar", unit: !0)
+!4 = !DILocalVariable(name: "p", scope: !3)
+!5 = !DIExpression()
+!6 = !DILocation(line: 1, column: 1, scope: !3)
+
+; The %p argument should be removed, and the use of it in dbg.value should be
+; changed to undef.
+; CHECK:      define internal void @bar() {
+; CHECK-NEXT:   call void @llvm.dbg.value(metadata i16* undef
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
diff --git a/test/Transforms/CodeGenPrepare/X86/memcmp.ll b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
index 2435cd7d0a83..4b9e7c3956f5 100644
--- a/test/Transforms/CodeGenPrepare/X86/memcmp.ll
+++ b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -codegenprepare -mtriple=i686-unknown-unknown   -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X32
 ; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128         < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64
 
@@ -5,8 +6,8 @@ declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
 
 define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp2(
-; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* %x to i16*
-; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* %y to i16*
+; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
+; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
 ; ALL-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
 ; ALL-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]]
 ; ALL-NEXT:    [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
@@ -23,7 +24,7 @@ define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp3(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 3)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
@@ -32,8 +33,8 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp4(
-; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* %x to i32*
-; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* %y to i32*
+; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
 ; ALL-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
 ; ALL-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
 ; ALL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
@@ -50,7 +51,7 @@ define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp5(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 5)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
@@ -59,7 +60,7 @@ define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp6(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 6)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
@@ -68,7 +69,7 @@ define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp7(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
@@ -77,12 +78,12 @@ define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X32-LABEL: @cmp8(
-; X32-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+; X32-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 8)
 ; X32-NEXT:    ret i32 [[CALL]]
 ;
 ; X64-LABEL: @cmp8(
-; X64-NEXT:    [[TMP1:%.*]] = bitcast i8* %x to i64*
-; X64-NEXT:    [[TMP2:%.*]] = bitcast i8* %y to i64*
+; X64-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
 ; X64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
 ; X64-NEXT:    [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
@@ -99,7 +100,7 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp9(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 9)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
@@ -108,7 +109,7 @@ define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp10(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 10)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
@@ -117,7 +118,7 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp11(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp11(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
@@ -126,7 +127,7 @@ define i32 @cmp11(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp12(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 12)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
@@ -135,7 +136,7 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp13(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp13(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
@@ -144,7 +145,7 @@ define i32 @cmp13(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp14(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp14(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
@@ -153,7 +154,7 @@ define i32 @cmp14(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp15(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp15(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
@@ -162,7 +163,7 @@ define i32 @cmp15(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp16(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 16)
 ; ALL-NEXT:    ret i32 [[CALL]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
@@ -171,8 +172,8 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq2(
-; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* %x to i16*
-; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* %y to i16*
+; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
+; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
 ; ALL-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
 ; ALL-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]]
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
@@ -189,7 +190,7 @@ define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq3(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 3)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
@@ -202,8 +203,8 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq4(
-; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* %x to i32*
-; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* %y to i32*
+; ALL-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
 ; ALL-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
 ; ALL-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
@@ -220,7 +221,7 @@ define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq5(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 5)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
@@ -233,7 +234,7 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq6(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 6)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
@@ -246,7 +247,7 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq7(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
@@ -259,14 +260,14 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; X32-LABEL: @cmp_eq8(
-; X32-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+; X32-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 8)
 ; X32-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; X32-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X32-NEXT:    ret i32 [[CONV]]
 ;
 ; X64-LABEL: @cmp_eq8(
-; X64-NEXT:    [[TMP1:%.*]] = bitcast i8* %x to i64*
-; X64-NEXT:    [[TMP2:%.*]] = bitcast i8* %y to i64*
+; X64-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
 ; X64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
 ; X64-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
@@ -283,7 +284,7 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq9(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 9)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
@@ -296,7 +297,7 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq10(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 10)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
@@ -309,7 +310,7 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq11(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
@@ -322,7 +323,7 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq12(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 12)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
@@ -335,7 +336,7 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq13(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
@@ -348,7 +349,7 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq14(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
@@ -361,7 +362,7 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq15(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
@@ -374,7 +375,7 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 
 define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
 ; ALL-LABEL: @cmp_eq16(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 16)
 ; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
index 9d6e668167fb..b6b775797826 100644
--- a/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
+++ b/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
@@ -4,6 +4,8 @@ target datalayout =
 "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
 
+@x = external global [1 x [2 x <4 x float>]]
+
 ; Can we sink single addressing mode computation to use?
 define void @test1(i1 %cond, i64* %base) {
 ; CHECK-LABEL: @test1
@@ -194,3 +196,25 @@ rare.2:
 
 
 declare void @slowpath(i32, i32*)
+
+; Make sure we don't end up in an infinite loop after we fail to sink.
+; CHECK-LABEL: define void @test8
+; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
+define void @test8() {
+allocas:
+  %aFOO_load = load float*, float** undef
+  %aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64
+  %aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
+  %aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8*
+  %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
+  br label %load.i145
+
+load.i145:
+  %ptr.i143 = bitcast i8* %ptr to <4 x float>*
+  %valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4
+  %x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0
+  br label %pl_loop.i.i122
+
+pl_loop.i.i122:
+  br label %pl_loop.i.i122
+}
diff --git a/test/Transforms/CodeGenPrepare/crash-on-large-allocas.ll b/test/Transforms/CodeGenPrepare/crash-on-large-allocas.ll
new file mode 100644
index 000000000000..3808c0e61c10
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/crash-on-large-allocas.ll
@@ -0,0 +1,16 @@
+; RUN: opt -S -codegenprepare %s -o - | FileCheck %s
+;
+; Ensure that we don't {crash,return a bad value} when given an alloca larger
+; than what a pointer can represent.
+
+target datalayout = "p:16:16"
+
+; CHECK-LABEL: @alloca_overflow_is_unknown(
+define i16 @alloca_overflow_is_unknown() {
+  %i = alloca i8, i32 65537
+  %j = call i16 @llvm.objectsize.i16.p0i8(i8* %i, i1 false, i1 false)
+  ; CHECK: ret i16 -1
+  ret i16 %j
+}
+
+declare i16 @llvm.objectsize.i16.p0i8(i8*, i1, i1)
diff --git a/test/Transforms/ConstantHoisting/ARM/bad-cases.ll b/test/Transforms/ConstantHoisting/ARM/bad-cases.ll
index ffcfb2e56c95..315e69998c62 100644
--- a/test/Transforms/ConstantHoisting/ARM/bad-cases.ll
+++ b/test/Transforms/ConstantHoisting/ARM/bad-cases.ll
@@ -107,3 +107,34 @@ entry:
   %ret = add i32 %cast0, %cast1
   ret i32 %ret
 }
+
+@exception_type = external global i8
+
+; Constants in inline ASM should not be hoisted.
+define i32 @inline_asm_invoke() personality i8* null {
+;CHECK-LABEL: @inline_asm_invoke
+;CHECK-NOT: %const = 214672
+;CHECK: %X = invoke i32 asm "bswap $0", "=r,r"(i32 214672)
+  %X = invoke i32 asm "bswap $0", "=r,r"(i32 214672)
+                  to label %L unwind label %lpad
+;CHECK: %Y = invoke i32 asm "bswap $0", "=r,r"(i32 214672)
+  %Y = invoke i32 asm "bswap $0", "=r,r"(i32 214672)
+                  to label %L unwind label %lpad
+L:
+  ret i32 %X
+lpad:
+  %lp = landingpad i32
+      cleanup
+      catch i8* @exception_type
+  ret i32 1
+}
+
+define i32 @inline_asm_call() {
+;CHECK-LABEL: @inline_asm_call
+;CHECK-NOT: %const = 214672
+;CHECK: %X = call i32 asm "bswap $0", "=r,r"(i32 214672)
+  %X = call i32 asm "bswap $0", "=r,r"(i32 214672)
+;CHECK: %Y = call i32 asm "bswap $0", "=r,r"(i32 214672)
+  %Y = call i32 asm "bswap $0", "=r,r"(i32 214672)
+  ret i32 %X
+}
diff --git a/test/Transforms/ConstantHoisting/ARM/insertvalue.ll b/test/Transforms/ConstantHoisting/ARM/insertvalue.ll
new file mode 100644
index 000000000000..99fe7fbe22a5
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/ARM/insertvalue.ll
@@ -0,0 +1,31 @@
+; RUN: opt -consthoist -S < %s | FileCheck %s
+target triple = "thumbv6m-none-eabi"
+
+%T = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32 }
+
+; The second operand of insertvalue is able to be hoisted.
+define void @test1(%T %P) {
+; CHECK-LABEL:  @test1
+; CHECK:        %const = bitcast i32 256 to i32
+; CHECK:        %1 = insertvalue %T %P, i32 %const, 256
+; CHECK:        %2 = insertvalue %T %P, i32 %const, 256
+  %1 = insertvalue %T %P, i32 256, 256
+  %2 = insertvalue %T %P, i32 256, 256
+  ret void
+}
diff --git a/test/Transforms/ConstantHoisting/X86/ehpad.ll b/test/Transforms/ConstantHoisting/X86/ehpad.ll
index 4f87572f3447..5e345c4515d7 100644
--- a/test/Transforms/ConstantHoisting/X86/ehpad.ll
+++ b/test/Transforms/ConstantHoisting/X86/ehpad.ll
@@ -1,9 +1,6 @@
-; RUN: opt -S -consthoist < %s | FileCheck %s
+; RUN: opt -S -consthoist -consthoist-with-block-frequency=false < %s | FileCheck %s
 ; RUN: opt -S -consthoist -consthoist-with-block-frequency=true < %s | FileCheck --check-prefix=BFIHOIST %s
 
-; FIXME: The catchpad doesn't even use the constant, so a better fix would be to
-; insert the bitcast in the catchpad block.
-
 target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc"
 
diff --git a/test/Transforms/GVN/PRE/atomic.ll b/test/Transforms/GVN/PRE/atomic.ll
index 509acd613e95..3479bc9a0e33 100644
--- a/test/Transforms/GVN/PRE/atomic.ll
+++ b/test/Transforms/GVN/PRE/atomic.ll
@@ -208,14 +208,14 @@ define void @fence_seq_cst(i32* %P1, i32* %P2) {
   ret void
 }
 
-; Can't DSE across a full singlethread fence
+; Can't DSE across a full syncscope("singlethread") fence
 define void @fence_seq_cst_st(i32* %P1, i32* %P2) {
 ; CHECK-LABEL: @fence_seq_cst_st(
 ; CHECK: store
-; CHECK: fence singlethread seq_cst
+; CHECK: fence syncscope("singlethread") seq_cst
 ; CHECK: store
   store i32 0, i32* %P1, align 4
-  fence singlethread seq_cst
+  fence syncscope("singlethread") seq_cst
   store i32 0, i32* %P1, align 4
   ret void
 }
diff --git a/test/Transforms/GVN/PRE/phi-translate-2.ll b/test/Transforms/GVN/PRE/phi-translate-2.ll
deleted file mode 100644
index 78681e20df5e..000000000000
--- a/test/Transforms/GVN/PRE/phi-translate-2.ll
+++ /dev/null
@@ -1,131 +0,0 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-@a = common global [100 x i64] zeroinitializer, align 16
-@b = common global [100 x i64] zeroinitializer, align 16
-@g1 = common global i64 0, align 8
-@g2 = common global i64 0, align 8
-@g3 = common global i64 0, align 8
-declare i64 @goo(...) local_unnamed_addr #1
-
-define void @test1(i64 %a, i64 %b, i64 %c, i64 %d) {
-entry:
-  %mul = mul nsw i64 %b, %a
-  store i64 %mul, i64* @g1, align 8
-  %t0 = load i64, i64* @g2, align 8
-  %cmp = icmp sgt i64 %t0, 3
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:                                          ; preds = %entry
-  %mul2 = mul nsw i64 %d, %c
-  store i64 %mul2, i64* @g2, align 8
-  br label %if.end
-
-; Check phi-translate works and mul is removed.
-; CHECK-LABEL: @test1(
-; CHECK: if.end:
-; CHECK: %[[MULPHI:.*]] = phi i64 [ {{.*}}, %if.then ], [ %mul, %entry ]
-; CHECK-NOT: = mul
-; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8
-if.end:                                           ; preds = %if.then, %entry
-  %b.addr.0 = phi i64 [ %d, %if.then ], [ %b, %entry ]
-  %a.addr.0 = phi i64 [ %c, %if.then ], [ %a, %entry ]
-  %mul3 = mul nsw i64 %a.addr.0, %b.addr.0
-  store i64 %mul3, i64* @g3, align 8
-  ret void
-}
-
-define void @test2(i64 %i) {
-entry:
-  %arrayidx = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i
-  %t0 = load i64, i64* %arrayidx, align 8
-  %arrayidx1 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i
-  %t1 = load i64, i64* %arrayidx1, align 8
-  %mul = mul nsw i64 %t1, %t0
-  store i64 %mul, i64* @g1, align 8
-  %cmp = icmp sgt i64 %mul, 3
-  br i1 %cmp, label %if.then, label %if.end
-
-; Check phi-translate works for the phi generated by loadpre. A new mul will be
-; inserted in if.then block.
-; CHECK-LABEL: @test2(
-; CHECK: if.then:
-; CHECK: %[[MUL_THEN:.*]] = mul
-; CHECK: br label %if.end
-if.then:                                          ; preds = %entry
-  %call = tail call i64 (...) @goo() #2
-  store i64 %call, i64* @g2, align 8
-  br label %if.end
-
-; CHECK: if.end:
-; CHECK: %[[MULPHI:.*]] = phi i64 [ %[[MUL_THEN]], %if.then ], [ %mul, %entry ]
-; CHECK-NOT: = mul
-; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8
-if.end:                                           ; preds = %if.then, %entry
-  %i.addr.0 = phi i64 [ 3, %if.then ], [ %i, %entry ]
-  %arrayidx3 = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i.addr.0
-  %t2 = load i64, i64* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i.addr.0
-  %t3 = load i64, i64* %arrayidx4, align 8
-  %mul5 = mul nsw i64 %t3, %t2
-  store i64 %mul5, i64* @g3, align 8
-  ret void
-}
-
-; Check phi-translate doesn't go through backedge, which may lead to incorrect
-; pre transformation.
-; CHECK: for.end:
-; CHECK-NOT: %{{.*pre-phi}} = phi
-; CHECK: ret void
-define void @test3(i64 %N, i64* nocapture readonly %a) {
-entry:
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.body, %entry
-  %i.0 = phi i64 [ 0, %entry ], [ %add, %for.body ]
-  %add = add nuw nsw i64 %i.0, 1
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %add
-  %tmp0 = load i64, i64* %arrayidx, align 8
-  %cmp = icmp slt i64 %i.0, %N
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %call = tail call i64 (...) @goo() #2
-  %add1 = sub nsw i64 0, %call
-  %tobool = icmp eq i64 %tmp0, %add1
-  br i1 %tobool, label %for.cond, label %for.end
-
-for.end:                                          ; preds = %for.body, %for.cond
-  %i.0.lcssa = phi i64 [ %i.0, %for.body ], [ %i.0, %for.cond ]
-  %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %i.0.lcssa
-  %tmp1 = load i64, i64* %arrayidx2, align 8
-  store i64 %tmp1, i64* @g1, align 8
-  ret void
-}
-
-; It is incorrect to use the value of %andres in last loop iteration
-; to do pre.
-; CHECK-LABEL: @test4(
-; CHECK: for.body:
-; CHECK-NOT: %andres.pre-phi = phi i32
-; CHECK: br i1 %tobool1
-
-define i32 @test4(i32 %cond, i32 %SectionAttrs.0231.ph, i32 *%AttrFlag) {
-for.body.preheader:
-  %t514 = load volatile i32, i32* %AttrFlag
-  br label %for.body
-
-for.body:
-  %t320 = phi i32 [ %t334, %bb343 ], [ %t514, %for.body.preheader ]
-  %andres = and i32 %t320, %SectionAttrs.0231.ph
-  %tobool1 = icmp eq i32 %andres, 0
-  br i1 %tobool1, label %bb343, label %critedge.loopexit
-
-bb343:
-  %t334 = load volatile i32, i32* %AttrFlag
-  %tobool2 = icmp eq i32 %cond, 0
-  br i1 %tobool2, label %critedge.loopexit, label %for.body
-
-critedge.loopexit:
-  unreachable
-}
diff --git a/test/Transforms/GVN/PRE/pre-gep-load.ll b/test/Transforms/GVN/PRE/pre-gep-load.ll
index 1b2b4d20d31d..9eec8bb6455b 100644
--- a/test/Transforms/GVN/PRE/pre-gep-load.ll
+++ b/test/Transforms/GVN/PRE/pre-gep-load.ll
@@ -37,7 +37,7 @@ sw.bb2:                                           ; preds = %if.end, %entry
   %3 = load double, double* %arrayidx5, align 8
 ; CHECK: sw.bb2:
 ; CHECK-NOT: sext
-; CHECK: phi double [
+; CHECK-NEXT: phi double [
 ; CHECK-NOT: load
   %sub6 = fsub double 3.000000e+00, %3
   br label %return
diff --git a/test/Transforms/GVN/PRE/pre-load.ll b/test/Transforms/GVN/PRE/pre-load.ll
index ffff2b7f08e5..685df24f62b6 100644
--- a/test/Transforms/GVN/PRE/pre-load.ll
+++ b/test/Transforms/GVN/PRE/pre-load.ll
@@ -72,7 +72,7 @@ block4:
   %PRE = load i32, i32* %P3
   ret i32 %PRE
 ; CHECK: block4:
-; CHECK: phi i32 [
+; CHECK-NEXT: phi i32 [
 ; CHECK-NOT: load
 ; CHECK: ret i32
 }
@@ -104,7 +104,7 @@ block4:
   %PRE = load i32, i32* %P3
   ret i32 %PRE
 ; CHECK: block4:
-; CHECK: phi i32 [
+; CHECK-NEXT: phi i32 [
 ; CHECK-NOT: load
 ; CHECK: ret i32
 }
@@ -263,7 +263,7 @@ block4:
   %PRE = load i32, i32* %P3
   ret i32 %PRE
 ; CHECK: block4:
-; CHECK: phi i32 [
+; CHECK-NEXT: phi i32 [
 ; CHECK-NOT: load
 ; CHECK: ret i32
 }
diff --git a/test/Transforms/IndVarSimplify/canonicalize-cmp.ll b/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
new file mode 100644
index 000000000000..2b939767284a
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
@@ -0,0 +1,98 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; Check that we replace signed comparisons between non-negative values with
+; unsigned comparisons if we can.
+
+target datalayout = "n8:16:32:64"
+
+define i32 @test_01(i32 %a, i32 %b, i32* %p) {
+
+; CHECK-LABEL: @test_01(
+; CHECK-NOT:   icmp slt
+; CHECK:       %cmp1 = icmp ult i32 %iv, 100
+; CHECK:       %cmp2 = icmp ult i32 %iv, 100
+; CHECK-NOT:   %cmp3
+; CHECK:       %exitcond = icmp ne i32 %iv.next, 1000
+
+entry:
+  br label %loop.entry
+
+loop.entry:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.be ]
+  %cmp1 = icmp slt i32 %iv, 100
+  br i1 %cmp1, label %b1, label %b2
+
+b1:
+  store i32 %iv, i32* %p
+  br label %merge
+
+b2:
+  store i32 %a, i32* %p
+  br label %merge
+
+merge:
+  %cmp2 = icmp ult i32 %iv, 100
+  br i1 %cmp2, label %b3, label %b4
+
+b3:
+  store i32 %iv, i32* %p
+  br label %loop.be
+
+b4:
+  store i32 %b, i32* %p
+  br label %loop.be
+
+loop.be:
+  %iv.next = add i32 %iv, 1
+  %cmp3 = icmp slt i32 %iv.next, 1000
+  br i1 %cmp3, label %loop.entry, label %exit
+
+exit:
+  ret i32 %iv
+}
+
+define i32 @test_02(i32 %a, i32 %b, i32* %p) {
+
+; CHECK-LABEL: @test_02(
+; CHECK-NOT:   icmp sgt
+; CHECK:       %cmp1 = icmp ugt i32 100, %iv
+; CHECK:       %cmp2 = icmp ugt i32 100, %iv
+; CHECK-NOT:   %cmp3
+; CHECK:       %exitcond = icmp ne i32 %iv.next, 1000
+
+entry:
+  br label %loop.entry
+
+loop.entry:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.be ]
+  %cmp1 = icmp sgt i32 100, %iv
+  br i1 %cmp1, label %b1, label %b2
+
+b1:
+  store i32 %iv, i32* %p
+  br label %merge
+
+b2:
+  store i32 %a, i32* %p
+  br label %merge
+
+merge:
+  %cmp2 = icmp ugt i32 100, %iv
+  br i1 %cmp2, label %b3, label %b4
+
+b3:
+  store i32 %iv, i32* %p
+  br label %loop.be
+
+b4:
+  store i32 %b, i32* %p
+  br label %loop.be
+
+loop.be:
+  %iv.next = add i32 %iv, 1
+  %cmp3 = icmp sgt i32 1000, %iv.next
+  br i1 %cmp3, label %loop.entry, label %exit
+
+exit:
+  ret i32 %iv
+}
diff --git a/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
index 612f01e3cade..a63617e62c0e 100644
--- a/test/Transforms/IndVarSimplify/eliminate-comparison.ll
+++ b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
@@ -111,7 +111,7 @@ return:
 ; Indvars should not turn the second loop into an infinite one.
 
 ; CHECK-LABEL: @func_11(
-; CHECK: %tmp5 = icmp slt i32 %__key6.0, 10
+; CHECK: %tmp5 = icmp ult i32 %__key6.0, 10
 ; CHECK-NOT: br i1 true, label %noassert68, label %unrolledend
 
 define i32 @func_11() nounwind uwtable {
@@ -163,7 +163,7 @@ declare void @llvm.trap() noreturn nounwind
 
 ; In this case the second loop only has a single iteration, fold the header away
 ; CHECK-LABEL: @func_12(
-; CHECK: %tmp5 = icmp slt i32 %__key6.0, 10
+; CHECK: %tmp5 = icmp ult i32 %__key6.0, 10
 ; CHECK: br i1 true, label %noassert68, label %unrolledend
 define i32 @func_12() nounwind uwtable {
 entry:
diff --git a/test/Transforms/IndVarSimplify/strengthen-overflow.ll b/test/Transforms/IndVarSimplify/strengthen-overflow.ll
index 2bafe96e1ccc..6e0538e04d6b 100644
--- a/test/Transforms/IndVarSimplify/strengthen-overflow.ll
+++ b/test/Transforms/IndVarSimplify/strengthen-overflow.ll
@@ -104,5 +104,89 @@ define i32 @test.unsigned.add.1(i32* %array, i32 %length, i32 %init) {
   ret i32 42
 }
 
+define hidden void @test.shl.exact.equal() {
+; CHECK-LABEL: @test.shl.exact.equal
+entry:
+  br label %for.body
+
+for.body:
+; CHECK-LABEL: for.body
+  %k.021 = phi i32 [ 1, %entry ], [ %inc, %for.body ]
+  %shl = shl i32 1, %k.021
+  %shr1 = ashr i32 %shl, 1
+; CHECK: %shr1 = ashr exact i32 %shl, 1
+  %shr2 = lshr i32 %shl, 1
+; CHECK: %shr2 = lshr exact i32 %shl, 1
+  %inc = add nuw nsw i32 %k.021, 1
+  %exitcond = icmp eq i32 %inc, 9
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define hidden void @test.shl.exact.greater() {
+; CHECK-LABEL: @test.shl.exact.greater
+entry:
+  br label %for.body
+
+for.body:
+; CHECK-LABEL: for.body
+  %k.021 = phi i32 [ 3, %entry ], [ %inc, %for.body ]
+  %shl = shl i32 1, %k.021
+  %shr1 = ashr i32 %shl, 2
+; CHECK: %shr1 = ashr exact i32 %shl, 2
+  %shr2 = lshr i32 %shl, 2
+; CHECK: %shr2 = lshr exact i32 %shl, 2
+  %inc = add nuw nsw i32 %k.021, 1
+  %exitcond = icmp eq i32 %inc, 9
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define hidden void @test.shl.exact.unbound(i32 %arg) {
+; CHECK-LABEL: @test.shl.exact.unbound
+entry:
+  br label %for.body
+
+for.body:
+; CHECK-LABEL: for.body
+  %k.021 = phi i32 [ 2, %entry ], [ %inc, %for.body ]
+  %shl = shl i32 1, %k.021
+  %shr1 = ashr i32 %shl, 2
+; CHECK: %shr1 = ashr exact i32 %shl, 2
+  %shr2 = lshr i32 %shl, 2
+; CHECK: %shr2 = lshr exact i32 %shl, 2
+  %inc = add nuw nsw i32 %k.021, 1
+  %exitcond = icmp eq i32 %inc, %arg
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define hidden void @test.shl.nonexact() {
+; CHECK-LABEL: @test.shl.nonexact
+entry:
+  br label %for.body
+
+for.body:
+; CHECK-LABEL: for.body
+  %k.021 = phi i32 [ 2, %entry ], [ %inc, %for.body ]
+  %shl = shl i32 1, %k.021
+  %shr1 = ashr i32 %shl, 3
+; CHECK: %shr1 = ashr i32 %shl, 3
+  %shr2 = lshr i32 %shl, 3
+; CHECK: %shr2 = lshr i32 %shl, 3
+  %inc = add nuw nsw i32 %k.021, 1
+  %exitcond = icmp eq i32 %inc, 9
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
 !0 = !{i32 0, i32 2}
 !1 = !{i32 0, i32 42}
diff --git a/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index b87cd0550192..2d24cd732ce8 100644
--- a/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -64,7 +64,7 @@ for.end:
 ; CHECK-LABEL: @test2
 ; CHECK: for.body4.us
 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %cmp2.us = icmp slt i64
+; CHECK: %cmp2.us = icmp ult i64
 ; CHECK-NOT: %2 = trunc i64 %indvars.iv.next to i32
 ; CHECK-NOT: %cmp2.us = icmp slt i32
 
diff --git a/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll b/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
index b566c147e9b8..1eab70754030 100644
--- a/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
+++ b/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
@@ -170,4 +170,16 @@ define { i32 addrspace(4)*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32 addrsp
   ret { i32 addrspace(4)*, i1 } %ret
 }
 
+; Null pointer in local addr space
+; CHECK-LABEL: @local_nullptr
+; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*)
+; CHECK-NOT: i8 addrspace(3)* null
+define void @local_nullptr(i32 addrspace(1)* nocapture %results, i8 addrspace(3)* %a) {
+entry:
+  %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*)
+  %conv = zext i1 %tobool to i32
+  store i32 %conv, i32 addrspace(1)* %results, align 4
+  ret void
+}
+
 attributes #0 = { nounwind }
diff --git a/test/Transforms/Inline/ARM/inline-target-attr.ll b/test/Transforms/Inline/ARM/inline-target-attr.ll
new file mode 100644
index 000000000000..5bbecd203528
--- /dev/null
+++ b/test/Transforms/Inline/ARM/inline-target-attr.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -inline | FileCheck %s
+; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s
+; Check that we only inline when we have compatible target attributes.
+; ARM has implemented a target attribute that will verify that the attribute
+; sets are compatible.
+
+define i32 @foo() #0 {
+entry:
+  %call = call i32 (...) @baz()
+  ret i32 %call
+; CHECK-LABEL: foo
+; CHECK: call i32 (...) @baz()
+}
+declare i32 @baz(...) #0
+
+define i32 @bar() #1 {
+entry:
+  %call = call i32 @foo()
+  ret i32 %call
+; CHECK-LABEL: bar
+; CHECK: call i32 (...) @baz()
+}
+
+define i32 @qux() #0 {
+entry:
+  %call = call i32 @bar()
+  ret i32 %call
+; CHECK-LABEL: qux
+; CHECK: call i32 @bar()
+}
+
+define i32 @thumb_fn() #2 {
+entry:
+  %call = call i32 @foo()
+  ret i32 %call
+; CHECK-LABEL: thumb_fn
+; CHECK: call i32 @foo
+}
+
+define i32 @strict_align() #3 {
+entry:
+  %call = call i32 @foo()
+  ret i32 %call
+; CHECK-LABEL: strict_align
+; CHECK: call i32 (...) @baz()
+}
+
+define i32 @soft_float_fn() #4 {
+entry:
+  %call = call i32 @foo()
+  ret i32 %call
+; CHECK-LABEL: soft_float_fn
+; CHECK: call i32 @foo
+}
+
+attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" }
+attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16" }
+attributes #2 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+thumb-mode" }
+attributes #3 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+strict-align" }
+attributes #4 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+soft-float" }
diff --git a/test/Transforms/Inline/ARM/lit.local.cfg b/test/Transforms/Inline/ARM/lit.local.cfg
new file mode 100644
index 000000000000..236e1d344166
--- /dev/null
+++ b/test/Transforms/Inline/ARM/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True
diff --git a/test/Transforms/Inline/cgscc-incremental-invalidate.ll b/test/Transforms/Inline/cgscc-incremental-invalidate.ll
index 82d321ccf225..164f7a66a6f3 100644
--- a/test/Transforms/Inline/cgscc-incremental-invalidate.ll
+++ b/test/Transforms/Inline/cgscc-incremental-invalidate.ll
@@ -11,17 +11,35 @@
 ; CHECK: Running analysis: FunctionAnalysisManagerCGSCCProxy on (test1_f, test1_g, test1_h)
 ; CHECK: Running analysis: DominatorTreeAnalysis on test1_f
 ; CHECK: Running analysis: DominatorTreeAnalysis on test1_g
-; CHECK: Invalidating all non-preserved analyses for: (test1_f, test1_g, test1_h)
+; CHECK: Invalidating all non-preserved analyses for: (test1_f)
 ; CHECK: Invalidating all non-preserved analyses for: test1_f
 ; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_f
+; CHECK: Invalidating analysis: LoopAnalysis on test1_f
+; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_f
+; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_f
+; CHECK: Invalidating all non-preserved analyses for: (test1_g, test1_h)
 ; CHECK: Invalidating all non-preserved analyses for: test1_g
 ; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_g
-; CHECK: Invalidating all non-preserved analyses for: test1_h
-; CHECK-NOT: Invalidating anaylsis:
-; CHECK: Running analysis: DominatorTreeAnalysis on test1_h
-; CHECK: Invalidating all non-preserved analyses for: (test1_g, test1_h)
+; CHECK: Invalidating analysis: LoopAnalysis on test1_g
+; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_g
+; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_g
 ; CHECK: Invalidating all non-preserved analyses for: test1_h
 ; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_h
+; CHECK: Invalidating analysis: LoopAnalysis on test1_h
+; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_h
+; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_h
+; CHECK-NOT: Invalidating analysis:
+; CHECK: Starting llvm::Function pass manager run.
+; CHECK-NEXT: Running pass: DominatorTreeVerifierPass on test1_g
+; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_g
+; CHECK-NEXT: Finished llvm::Function pass manager run.
+; CHECK-NEXT: Starting llvm::Function pass manager run.
+; CHECK-NEXT: Running pass: DominatorTreeVerifierPass on test1_h
+; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_h
+; CHECK-NEXT: Finished llvm::Function pass manager run.
+; CHECK-NOT: Invalidating analysis:
+; CHECK: Running pass: DominatorTreeVerifierPass on test1_f
+; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_f
 
 ; An external function used to control branches.
 declare i1 @flag()
@@ -109,3 +127,80 @@ entry:
   ret void
 ; CHECK: ret void
 }
+
+; The 'test2_' prefixed code works to carefully trigger forming an SCC with
+; a dominator tree for one of the functions but not the other and without even
+; a function analysis manager proxy for the SCC that things get merged into.
+; Without proper handling when updating the call graph this will find a stale
+; dominator tree.
+
+@test2_global = external global i32, align 4
+
+define void @test2_hoge(i1 (i32*)* %arg) {
+; CHECK-LABEL: define void @test2_hoge(
+bb:
+  %tmp2 = call zeroext i1 %arg(i32* @test2_global)
+; CHECK: call zeroext i1 %arg(
+  br label %bb3
+
+bb3:
+  %tmp5 = call zeroext i1 %arg(i32* @test2_global)
+; CHECK: call zeroext i1 %arg(
+  br i1 %tmp5, label %bb3, label %bb6
+
+bb6:
+  ret void
+}
+
+define zeroext i1 @test2_widget(i32* %arg) {
+; CHECK-LABEL: define zeroext i1 @test2_widget(
+bb:
+  %tmp1 = alloca i8, align 1
+  %tmp2 = alloca i32, align 4
+  call void @test2_quux()
+; CHECK-NOT:     call
+;
+; CHECK:         call zeroext i1 @test2_widget(i32* @test2_global)
+; CHECK-NEXT:    br label %[[NEW_BB:.*]]
+;
+; CHECK:       [[NEW_BB]]:
+; CHECK-NEXT:    call zeroext i1 @test2_widget(i32* @test2_global)
+;
+; CHECK:       {{.*}}:
+
+  call void @test2_hoge.1(i32* %arg)
+; CHECK-NEXT:    call void @test2_hoge.1(
+
+  %tmp4 = call zeroext i1 @test2_barney(i32* %tmp2)
+  %tmp5 = zext i1 %tmp4 to i32
+  store i32 %tmp5, i32* %tmp2, align 4
+  %tmp6 = call zeroext i1 @test2_barney(i32* null)
+  call void @test2_ham(i8* %tmp1)
+; CHECK:         call void @test2_ham(
+
+  call void @test2_quux()
+; CHECK-NOT:     call
+;
+; CHECK:         call zeroext i1 @test2_widget(i32* @test2_global)
+; CHECK-NEXT:    br label %[[NEW_BB:.*]]
+;
+; CHECK:       [[NEW_BB]]:
+; CHECK-NEXT:    call zeroext i1 @test2_widget(i32* @test2_global)
+;
+; CHECK:       {{.*}}:
+  ret i1 true
+; CHECK-NEXT:    ret i1 true
+}
+
+define internal void @test2_quux() {
+; CHECK-NOT: @test2_quux
+bb:
+  call void @test2_hoge(i1 (i32*)* @test2_widget)
+  ret void
+}
+
+declare void @test2_hoge.1(i32*)
+
+declare zeroext i1 @test2_barney(i32*)
+
+declare void @test2_ham(i8*)
diff --git a/test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll b/test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll
new file mode 100644
index 000000000000..3c4e08b5b515
--- /dev/null
+++ b/test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: llvm.umul.with.overflow
+define i32 @sterix(i32, i8, i64) {
+entry:
+  %conv = zext i32 %0 to i64
+  %conv1 = sext i8 %1 to i32
+  %mul = mul i32 %conv1, 1945964878
+  %sh_prom = trunc i64 %2 to i32
+  %shr = lshr i32 %mul, %sh_prom
+  %conv2 = zext i32 %shr to i64
+  %mul3 = mul nuw nsw i64 %conv, %conv2
+  %conv6 = and i64 %mul3, 4294967295
+  %tobool = icmp ne i64 %conv6, %mul3
+  br i1 %tobool, label %lor.end, label %lor.rhs
+
+lor.rhs:
+  %and = and i64 %2, %mul3
+  %conv4 = trunc i64 %and to i32
+  %tobool7 = icmp ne i32 %conv4, 0
+  %lnot = xor i1 %tobool7, true
+  br label %lor.end
+
+lor.end:
+  %3 = phi i1 [ true, %entry ], [ %lnot, %lor.rhs ]
+  %conv8 = zext i1 %3 to i32
+  ret i32 %conv8
+}
+
diff --git a/test/Transforms/InstCombine/and-or-not.ll b/test/Transforms/InstCombine/and-or-not.ll
index 1baecb4a13a3..04f7be01eaf5 100644
--- a/test/Transforms/InstCombine/and-or-not.ll
+++ b/test/Transforms/InstCombine/and-or-not.ll
@@ -570,10 +570,8 @@ define i32 @xor_to_xnor1(float %fa, float %fb) {
 ; CHECK-LABEL: @xor_to_xnor1(
 ; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
-; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[A]], [[B]]
-; CHECK-NEXT:    [[OR2_DEMORGAN:%.*]] = and i32 [[A]], [[B]]
-; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[XOR]]
 ;
   %a = fptosi float %fa to i32
@@ -591,10 +589,8 @@ define i32 @xor_to_xnor2(float %fa, float %fb) {
 ; CHECK-LABEL: @xor_to_xnor2(
 ; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
-; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[A]], [[B]]
-; CHECK-NEXT:    [[OR2_DEMORGAN:%.*]] = and i32 [[B]], [[A]]
-; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[XOR]]
 ;
   %a = fptosi float %fa to i32
@@ -612,10 +608,8 @@ define i32 @xor_to_xnor3(float %fa, float %fb) {
 ; CHECK-LABEL: @xor_to_xnor3(
 ; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
-; CHECK-NEXT:    [[OR1_DEMORGAN:%.*]] = and i32 [[A]], [[B]]
-; CHECK-NEXT:    [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[A]], [[B]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[XOR]]
 ;
   %a = fptosi float %fa to i32
@@ -633,10 +627,8 @@ define i32 @xor_to_xnor4(float %fa, float %fb) {
 ; CHECK-LABEL: @xor_to_xnor4(
 ; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
-; CHECK-NEXT:    [[OR1_DEMORGAN:%.*]] = and i32 [[A]], [[B]]
-; CHECK-NEXT:    [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[B]], [[A]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[XOR]]
 ;
   %a = fptosi float %fa to i32
diff --git a/test/Transforms/InstCombine/bswap-fold.ll b/test/Transforms/InstCombine/bswap-fold.ll
index 91678a91962a..260e2330996e 100644
--- a/test/Transforms/InstCombine/bswap-fold.ll
+++ b/test/Transforms/InstCombine/bswap-fold.ll
@@ -1,35 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-define i1 @test1(i16 %t) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i16 %t, 256
-; CHECK-NEXT:    ret i1 [[TMP2]]
-;
-  %tmp1 = call i16 @llvm.bswap.i16( i16 %t )
-  %tmp2 = icmp eq i16 %tmp1, 1
-  ret i1 %tmp2
-}
-
-define i1 @test2(i32 %tmp) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[TMP_UPGRD_1:%.*]] = icmp eq i32 %tmp, 16777216
-; CHECK-NEXT:    ret i1 [[TMP_UPGRD_1]]
-;
-  %tmp34 = tail call i32 @llvm.bswap.i32( i32 %tmp )
-  %tmp.upgrd.1 = icmp eq i32 %tmp34, 1
-  ret i1 %tmp.upgrd.1
-}
-
-define i1 @test3(i64 %tmp) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[TMP_UPGRD_2:%.*]] = icmp eq i64 %tmp, 72057594037927936
-; CHECK-NEXT:    ret i1 [[TMP_UPGRD_2]]
-;
-  %tmp34 = tail call i64 @llvm.bswap.i64( i64 %tmp )
-  %tmp.upgrd.2 = icmp eq i64 %tmp34, 1
-  ret i1 %tmp.upgrd.2
-}
-
 ; rdar://5992453
 ; A & 255
 define i32 @test4(i32 %a) nounwind  {
@@ -241,6 +212,136 @@ define i64 @bs_xor64(i64 %a, i64 %b) #0 {
   ret i64 %tmp3
 }
 
+define <2 x i32> @bs_and32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_and32vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+  %tmp3 = and <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @bs_or32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_or32vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+  %tmp3 = or <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @bs_xor32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_xor32vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+  %tmp3 = xor <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_and32ivec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = and <2 x i32> %tmp1, <i32 100001, i32 100001>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_or32ivec(
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = or <2 x i32> %tmp1, <i32 100001, i32 100001>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @bs_xor32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_xor32ivec(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = xor <2 x i32> %tmp1, <i32 100001, i32 100001>
+  ret <2 x i32> %tmp2
+}
+
+define i64 @bs_and64_multiuse1(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64_multiuse1(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = and i64 %tmp1, %tmp2
+  %tmp4 = mul i64 %tmp3, %tmp1 ; to increase use count of the bswaps
+  %tmp5 = mul i64 %tmp4, %tmp2 ; to increase use count of the bswaps
+  ret i64 %tmp5
+}
+
+define i64 @bs_and64_multiuse2(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64_multiuse2(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[A]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = and i64 %tmp1, %tmp2
+  %tmp4 = mul i64 %tmp3, %tmp1 ; to increase use count of the bswaps
+  ret i64 %tmp4
+}
+
+define i64 @bs_and64_multiuse3(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64_multiuse3(
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = and i64 %tmp1, %tmp2
+  %tmp4 = mul i64 %tmp3, %tmp2 ; to increase use count of the bswaps
+  ret i64 %tmp4
+}
+
+define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64i_multiuse(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], 1000000001
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = and i64 %tmp1, 1000000001
+  %tmp3 = mul i64 %tmp2, %tmp1 ; to increase use count of the bswap
+  ret i64 %tmp3
+}
+
 declare i16 @llvm.bswap.i16(i16)
 declare i32 @llvm.bswap.i32(i32)
 declare i64 @llvm.bswap.i64(i64)
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
diff --git a/test/Transforms/InstCombine/cmp-intrinsic.ll b/test/Transforms/InstCombine/cmp-intrinsic.ll
new file mode 100644
index 000000000000..7fc1d12916bf
--- /dev/null
+++ b/test/Transforms/InstCombine/cmp-intrinsic.ll
@@ -0,0 +1,123 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+declare i33 @llvm.cttz.i33(i33, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i8 @llvm.ctpop.i8(i8)
+declare i11 @llvm.ctpop.i11(i11)
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
+
+define i1 @bswap_eq_i16(i16 %x) {
+; CHECK-LABEL: @bswap_eq_i16(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 %x, 256
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %bs = call i16 @llvm.bswap.i16(i16 %x)
+  %cmp = icmp eq i16 %bs, 1
+  ret i1 %cmp
+}
+
+define i1 @bswap_ne_i32(i32 %x) {
+; CHECK-LABEL: @bswap_ne_i32(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 %x, 33554432
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %bs = tail call i32 @llvm.bswap.i32(i32 %x)
+  %cmp = icmp ne i32 %bs, 2
+  ret i1 %cmp
+}
+
+define <2 x i1> @bswap_eq_v2i64(<2 x i64> %x) {
+; CHECK-LABEL: @bswap_eq_v2i64(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i64> %x, <i64 216172782113783808, i64 216172782113783808>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %bs = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %x)
+  %cmp = icmp eq <2 x i64> %bs, <i64 3, i64 3>
+  ret <2 x i1> %cmp
+}
+
+define i1 @ctlz_eq_bitwidth_i32(i32 %x) {
+; CHECK-LABEL: @ctlz_eq_bitwidth_i32(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+  %cmp = icmp eq i32 %lz, 32
+  ret i1 %cmp
+}
+
+define <2 x i1> @ctlz_ne_bitwidth_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: @ctlz_ne_bitwidth_v2i32(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> %a, zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
+  %cmp = icmp ne <2 x i32> %x, <i32 32, i32 32>
+  ret <2 x i1> %cmp
+}
+
+define i1 @cttz_ne_bitwidth_i33(i33 %x) {
+; CHECK-LABEL: @cttz_ne_bitwidth_i33(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i33 %x, 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
+  %cmp = icmp ne i33 %tz, 33
+  ret i1 %cmp
+}
+
+define <2 x i1> @cttz_eq_bitwidth_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_eq_bitwidth_v2i32(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> %a, zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
+  %cmp = icmp eq <2 x i32> %x, <i32 32, i32 32>
+  ret <2 x i1> %cmp
+}
+
+define i1 @ctpop_eq_zero_i11(i11 %x) {
+; CHECK-LABEL: @ctpop_eq_zero_i11(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i11 %x, 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %pop = tail call i11 @llvm.ctpop.i11(i11 %x)
+  %cmp = icmp eq i11 %pop, 0
+  ret i1 %cmp
+}
+
+define <2 x i1> @ctpop_ne_zero_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @ctpop_ne_zero_v2i32(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> %x, zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x)
+  %cmp = icmp ne <2 x i32> %pop, zeroinitializer
+  ret <2 x i1> %cmp
+}
+
+define i1 @ctpop_eq_bitwidth_i8(i8 %x) {
+; CHECK-LABEL: @ctpop_eq_bitwidth_i8(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 %x, -1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
+  %cmp = icmp eq i8 %pop, 8
+  ret i1 %cmp
+}
+
+define <2 x i1> @ctpop_ne_bitwidth_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @ctpop_ne_bitwidth_v2i32(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> %x, <i32 -1, i32 -1>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x)
+  %cmp = icmp ne <2 x i32> %pop, <i32 32, i32 32>
+  ret <2 x i1> %cmp
+}
+
diff --git a/test/Transforms/InstCombine/consecutive-fences.ll b/test/Transforms/InstCombine/consecutive-fences.ll
index 6f1c41277386..8ecb399f39cb 100644
--- a/test/Transforms/InstCombine/consecutive-fences.ll
+++ b/test/Transforms/InstCombine/consecutive-fences.ll
@@ -4,7 +4,7 @@
 
 ; CHECK-LABEL: define void @tinkywinky
 ; CHECK-NEXT:   fence seq_cst
-; CHECK-NEXT:   fence singlethread acquire
+; CHECK-NEXT:   fence syncscope("singlethread") acquire
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 
@@ -12,21 +12,21 @@ define void @tinkywinky() {
   fence seq_cst
   fence seq_cst
   fence seq_cst
-  fence singlethread acquire
-  fence singlethread acquire
-  fence singlethread acquire
+  fence syncscope("singlethread") acquire
+  fence syncscope("singlethread") acquire
+  fence syncscope("singlethread") acquire
   ret void
 }
 
 ; CHECK-LABEL: define void @dipsy
 ; CHECK-NEXT:   fence seq_cst
-; CHECK-NEXT:   fence singlethread seq_cst
+; CHECK-NEXT:   fence syncscope("singlethread") seq_cst
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 
 define void @dipsy() {
   fence seq_cst
-  fence singlethread seq_cst
+  fence syncscope("singlethread") seq_cst
   ret void
 }
 
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 127fde10e9f7..a12f4206b1c6 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -2979,9 +2979,7 @@ declare i32 @llvm.bswap.i32(i32)
 
 define i1 @bswap_ne(i32 %x, i32 %y) {
 ; CHECK-LABEL: @bswap_ne(
-; CHECK-NEXT:    [[SWAPX:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
-; CHECK-NEXT:    [[SWAPY:%.*]] = call i32 @llvm.bswap.i32(i32 %y)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[SWAPX]], [[SWAPY]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 %x, %y
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %swapx = call i32 @llvm.bswap.i32(i32 %x)
@@ -2994,9 +2992,7 @@ declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
 
 define <8 x i1> @bswap_vec_eq(<8 x i16> %x, <8 x i16> %y) {
 ; CHECK-LABEL: @bswap_vec_eq(
-; CHECK-NEXT:    [[SWAPX:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %x)
-; CHECK-NEXT:    [[SWAPY:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %y)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <8 x i16> [[SWAPX]], [[SWAPY]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <8 x i16> %x, %y
 ; CHECK-NEXT:    ret <8 x i1> [[CMP]]
 ;
   %swapx = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %x)
@@ -3009,9 +3005,7 @@ declare i64 @llvm.bitreverse.i64(i64)
 
 define i1 @bitreverse_eq(i64 %x, i64 %y) {
 ; CHECK-LABEL: @bitreverse_eq(
-; CHECK-NEXT:    [[REVX:%.*]] = call i64 @llvm.bitreverse.i64(i64 %x)
-; CHECK-NEXT:    [[REVY:%.*]] = call i64 @llvm.bitreverse.i64(i64 %y)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[REVX]], [[REVY]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 %x, %y
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %revx = call i64 @llvm.bitreverse.i64(i64 %x)
@@ -3024,9 +3018,7 @@ declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
 
 define <8 x i1> @bitreverse_vec_ne(<8 x i16> %x, <8 x i16> %y) {
 ; CHECK-LABEL: @bitreverse_vec_ne(
-; CHECK-NEXT:    [[REVX:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %x)
-; CHECK-NEXT:    [[REVY:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %y)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i16> [[REVX]], [[REVY]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i16> %x, %y
 ; CHECK-NEXT:    ret <8 x i1> [[CMP]]
 ;
   %revx = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %x)
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index c294d79f15ef..8d2f06edcaf3 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -475,66 +475,6 @@ define <2 x i1> @ctlz_knownbits3_vec(<2 x i8> %arg) {
   ret <2 x i1> %res
 }
 
-define void @cmp.simplify(i32 %a, i32 %b, i1* %c) {
-  %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) nounwind readnone
-  %lz.cmp = icmp eq i32 %lz, 32
-  store volatile i1 %lz.cmp, i1* %c
-  %tz = tail call i32 @llvm.cttz.i32(i32 %a, i1 false) nounwind readnone
-  %tz.cmp = icmp ne i32 %tz, 32
-  store volatile i1 %tz.cmp, i1* %c
-  %pop0 = tail call i32 @llvm.ctpop.i32(i32 %b) nounwind readnone
-  %pop0.cmp = icmp eq i32 %pop0, 0
-  store volatile i1 %pop0.cmp, i1* %c
-  %pop1 = tail call i32 @llvm.ctpop.i32(i32 %b) nounwind readnone
-  %pop1.cmp = icmp eq i32 %pop1, 32
-  store volatile i1 %pop1.cmp, i1* %c
-  ret void
-; CHECK: @cmp.simplify
-; CHECK-NEXT: %lz.cmp = icmp eq i32 %a, 0
-; CHECK-NEXT: store volatile i1 %lz.cmp, i1* %c
-; CHECK-NEXT: %tz.cmp = icmp ne i32 %a, 0
-; CHECK-NEXT: store volatile i1 %tz.cmp, i1* %c
-; CHECK-NEXT: %pop0.cmp = icmp eq i32 %b, 0
-; CHECK-NEXT: store volatile i1 %pop0.cmp, i1* %c
-; CHECK-NEXT: %pop1.cmp = icmp eq i32 %b, -1
-; CHECK-NEXT: store volatile i1 %pop1.cmp, i1* %c
-}
-
-define <2 x i1> @ctlz_cmp_vec(<2 x i32> %a) {
-; CHECK-LABEL: @ctlz_cmp_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> %a, zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[CMP]]
-;
-  %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind readnone
-  %cmp = icmp eq <2 x i32> %x, <i32 32, i32 32>
-  ret <2 x i1> %cmp
-}
-
-define <2 x i1> @cttz_cmp_vec(<2 x i32> %a) {
-; CHECK-LABEL: @cttz_cmp_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> %a, zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[CMP]]
-;
-  %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) nounwind readnone
-  %cmp = icmp ne <2 x i32> %x, <i32 32, i32 32>
-  ret <2 x i1> %cmp
-}
-
-define void @ctpop_cmp_vec(<2 x i32> %a, <2 x i1>* %b) {
-  %pop0 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a) nounwind readnone
-  %pop0.cmp = icmp eq <2 x i32> %pop0, zeroinitializer
-  store volatile <2 x i1> %pop0.cmp, <2 x i1>* %b
-  %pop1 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a) nounwind readnone
-  %pop1.cmp = icmp eq <2 x i32> %pop1, < i32 32, i32 32 >
-  store volatile <2 x i1> %pop1.cmp, <2 x i1>* %b
-  ret void
-; CHECK-LABEL: @ctpop_cmp_vec(
-; CHECK-NEXT: %pop0.cmp = icmp eq <2 x i32> %a, zeroinitializer
-; CHECK-NEXT: store volatile <2 x i1> %pop0.cmp, <2 x i1>* %b
-; CHECK-NEXT: %pop1.cmp = icmp eq <2 x i32> %a, <i32 -1, i32 -1>
-; CHECK-NEXT: store volatile <2 x i1> %pop1.cmp, <2 x i1>* %b
-}
-
 define i32 @ctlz_undef(i32 %Value) {
 ; CHECK-LABEL: @ctlz_undef(
 ; CHECK-NEXT:    ret i32 undef
diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll
index 2164f0df8d27..947971c6c83b 100644
--- a/test/Transforms/InstCombine/or-xor.ll
+++ b/test/Transforms/InstCombine/or-xor.ll
@@ -348,10 +348,8 @@ define i8 @test18(i8 %A, i8 %B) {
 ; ((x | y) ^ (~x | ~y)) -> ~(x ^ y)
 define i32 @test19(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test19(
-; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[OR2_DEMORGAN:%.*]] = and i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[XOR]]
 ;
   %noty = xor i32 %y, -1
@@ -365,10 +363,8 @@ define i32 @test19(i32 %x, i32 %y) {
 ; ((x | y) ^ (~y | ~x)) -> ~(x ^ y)
 define i32 @test20(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test20(
-; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[OR2_DEMORGAN:%.*]] = and i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[OR2_DEMORGAN]], -1
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[XOR]]
 ;
   %noty = xor i32 %y, -1
@@ -382,10 +378,8 @@ define i32 @test20(i32 %x, i32 %y) {
 ; ((~x | ~y) ^ (x | y)) -> ~(x ^ y)
 define i32 @test21(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test21(
-; CHECK-NEXT:    [[OR1_DEMORGAN:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[XOR]]
 ;
   %noty = xor i32 %y, -1
@@ -399,10 +393,8 @@ define i32 @test21(i32 %x, i32 %y) {
 ; ((~x | ~y) ^ (y | x)) -> ~(x ^ y)
 define i32 @test22(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test22(
-; CHECK-NEXT:    [[OR1_DEMORGAN:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[OR1:%.*]] = xor i32 [[OR1_DEMORGAN]], -1
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR2]], [[OR1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[XOR]]
 ;
   %noty = xor i32 %y, -1
diff --git a/test/Transforms/InstCombine/pr33689_same_bitwidth.ll b/test/Transforms/InstCombine/pr33689_same_bitwidth.ll
new file mode 100644
index 000000000000..e5dd019b9b51
--- /dev/null
+++ b/test/Transforms/InstCombine/pr33689_same_bitwidth.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+
+; All the "useless" instructions should be removed and we shouldn't crash.
+
+target datalayout = "p:16:16"
+
+%i64_t = type i64
+
+@a = external global i16
+@b = external global i16*
+
+define void @f() {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[TMP12:%.*]] = alloca [2 x i32], align 8
+; CHECK-NEXT:    [[TMP12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP12]], i16 0, i16 0
+; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint [2 x i32]* [[TMP12]] to i16
+; CHECK-NEXT:    store i16 [[TMP8]], i16* @a, align 2
+; CHECK-NEXT:    unreachable
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP9:%.*]] = load i16*, i16** @b, align 2
+; CHECK-NEXT:    store i16 0, i16* [[TMP9]], align 2
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP12_SUB]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], -1
+; CHECK-NEXT:    store i32 [[TMP11]], i32* [[TMP12_SUB]], align 8
+; CHECK-NEXT:    ret void
+;
+bb0:
+  %tmp1 = alloca %i64_t
+  %tmp2 = bitcast %i64_t* %tmp1 to i32*
+  %useless3 = bitcast %i64_t* %tmp1 to i16*
+  %useless4 = getelementptr inbounds i16, i16* %useless3, i16 undef
+  %useless5 = bitcast i16* %useless4 to i32*
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb0
+  %useless6 = insertvalue [1 x i32*] undef, i32* %tmp2, 0
+  %useless7 = insertvalue [1 x i32*] %useless6, i32* null, 0
+  %tmp8 = ptrtoint i32* %tmp2 to i16
+  store i16 %tmp8, i16* @a
+  unreachable
+
+bb2:                                              ; preds = %bb0
+  %tmp9 = load i16*, i16** @b
+  store i16 0, i16* %tmp9
+  %tmp10 = load i32, i32* %tmp2
+  %tmp11 = sub i32 %tmp10, 1
+  store i32 %tmp11, i32* %tmp2
+  ret void
+}
diff --git a/test/Transforms/InstCombine/select-implied.ll b/test/Transforms/InstCombine/select-implied.ll
index 2100e3eae008..2558745c18f3 100644
--- a/test/Transforms/InstCombine/select-implied.ll
+++ b/test/Transforms/InstCombine/select-implied.ll
@@ -121,3 +121,80 @@ end:
 
 declare void @foo(i32)
 declare i32 @bar(i32)
+
+; CHECK-LABEL: @test_and
+; CHECK: tpath:
+; CHECK-NOT: select
+; CHECK: ret i32 313
+define i32 @test_and(i32 %a, i32 %b) {
+entry:
+  %cmp1 = icmp ne i32 %a, 0
+  %cmp2 = icmp ne i32 %b, 0
+  %and = and i1 %cmp1, %cmp2
+  br i1 %and, label %tpath, label %end
+
+tpath:
+  %cmp3 = icmp eq i32 %a, 0 ;; <-- implied false
+  %c = select i1 %cmp3, i32 0, i32 313
+  ret i32 %c
+
+end:
+  ret i32 0
+}
+
+; cmp1 and cmp2 are false on the 'fpath' path and thus cmp3 is true.
+; CHECK-LABEL: @test_or1
+; CHECK: fpath:
+; CHECK-NOT: select
+; CHECK: ret i32 37
+define i32 @test_or1(i32 %a, i32 %b) {
+entry:
+  %cmp1 = icmp eq i32 %a, 0
+  %cmp2 = icmp eq i32 %b, 0
+  %or = or i1 %cmp1, %cmp2
+  br i1 %or, label %end, label %fpath
+
+fpath:
+  %cmp3 = icmp ne i32 %a, 0  ;; <-- implied true
+  %c = select i1 %cmp3, i32 37, i32 0
+  ret i32 %c
+
+end:
+  ret i32 0
+}
+
+; LHS ==> RHS by definition (true -> true)
+; CHECK-LABEL: @test6
+; CHECK: taken:
+; CHECK-NOT: select
+; CHECK: call void @foo(i32 10)
+define void @test6(i32 %a, i32 %b) {
+  %cmp1 = icmp eq i32 %a, %b
+  br i1 %cmp1, label %taken, label %end
+
+taken:
+  %c = select i1 %cmp1, i32 10, i32 0
+  call void @foo(i32 %c)
+  br label %end
+
+end:
+  ret void
+}
+
+; LHS ==> RHS by definition (false -> false)
+; CHECK-LABEL: @test7
+; CHECK: taken:
+; CHECK-NOT: select
+; CHECK: call void @foo(i32 11)
+define void @test7(i32 %a, i32 %b) {
+  %cmp1 = icmp eq i32 %a, %b
+  br i1 %cmp1, label %end, label %taken
+
+taken:
+  %c = select i1 %cmp1, i32 0, i32 11
+  call void @foo(i32 %c)
+  br label %end
+
+end:
+  ret void
+}
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index c8f2a50b72ed..acfa053daaf8 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1370,3 +1370,10 @@ define i8 @assume_cond_false(i1 %cond, i8 %x, i8 %y) {
   ret i8 %sel
 }
 
+; Test case to make sure we don't consider an all ones float values for converting the select into a sext.
+define <4 x float> @PR33721(<4 x float> %w) {
+entry:
+  %0 = fcmp ole <4 x float> %w, zeroinitializer
+  %1 = select <4 x i1> %0, <4 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <4 x float> zeroinitializer
+  ret <4 x float> %1
+}
diff --git a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
index 5938f9d7321d..715c9413a819 100644
--- a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
+++ b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
@@ -854,3 +854,32 @@ define void @load_factor2_fp128(<4 x fp128>* %ptr) {
   %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
   ret void
 }
+
+define void @load_factor2_wide_pointer(<16 x i32*>* %ptr) {
+; NEON-LABEL:    @load_factor2_wide_pointer(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i32*>* %ptr to i32*
+; NEON-NEXT:       [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP2]], i32 4)
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP4:%.*]] = inttoptr <4 x i32> [[TMP3]] to <4 x i32*>
+; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
+; NEON-NEXT:       [[TMP6:%.*]] = inttoptr <4 x i32> [[TMP5]] to <4 x i32*>
+; NEON-NEXT:       [[TMP7:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8
+; NEON-NEXT:       [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8*
+; NEON-NEXT:       [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP8]], i32 4)
+; NEON-NEXT:       [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
+; NEON-NEXT:       [[TMP10:%.*]] = inttoptr <4 x i32> [[TMP9]] to <4 x i32*>
+; NEON-NEXT:       [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
+; NEON-NEXT:       [[TMP12:%.*]] = inttoptr <4 x i32> [[TMP11]] to <4 x i32*>
+; NEON-NEXT:       [[TMP13:%.*]] = shufflevector <4 x i32*> [[TMP4]], <4 x i32*> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP14:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor2_wide_pointer(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <16 x i32*>, <16 x i32*>* %ptr, align 4
+  %v0 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %v1 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret void
+}
diff --git a/test/Transforms/LoopRotate/pr33701.ll b/test/Transforms/LoopRotate/pr33701.ll
new file mode 100644
index 000000000000..ed162b120982
--- /dev/null
+++ b/test/Transforms/LoopRotate/pr33701.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
+
+define void @func() {
+bb0:
+  br label %bb1
+
+bb1:                                              ; preds = %bb4, %bb0
+  %0 = phi i16 [ %2, %bb4 ], [ 0, %bb0 ]
+  %1 = icmp sle i16 %0, 2
+  br i1 %1, label %bb2, label %bb5
+
+bb2:                                              ; preds = %bb1
+  br i1 undef, label %bb6, label %bb4
+
+bb3:                                              ; No predecessors!
+  br label %bb6
+
+bb4:                                              ; preds = %bb2
+  %2 = add i16 undef, 1
+  br label %bb1
+
+bb5:                                              ; preds = %bb1
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %bb3, %bb2
+  unreachable
+}
diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
index dcd068191e10..ea3f60772319 100644
--- a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
+++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
@@ -14,8 +14,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; current LSR cost model.
 ; CHECK-NOT: = ptrtoint i8* undef to i64
 ; CHECK: .lr.ph
-; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp5, 1
-; CHECK: sub i64 [[TMP]], %tmp6
+; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1
+; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}}
 ; CHECK: ret void
 define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {
 bb:
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll
new file mode 100644
index 000000000000..4ce6f1a79fbf
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -loop-reduce -lsr-filter-same-scaled-reg=true -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.ham = type { i8, i8, [5 x i32], i64, i64, i64 }
+
+@global = external local_unnamed_addr global %struct.ham, align 8
+
+define void @foo() local_unnamed_addr {
+bb:
+  %tmp = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 3), align 8
+  %tmp1 = and i64 %tmp, 1792
+  %tmp2 = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 4), align 8
+  %tmp3 = add i64 %tmp1, %tmp2
+  %tmp4 = load i8*, i8** null, align 8
+  %tmp5 = getelementptr inbounds i8, i8* %tmp4, i64 0
+  %tmp6 = sub i64 0, %tmp3
+  %tmp7 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp6
+  %tmp8 = inttoptr i64 0 to i8*
+  br label %bb9
+
+; Without filtering non-optimal formulae with the same ScaledReg and Scale, the strategy
+; to narrow LSR search space by picking winner reg will generate only one lsr.iv and
+; unoptimal result.
+; CHECK-LABEL: @foo(
+; CHECK: bb9:
+; CHECK-NEXT: = phi i8*
+; CHECK-NEXT: = phi i8*
+
+bb9:                                              ; preds = %bb12, %bb
+  %tmp10 = phi i8* [ %tmp7, %bb ], [ %tmp16, %bb12 ]
+  %tmp11 = phi i8* [ %tmp8, %bb ], [ %tmp17, %bb12 ]
+  br i1 false, label %bb18, label %bb12
+
+bb12:                                             ; preds = %bb9
+  %tmp13 = getelementptr inbounds i8, i8* %tmp10, i64 8
+  %tmp14 = bitcast i8* %tmp13 to i64*
+  %tmp15 = load i64, i64* %tmp14, align 1
+  %tmp16 = getelementptr inbounds i8, i8* %tmp10, i64 16
+  %tmp17 = getelementptr inbounds i8, i8* %tmp11, i64 16
+  br label %bb9
+
+bb18:                                             ; preds = %bb9
+  %tmp19 = icmp ugt i8* %tmp11, null
+  %tmp20 = getelementptr inbounds i8, i8* %tmp10, i64 8
+  %tmp21 = getelementptr inbounds i8, i8* %tmp11, i64 8
+  %tmp22 = select i1 %tmp19, i8* %tmp10, i8* %tmp20
+  %tmp23 = select i1 %tmp19, i8* %tmp11, i8* %tmp21
+  br label %bb24
+
+bb24:                                             ; preds = %bb24, %bb18
+  %tmp25 = phi i8* [ %tmp27, %bb24 ], [ %tmp22, %bb18 ]
+  %tmp26 = phi i8* [ %tmp29, %bb24 ], [ %tmp23, %bb18 ]
+  %tmp27 = getelementptr inbounds i8, i8* %tmp25, i64 1
+  %tmp28 = load i8, i8* %tmp25, align 1
+  %tmp29 = getelementptr inbounds i8, i8* %tmp26, i64 1
+  store i8 %tmp28, i8* %tmp26, align 1
+  %tmp30 = icmp eq i8* %tmp29, %tmp5
+  br label %bb24
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
index 1f31a133e34d..73672e14f78a 100644
--- a/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -1,29 +1,52 @@
-; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S| FileCheck %s
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -S | FileCheck %s -check-prefix=EPILOG-NO-IC
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s -check-prefix=EPILOG
 ; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s -check-prefix=PROLOG
+; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-runtime-epilog=false -unroll-count=2 -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine
 
-; the second RUN generates an epilog remainder block for all the test
+; the third and fifth RUNs generate an epilog/prolog remainder block for all the test
 ; cases below (it does not generate a loop).
 
 ; test with three exiting and three exit blocks.
 ; none of the exit blocks have successors
 define void @test1(i64 %trip, i1 %cond) {
-; CHECK-LABEL: test1
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
-; CHECK-NEXT:    br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
-; CHECK:       entry.new:
-; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]]
-; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
-; CHECK-LABEL:  loop_latch.epil:
-; CHECK-NEXT:     %epil.iter.sub = add i64 %epil.iter, -1
-; CHECK-NEXT:     %epil.iter.cmp = icmp eq i64 %epil.iter.sub, 0
-; CHECK-NEXT:     br i1 %epil.iter.cmp, label %exit2.loopexit.epilog-lcssa, label %loop_header.epil
-; CHECK-LABEL:  loop_latch.7:
-; CHECK-NEXT:     %niter.nsub.7 = add i64 %niter, -8
-; CHECK-NEXT:     %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
-; CHECK-NEXT:     br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
+; EPILOG: test1(
+; EPILOG-NEXT:  entry:
+; EPILOG-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; EPILOG-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; EPILOG-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; EPILOG-NEXT:    br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
+; EPILOG:       entry.new:
+; EPILOG-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]]
+; EPILOG-NEXT:    br label [[LOOP_HEADER:%.*]]
+; EPILOG:  loop_latch.epil:
+; EPILOG-NEXT:     %epil.iter.sub = add i64 %epil.iter, -1
+; EPILOG-NEXT:     %epil.iter.cmp = icmp eq i64 %epil.iter.sub, 0
+; EPILOG-NEXT:     br i1 %epil.iter.cmp, label %exit2.loopexit.epilog-lcssa, label %loop_header.epil
+; EPILOG:  loop_latch.7:
+; EPILOG-NEXT:     %niter.nsub.7 = add i64 %niter, -8
+; EPILOG-NEXT:     %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; EPILOG-NEXT:     br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
+
+; PROLOG: test1(
+; PROLOG-NEXT:  entry:
+; PROLOG-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; PROLOG-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; PROLOG-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; PROLOG-NEXT:    br i1 [[TMP1]], label %loop_header.prol.loopexit, label %loop_header.prol.preheader
+; PROLOG:       loop_header.prol:
+; PROLOG-NEXT:    %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ [[XTRAITER]], %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ]
+; PROLOG-NEXT:    br i1 %cond, label %loop_latch.prol, label %loop_exiting_bb1.prol
+; PROLOG:       loop_latch.prol:
+; PROLOG-NEXT:    %iv_next.prol = add i64 %iv.prol, 1
+; PROLOG-NEXT:    %prol.iter.sub = add i64 %prol.iter, -1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp eq i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %loop_header.prol.loopexit.unr-lcssa, label %loop_header.prol
+; PROLOG:  loop_latch.7:
+; PROLOG-NEXT:     %iv_next.7 = add i64 %iv, 8
+; PROLOG-NEXT:     %cmp.7 = icmp eq i64 %iv_next.7, %trip
+; PROLOG-NEXT:     br i1 %cmp.7, label %exit2.loopexit.unr-lcssa, label %loop_header
 entry:
   br label %loop_header
 
@@ -59,17 +82,30 @@ exit2.loopexit:
 ; %sum.02 and %add. Both of these are incoming values for phi from every exiting
 ; unrolled block.
 define i32 @test2(i32* nocapture %a, i64 %n) {
-; CHECK-LABEL: test2
-; CHECK-LABEL: for.exit2.loopexit:
-; CHECK-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ],
-; CHECK-NEXT:    br label %for.exit2
-; CHECK-LABEL: for.exit2.loopexit2:
-; CHECK-NEXT:    %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
-; CHECK-NEXT:    br label %for.exit2
-; CHECK-LABEL: for.exit2:
-; CHECK-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
-; CHECK-NEXT:    ret i32 %retval
-; CHECK: %niter.nsub.7 = add i64 %niter, -8
+; EPILOG: test2(
+; EPILOG: for.exit2.loopexit:
+; EPILOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ],
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG: for.exit2.loopexit2:
+; EPILOG-NEXT:    %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG: for.exit2:
+; EPILOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; EPILOG-NEXT:    ret i32 %retval
+; EPILOG: %niter.nsub.7 = add i64 %niter, -8
+
+; PROLOG: test2(
+; PROLOG: for.exit2.loopexit:
+; PROLOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ],
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG: for.exit2.loopexit1:
+; PROLOG-NEXT:    %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ]
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG: for.exit2:
+; PROLOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ]
+; PROLOG-NEXT:    ret i32 %retval
+; PROLOG: %indvars.iv.next.7 = add i64 %indvars.iv, 8
+
 entry:
   br label %header
 
@@ -102,25 +138,42 @@ for.exit2:
 ; test with two exiting and three exit blocks.
 ; the non-latch exiting block has a switch.
 define void @test3(i64 %trip, i64 %add) {
-; CHECK-LABEL: test3
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
-; CHECK-NEXT:    br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
-; CHECK:       entry.new:
-; CHECK-NEXT:    %unroll_iter = sub i64 [[TRIP]], [[XTRAITER]]
-; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
-; CHECK-LABEL:  loop_header:
-; CHECK-NEXT:     %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
-; CHECK-NEXT:     %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
-; CHECK-LABEL:  loop_exiting_bb1.7:
-; CHECK-NEXT:     switch i64 %sum.next.6, label %loop_latch.7
-; CHECK-LABEL:  loop_latch.7:
-; CHECK-NEXT:     %sum.next.7 = add i64 %sum.next.6, %add
-; CHECK-NEXT:     %niter.nsub.7 = add i64 %niter, -8
-; CHECK-NEXT:     %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
-; CHECK-NEXT:     br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
+; EPILOG: test3(
+; EPILOG-NEXT:  entry:
+; EPILOG-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; EPILOG-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; EPILOG-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; EPILOG-NEXT:    br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
+; EPILOG:       entry.new:
+; EPILOG-NEXT:    %unroll_iter = sub i64 [[TRIP]], [[XTRAITER]]
+; EPILOG-NEXT:    br label [[LOOP_HEADER:%.*]]
+; EPILOG:  loop_header:
+; EPILOG-NEXT:     %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
+; EPILOG-NEXT:     %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; EPILOG:  loop_exiting_bb1.7:
+; EPILOG-NEXT:     switch i64 %sum.next.6, label %loop_latch.7
+; EPILOG:  loop_latch.7:
+; EPILOG-NEXT:     %sum.next.7 = add i64 %sum.next.6, %add
+; EPILOG-NEXT:     %niter.nsub.7 = add i64 %niter, -8
+; EPILOG-NEXT:     %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; EPILOG-NEXT:     br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
+
+; PROLOG:  test3(
+; PROLOG-NEXT:  entry:
+; PROLOG-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; PROLOG-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; PROLOG-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; PROLOG-NEXT:    br i1 [[TMP1]], label %loop_header.prol.loopexit, label %loop_header.prol.preheader
+; PROLOG:  loop_header:
+; PROLOG-NEXT:     %iv = phi i64 [ %iv.unr, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
+; PROLOG-NEXT:     %sum = phi i64 [ %sum.unr, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
+; PROLOG:  loop_exiting_bb1.7:
+; PROLOG-NEXT:     switch i64 %sum.next.6, label %loop_latch.7
+; PROLOG:  loop_latch.7:
+; PROLOG-NEXT:     %iv_next.7 = add nsw i64 %iv, 8
+; PROLOG-NEXT:     %sum.next.7 = add i64 %sum.next.6, %add
+; PROLOG-NEXT:     %cmp.7 = icmp eq i64 %iv_next.7, %trip
+; PROLOG-NEXT:     br i1 %cmp.7, label %exit2.loopexit.unr-lcssa, label %loop_header
 entry:
   br label %loop_header
 
@@ -153,9 +206,13 @@ exit2.loopexit:
 
 ; FIXME: Support multiple exiting blocks to the same latch exit block.
 define i32 @test4(i32* nocapture %a, i64 %n, i1 %cond) {
-; CHECK-LABEL: test4
-; CHECK-NOT: .unr
-; CHECK-NOT: .epil
+; EPILOG: test4(
+; EPILOG-NOT: .unr
+; EPILOG-NOT: .epil
+
+; PROLOG: test4(
+; PROLOG-NOT: .unr
+; PROLOG-NOT: .prol
 entry:
   br label %header
 
@@ -184,21 +241,68 @@ for.exit2:
   ret i32 42
 }
 
+; FIXME: Support multiple exiting blocks to the unique exit block.
+define void @unique_exit(i32 %arg) {
+; EPILOG: unique_exit(
+; EPILOG-NOT: .unr
+; EPILOG-NOT: .epil
+
+; PROLOG: unique_exit(
+; PROLOG-NOT: .unr
+; PROLOG-NOT: .prol
+entry:
+  %tmp = icmp sgt i32 undef, %arg
+  br i1 %tmp, label %preheader, label %returnblock
+
+preheader:                                 ; preds = %entry
+  br label %header
+
+LoopExit:                                ; preds = %header, %latch
+  %tmp2.ph = phi i32 [ %tmp4, %header ], [ -1, %latch ]
+  br label %returnblock
+
+returnblock:                                         ; preds = %LoopExit, %entry
+  %tmp2 = phi i32 [ -1, %entry ], [ %tmp2.ph, %LoopExit ]
+  ret void
+
+header:                                           ; preds = %preheader, %latch
+  %tmp4 = phi i32 [ %inc, %latch ], [ %arg, %preheader ]
+  %inc = add nsw i32 %tmp4, 1
+  br i1 true, label %LoopExit, label %latch
+
+latch:                                            ; preds = %header
+  %cmp = icmp slt i32 %inc, undef
+  br i1 %cmp, label %header, label %LoopExit
+}
+
 ; two exiting and two exit blocks.
 ; the non-latch exiting block has duplicate edges to the non-latch exit block.
 define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
-; CHECK-LABEL: test5
-; CHECK-LABEL:   exit1.loopexit:
-; CHECK-NEXT:      %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
-; CHECK-NEXT:      br label %exit1
-; CHECK-LABEL:   exit1.loopexit2:
-; CHECK-NEXT:      %ivy.epil = add i64 %iv.epil, %add
-; CHECK-NEXT:      br label %exit1
-; CHECK-LABEL:   exit1:
-; CHECK-NEXT:      %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.epil, %exit1.loopexit2 ]
-; CHECK-NEXT:      ret i64 %result
-; CHECK-LABEL:   loop_latch.7:
-; CHECK: %niter.nsub.7 = add i64 %niter, -8
+; EPILOG: test5(
+; EPILOG:   exit1.loopexit:
+; EPILOG-NEXT:      %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
+; EPILOG-NEXT:      br label %exit1
+; EPILOG:   exit1.loopexit2:
+; EPILOG-NEXT:      %ivy.epil = add i64 %iv.epil, %add
+; EPILOG-NEXT:      br label %exit1
+; EPILOG:   exit1:
+; EPILOG-NEXT:      %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.epil, %exit1.loopexit2 ]
+; EPILOG-NEXT:      ret i64 %result
+; EPILOG:   loop_latch.7:
+; EPILOG:      %niter.nsub.7 = add i64 %niter, -8
+
+; PROLOG: test5(
+; PROLOG:   exit1.loopexit:
+; PROLOG-NEXT:      %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
+; PROLOG-NEXT:      br label %exit1
+; PROLOG:   exit1.loopexit1:
+; PROLOG-NEXT:      %ivy.prol = add i64 %iv.prol, %add
+; PROLOG-NEXT:      br label %exit1
+; PROLOG:   exit1:
+; PROLOG-NEXT:      %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.prol, %exit1.loopexit1 ]
+; PROLOG-NEXT:      ret i64 %result
+; PROLOG:   loop_latch.7:
+; PROLOG:      %iv_next.7 = add nsw i64 %iv, 8
 entry:
   br label %loop_header
 
@@ -230,18 +334,31 @@ latchexit:
 
 ; test when exit blocks have successors.
 define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
-; CHECK-LABEL: test6
-; CHECK-LABEL:   for.exit2.loopexit:
-; CHECK-NEXT:      %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ],
-; CHECK-NEXT:      br label %for.exit2
-; CHECK-LABEL:   for.exit2.loopexit2:
-; CHECK-NEXT:      %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
-; CHECK-NEXT:      br label %for.exit2
-; CHECK-LABEL:   for.exit2:
-; CHECK-NEXT:      %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
-; CHECK-NEXT:      br i1 %cond, label %exit_true, label %exit_false
-; CHECK-LABEL:   latch.7:
-; CHECK:           %niter.nsub.7 = add i64 %niter, -8
+; EPILOG: test6(
+; EPILOG:   for.exit2.loopexit:
+; EPILOG-NEXT:      %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ],
+; EPILOG-NEXT:      br label %for.exit2
+; EPILOG:   for.exit2.loopexit2:
+; EPILOG-NEXT:      %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; EPILOG-NEXT:      br label %for.exit2
+; EPILOG:   for.exit2:
+; EPILOG-NEXT:      %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; EPILOG-NEXT:      br i1 %cond, label %exit_true, label %exit_false
+; EPILOG:   latch.7:
+; EPILOG:           %niter.nsub.7 = add i64 %niter, -8
+
+; PROLOG: test6(
+; PROLOG:   for.exit2.loopexit:
+; PROLOG-NEXT:      %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ],
+; PROLOG-NEXT:      br label %for.exit2
+; PROLOG:   for.exit2.loopexit1:
+; PROLOG-NEXT:      %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ]
+; PROLOG-NEXT:      br label %for.exit2
+; PROLOG:   for.exit2:
+; PROLOG-NEXT:      %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ]
+; PROLOG-NEXT:      br i1 %cond, label %exit_true, label %exit_false
+; PROLOG: latch.7:
+; PROLOG:   %indvars.iv.next.7 = add i64 %indvars.iv, 8
 entry:
   br label %header
 
@@ -277,3 +394,87 @@ exit_true:
 exit_false:
   ret i32 %addx
 }
+
+; test when value in exit block does not have VMap.
+define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
+; EPILOG-NO-IC: test7(
+; EPILOG-NO-IC: loopexit1.loopexit:
+; EPILOG-NO-IC-NEXT:  %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ], [ %shft, %latch.1 ], [ %shft, %latch.2 ], [ %shft, %latch.3 ], [ %shft, %latch.4 ], [ %shft, %latch.5 ], [ %shft, %latch.6 ]
+; EPILOG-NO-IC-NEXT:  br label %loopexit1
+; EPILOG-NO-IC: loopexit1.loopexit1:
+; EPILOG-NO-IC-NEXT:  %sext3.ph2 = phi i32 [ %shft, %header.epil ]
+; EPILOG-NO-IC-NEXT:  br label %loopexit1
+; EPILOG-NO-IC: loopexit1:
+; EPILOG-NO-IC-NEXT:   %sext3 = phi i32 [ %sext3.ph, %loopexit1.loopexit ], [ %sext3.ph2, %loopexit1.loopexit1 ]
+bb:
+  %tmp = icmp slt i32 undef, 2
+  %sext = sext i32 undef to i64
+  %shft = ashr exact i32 %arg, 16
+  br i1 %tmp, label %loopexit2, label %preheader
+
+preheader:                                              ; preds = %bb2
+  br label %header
+
+header:                                              ; preds = %latch, %preheader
+  %tmp6 = phi i64 [ 1, %preheader ], [ %add, %latch ]
+  br i1 false, label %loopexit1, label %latch
+
+latch:                                              ; preds = %header
+  %add = add nuw nsw i64 %tmp6, 1
+  %tmp9 = icmp slt i64 %add, %sext
+  br i1 %tmp9, label %header, label %latchexit
+
+latchexit:                                             ; preds = %latch
+  unreachable
+
+loopexit2:                                             ; preds = %bb2
+ ret i32 %shft
+
+loopexit1:                                             ; preds = %header
+  %sext3 = phi i32 [ %shft, %header ]
+  ret i32 %sext3
+}
+
+; Nested loop and inner loop is unrolled
+; FIXME: we cannot unroll with epilog remainder currently, because 
+; the outer loop does not contain the epilog preheader and epilog exit (while
+; infact it should). This causes us to choke up on LCSSA form being incorrect in
+; outer loop. However, the exit block where LCSSA fails, is infact still within
+; the outer loop. For now, we just bail out in presence of outer loop and epilog
+; loop is generated.
+; The outer loop header is the preheader for the inner loop and the inner header
+; branches back to the outer loop.
+define void @test8() {
+; EPILOG: test8(
+; EPILOG-NOT: niter
+
+; PROLOG: test8(
+; PROLOG: outerloop:
+; PROLOG-NEXT: phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit ]
+; PROLOG:      %lcmp.mod = icmp eq i64
+; PROLOG-NEXT: br i1 %lcmp.mod, label %innerH.prol.loopexit, label %innerH.prol.preheader
+; PROLOG: latch.6:
+; PROLOG-NEXT: %tmp4.7 = add nsw i64 %tmp3, 8
+; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7
+; PROLOG: latch.7
+; PROLOG-NEXT: %tmp6.7 = icmp ult i64 %tmp4.7, 100
+; PROLOG-NEXT: br i1 %tmp6.7, label %innerH, label %exit.unr-lcssa
+bb:
+  br label %outerloop
+
+outerloop:                                              ; preds = %innerH, %bb
+  %tmp = phi i64 [ 3, %bb ], [ 0, %innerH ]
+  br label %innerH
+
+innerH:                                              ; preds = %latch, %outerloop
+  %tmp3 = phi i64 [ %tmp4, %latch ], [ %tmp, %outerloop ]
+  %tmp4 = add nuw nsw i64 %tmp3, 1
+  br i1 false, label %outerloop, label %latch
+
+latch:                                              ; preds = %innerH
+  %tmp6 = icmp ult i64 %tmp4, 100
+  br i1 %tmp6, label %innerH, label %exit
+
+exit:                                              ; preds = %latch
+  ret void
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
index 04661314eb1d..878f4e8c78f0 100644
--- a/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -170,6 +170,74 @@ for.end:                                          ; preds = %for.cond.for.end_cr
   ret i16 %res.0.lcssa
 }
 
+; dont unroll loop with multiple exit/exiting blocks, unless
+; -runtime-unroll-multi-exit=true
+; single exit, multiple exiting blocks.
+define void @unique_exit(i32 %arg) {
+; PROLOG: unique_exit(
+; PROLOG-NOT: .unr
+
+; EPILOG: unique_exit(
+; EPILOG-NOT: .unr
+entry:
+  %tmp = icmp sgt i32 undef, %arg
+  br i1 %tmp, label %preheader, label %returnblock
+
+preheader:                                 ; preds = %entry
+  br label %header
+
+LoopExit:                                ; preds = %header, %latch
+  %tmp2.ph = phi i32 [ %tmp4, %header ], [ -1, %latch ]
+  br label %returnblock
+
+returnblock:                                         ; preds = %LoopExit, %entry
+  %tmp2 = phi i32 [ -1, %entry ], [ %tmp2.ph, %LoopExit ]
+  ret void
+
+header:                                           ; preds = %preheader, %latch
+  %tmp4 = phi i32 [ %inc, %latch ], [ %arg, %preheader ]
+  %inc = add nsw i32 %tmp4, 1
+  br i1 true, label %LoopExit, label %latch
+
+latch:                                            ; preds = %header
+  %cmp = icmp slt i32 %inc, undef
+  br i1 %cmp, label %header, label %LoopExit
+}
+
+; multiple exit blocks. don't unroll
+define void @multi_exit(i64 %trip, i1 %cond) {
+; PROLOG: multi_exit(
+; PROLOG-NOT: .unr
+
+; EPILOG: multi_exit(
+; EPILOG-NOT: .unr
+entry:
+  br label %loop_header
+
+loop_header:
+  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+  br i1 %cond, label %loop_latch, label %loop_exiting_bb1
+
+loop_exiting_bb1:
+  br i1 false, label %loop_exiting_bb2, label %exit1
+
+loop_exiting_bb2:
+  br i1 false, label %loop_latch, label %exit3
+
+exit3:
+  ret void
+
+loop_latch:
+  %iv_next = add i64 %iv, 1
+  %cmp = icmp ne i64 %iv_next, %trip
+  br i1 %cmp, label %loop_header, label %exit2.loopexit
+
+exit1:
+ ret void
+
+exit2.loopexit:
+  ret void
+}
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.unroll.runtime.disable"}
 
diff --git a/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll b/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll
new file mode 100644
index 000000000000..cd3e89ae7350
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -mcpu=slm -debug 2>&1 | FileCheck -check-prefix=MSG %s
+; REQUIRES: asserts
+; This test should not be vectorized in X86\SLM arch
+; Vectorizing the 64bit multiply in this case is wrong since
+; it can be done with a lower bit mode (notice that the sources is 16bit)
+; Also addq\subq (quad word) has a high cost on SLM arch.
+; this test has a bad performance (regression of -70%) if vectorized on SLM arch
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @no_vec(i32 %LastIndex, i16* nocapture readonly %InputData, i16 signext %lag, i16 signext %Scale) {
+entry:
+; MSG: LV: Selecting VF: 1. 
+  %cmp17 = icmp sgt i32 %LastIndex, 0
+  br i1 %cmp17, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:                                   ; preds = %entry
+  %conv5 = sext i16 %Scale to i64
+  %sh_prom = and i64 %conv5, 4294967295
+  %0 = sext i16 %lag to i64
+  %wide.trip.count = zext i32 %LastIndex to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  %conv8 = trunc i64 %add7 to i32
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %Accumulator.0.lcssa = phi i32 [ 0, %entry ], [ %conv8, %for.cond.cleanup.loopexit ]
+  ret i32 %Accumulator.0.lcssa
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %Accumulator.018 = phi i64 [ 0, %for.body.lr.ph ], [ %add7, %for.body ]
+  %arrayidx = getelementptr inbounds i16, i16* %InputData, i64 %indvars.iv
+  %1 = load i16, i16* %arrayidx, align 2
+  %conv = sext i16 %1 to i64
+  %2 = add nsw i64 %indvars.iv, %0
+  %arrayidx3 = getelementptr inbounds i16, i16* %InputData, i64 %2
+  %3 = load i16, i16* %arrayidx3, align 2 
+  %conv4 = sext i16 %3 to i64
+  %mul = mul nsw i64 %conv4, %conv
+  %shr = ashr i64 %mul, %sh_prom
+  %add7 = add i64 %shr, %Accumulator.018 
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
diff --git a/test/Transforms/LoopVectorize/if-conversion-nest.ll b/test/Transforms/LoopVectorize/if-conversion-nest.ll
index 3a581ebf847e..7f381ae6ad7b 100644
--- a/test/Transforms/LoopVectorize/if-conversion-nest.ll
+++ b/test/Transforms/LoopVectorize/if-conversion-nest.ll
@@ -1,18 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-;CHECK-LABEL: @foo(
-;CHECK: icmp sgt
-;CHECK: icmp sgt
-;CHECK: icmp slt
-;CHECK: select <4 x i1>
-;CHECK: %[[P1:.*]] = select <4 x i1>
-;CHECK: xor <4 x i1>
-;CHECK: and <4 x i1>
-;CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %[[P1]]
-;CHECK: ret
 define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP26:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP26]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[MIN_ITERS_CHECKED:%.*]]
+; CHECK:       min.iters.checked:
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[N]], 3
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[CMP_ZERO:%.*]] = icmp eq i64 [[N_VEC]], 0
+; CHECK-NEXT:    br i1 [[CMP_ZERO]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
+; CHECK-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !3
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD6]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], <i32 19, i32 19, i32 19, i32 19>
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[TMP14:%.*]] = select <4 x i1> [[TMP13]], <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[TMP15:%.*]] = and <4 x i1> [[TMP12]], [[TMP11]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i1> [[TMP11]], [[TMP16]]
+; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP17]], <4 x i32> [[TMP14]], <4 x i32> [[PREDPHI]]
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[PREDPHI7]], <4 x i32>* [[TMP18]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[MIN_ITERS_CHECKED]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END14]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP20]], 19
+; CHECK-NEXT:    br i1 [[CMP6]], label [[IF_END14]], label [[IF_ELSE:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[CMP10:%.*]] = icmp slt i32 [[TMP21]], 4
+; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[CMP10]], i32 4, i32 5
+; CHECK-NEXT:    br label [[IF_END14]]
+; CHECK:       if.end14:
+; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 9, [[FOR_BODY]] ], [ 3, [[IF_THEN]] ], [ [[DOT]], [[IF_ELSE]] ]
+; CHECK-NEXT:    store i32 [[X_0]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !8
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i32 undef
+;
 entry:
   %cmp26 = icmp sgt i32 %n, 0
   br i1 %cmp26, label %for.body, label %for.end
@@ -46,3 +120,4 @@ if.end14:
 for.end:
   ret i32 undef
 }
+
diff --git a/test/Transforms/LoopVectorize/pr33706.ll b/test/Transforms/LoopVectorize/pr33706.ll
new file mode 100644
index 000000000000..b9d0d8a44acc
--- /dev/null
+++ b/test/Transforms/LoopVectorize/pr33706.ll
@@ -0,0 +1,61 @@
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s | FileCheck %s
+
+@global = local_unnamed_addr global i32 0, align 4
+@global.1 = local_unnamed_addr global i32 0, align 4
+@global.2 = local_unnamed_addr global float 0x3EF0000000000000, align 4
+
+; CHECK-LABEL: @PR33706
+; CHECK-NOT: <2 x i32>
+define void @PR33706(float* nocapture readonly %arg, float* nocapture %arg1, i32 %arg2) local_unnamed_addr {
+bb:
+  %tmp = load i32, i32* @global.1, align 4
+  %tmp3 = getelementptr inbounds float, float* %arg, i64 190
+  %tmp4 = getelementptr inbounds float, float* %arg1, i64 512
+  %tmp5 = and i32 %tmp, 65535
+  %tmp6 = icmp ugt i32 %arg2, 65536
+  br i1 %tmp6, label %bb7, label %bb9
+
+bb7:                                              ; preds = %bb
+  %tmp8 = load i32, i32* @global, align 4
+  br label %bb27
+
+bb9:                                              ; preds = %bb
+  %tmp10 = udiv i32 65536, %arg2
+  br label %bb11
+
+bb11:                                             ; preds = %bb11, %bb9
+  %tmp12 = phi i32 [ %tmp20, %bb11 ], [ %tmp5, %bb9 ]
+  %tmp13 = phi float* [ %tmp18, %bb11 ], [ %tmp4, %bb9 ]
+  %tmp14 = phi i32 [ %tmp16, %bb11 ], [ %tmp10, %bb9 ]
+  %tmp15 = phi i32 [ %tmp19, %bb11 ], [ %tmp, %bb9 ]
+  %tmp16 = add nsw i32 %tmp14, -1
+  %tmp17 = sitofp i32 %tmp12 to float
+  store float %tmp17, float* %tmp13, align 4
+  %tmp18 = getelementptr inbounds float, float* %tmp13, i64 1
+  %tmp19 = add i32 %tmp15, %arg2
+  %tmp20 = and i32 %tmp19, 65535
+  %tmp21 = icmp eq i32 %tmp16, 0
+  br i1 %tmp21, label %bb22, label %bb11
+
+bb22:                                             ; preds = %bb11
+  %tmp23 = phi float* [ %tmp18, %bb11 ]
+  %tmp24 = phi i32 [ %tmp19, %bb11 ]
+  %tmp25 = phi i32 [ %tmp20, %bb11 ]
+  %tmp26 = ashr i32 %tmp24, 16
+  store i32 %tmp26, i32* @global, align 4
+  br label %bb27
+
+bb27:                                             ; preds = %bb22, %bb7
+  %tmp28 = phi i32 [ %tmp26, %bb22 ], [ %tmp8, %bb7 ]
+  %tmp29 = phi float* [ %tmp23, %bb22 ], [ %tmp4, %bb7 ]
+  %tmp30 = phi i32 [ %tmp25, %bb22 ], [ %tmp5, %bb7 ]
+  %tmp31 = sext i32 %tmp28 to i64
+  %tmp32 = getelementptr inbounds float, float* %tmp3, i64 %tmp31
+  %tmp33 = load float, float* %tmp32, align 4
+  %tmp34 = sitofp i32 %tmp30 to float
+  %tmp35 = load float, float* @global.2, align 4
+  %tmp36 = fmul float %tmp35, %tmp34
+  %tmp37 = fadd float %tmp33, %tmp36
+  store float %tmp37, float* %tmp29, align 4
+  ret void
+}
diff --git a/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml b/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml
index 17b634acd0e1..558aa9aa73f2 100644
--- a/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml
+++ b/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml
@@ -16,4 +16,5 @@ CfiFunctionDefs:
 CfiFunctionDecls:
   - external
   - external_weak
+  - local_decl
 ...
diff --git a/test/Transforms/LowerTypeTests/import-icall.ll b/test/Transforms/LowerTypeTests/import-icall.ll
index ddeb7fb5c9a2..b4e374720321 100644
--- a/test/Transforms/LowerTypeTests/import-icall.ll
+++ b/test/Transforms/LowerTypeTests/import-icall.ll
@@ -19,6 +19,10 @@ define i8 @use_b() {
   ret i8 %x
 }
 
+define void @local_decl() {
+  call void @local_decl()
+  ret void
+}
 
 declare void @external()
 declare extern_weak void @external_weak()
@@ -33,6 +37,9 @@ declare extern_weak void @external_weak()
 ; CHECK:      define internal i8 @local_b() {
 ; CHECK-NEXT:   call i8 @local_a()
 
+; CHECK:      define void @local_decl()
+; CHECK-NEXT:   call void @local_decl()
+
 ; CHECK: declare void @external()
 ; CHECK: declare extern_weak void @external_weak()
 ; CHECK: declare i8 @local_a()
diff --git a/test/Transforms/NewGVN/pr33720.ll b/test/Transforms/NewGVN/pr33720.ll
new file mode 100644
index 000000000000..3b6c190a4494
--- /dev/null
+++ b/test/Transforms/NewGVN/pr33720.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -newgvn -S %s | FileCheck %s
+
+@f = external local_unnamed_addr global i64
+@b = external local_unnamed_addr global i64
+@e = external local_unnamed_addr global i64
+
+define void @patatino() {
+; CHECK-LABEL: @patatino(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[IF_END24:%.*]], label [[FOR_COND16:%.*]]
+; CHECK:       for.cond2thread-pre-split:
+; CHECK-NEXT:    br i1 false, label [[FOR_BODY:%.*]], label [[FOR_COND8_PREHEADER:%.*]]
+; CHECK:       for.cond8.preheader:
+; CHECK-NEXT:    br i1 undef, label [[L1:%.*]], label %for.cond11thread-pre-split.lr.ph
+; CHECK:       for.cond11thread-pre-split.lr.ph:
+; CHECK-NEXT:    br label [[L1]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ne i64 [[K_2:%.*]], 3
+; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP3]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* @f
+; CHECK-NEXT:    [[OR:%.*]] = or i64 [[TMP0]], [[CONV4]]
+; CHECK-NEXT:    store i64 [[OR]], i64* @f
+; CHECK-NEXT:    [[TOBOOL7:%.*]] = icmp ne i64 [[K_2]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL7]], label %for.cond2thread-pre-split, label [[LOR_RHS:%.*]]
+; CHECK:       lor.rhs:
+; CHECK-NEXT:    store i64 1, i64* @b, align 8
+; CHECK-NEXT:    br label %for.cond2thread-pre-split
+; CHECK:       l1:
+; CHECK-NEXT:    [[K_2]] = phi i64 [ undef, [[L1_PREHEADER:%.*]] ], [ 15, [[FOR_COND8_PREHEADER]] ], [ 5, %for.cond11thread-pre-split.lr.ph ]
+; CHECK-NEXT:    store i64 7, i64* [[J_3:%.*]]
+; CHECK-NEXT:    br label [[FOR_BODY]]
+; CHECK:       for.cond16:
+; CHECK-NEXT:    [[J_0:%.*]] = phi i64* [ @f, [[ENTRY:%.*]] ], [ undef, [[FOR_COND20:%.*]] ], [ @e, [[FOR_COND16]] ]
+; CHECK-NEXT:    br i1 undef, label [[FOR_COND20]], label [[FOR_COND16]]
+; CHECK:       for.cond20:
+; CHECK-NEXT:    [[J_2:%.*]] = phi i64* [ [[J_0]], [[FOR_COND16]] ], [ undef, [[IF_END24]] ]
+; CHECK-NEXT:    br i1 true, label [[IF_END24]], label [[FOR_COND16]]
+; CHECK:       if.end24:
+; CHECK-NEXT:    [[J_3]] = phi i64* [ [[J_2]], [[FOR_COND20]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT:    br i1 false, label [[FOR_COND20]], label [[L1_PREHEADER]]
+; CHECK:       l1.preheader:
+; CHECK-NEXT:    br label [[L1]]
+;
+entry:
+  br i1 undef, label %if.end24, label %for.cond16
+
+for.cond2thread-pre-split:
+  br i1 false, label %for.body, label %for.cond8.preheader
+
+for.cond8.preheader:
+  br i1 undef, label %l1, label %for.cond11thread-pre-split.lr.ph
+
+for.cond11thread-pre-split.lr.ph:
+  br label %l1
+
+for.body:
+  %k.031 = phi i64 [ %k.2, %l1 ], [ 15, %for.cond2thread-pre-split ]
+  %cmp3 = icmp ne i64 %k.031, 3
+  %conv4 = zext i1 %cmp3 to i64
+  %0 = load i64, i64* @f
+  %or = or i64 %0, %conv4
+  store i64 %or, i64* @f
+  %tobool7 = icmp ne i64 %k.031, 0
+  %or.cond = or i1 %tobool7, false
+  br i1 %or.cond, label %for.cond2thread-pre-split, label %lor.rhs
+
+lor.rhs:
+  store i64 1, i64* @b, align 8
+  br label %for.cond2thread-pre-split
+
+l1:
+  %k.2 = phi i64 [ undef, %l1.preheader ], [ 15, %for.cond8.preheader ], [ 5, %for.cond11thread-pre-split.lr.ph ]
+  store i64 7, i64* %j.3
+  br label %for.body
+
+for.cond16:
+  %j.0 = phi i64* [ @f, %entry ], [ %j.2, %for.cond20 ], [ @e, %for.cond16 ]
+  br i1 undef, label %for.cond20, label %for.cond16
+
+for.cond20:
+  %j.2 = phi i64* [ %j.0, %for.cond16 ], [ %j.3, %if.end24 ]
+  br i1 true, label %if.end24, label %for.cond16
+
+if.end24:
+  %j.3 = phi i64* [ %j.2, %for.cond20 ], [ undef, %entry ]
+  br i1 false, label %for.cond20, label %l1.preheader
+
+l1.preheader:
+  br label %l1
+}
diff --git a/test/Transforms/PGOProfile/counter_promo_exit_merge.ll b/test/Transforms/PGOProfile/counter_promo_exit_merge.ll
index f53d37600ce6..85ca1613c8ad 100644
--- a/test/Transforms/PGOProfile/counter_promo_exit_merge.ll
+++ b/test/Transforms/PGOProfile/counter_promo_exit_merge.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s
-; RUN: opt < %s --passes=instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s
+; RUN: opt < %s -instrprof -do-counter-promotion=true -speculative-counter-promotion-max-exiting=3 -S | FileCheck --check-prefix=PROMO %s
+; RUN: opt < %s --passes=instrprof -do-counter-promotion=true -speculative-counter-promotion-max-exiting=3 -S | FileCheck --check-prefix=PROMO %s
 
 $__llvm_profile_raw_version = comdat any
 
diff --git a/test/Transforms/PGOProfile/counter_promo_mexits.ll b/test/Transforms/PGOProfile/counter_promo_mexits.ll
index 71e5f066d50f..bb799757a47c 100644
--- a/test/Transforms/PGOProfile/counter_promo_mexits.ll
+++ b/test/Transforms/PGOProfile/counter_promo_mexits.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s
-; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s
+; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true -speculative-counter-promotion-max-exiting=3 -S | FileCheck --check-prefix=PROMO %s
+; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -speculative-counter-promotion-max-exiting=3 -S | FileCheck --check-prefix=PROMO %s
 
 @g = common local_unnamed_addr global i32 0, align 4
 
diff --git a/test/Transforms/PGOProfile/counter_promo_nest.ll b/test/Transforms/PGOProfile/counter_promo_nest.ll
new file mode 100644
index 000000000000..b7f117b3e949
--- /dev/null
+++ b/test/Transforms/PGOProfile/counter_promo_nest.ll
@@ -0,0 +1,165 @@
+; TEST that counter updates are promoted outside the whole loop nest
+; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true -S | FileCheck --check-prefix=PROMO  %s
+; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -S | FileCheck --check-prefix=PROMO  %s 
+
+@g = common local_unnamed_addr global i32 0, align 4
+@c = local_unnamed_addr global i32 10, align 4
+
+; Function Attrs: noinline norecurse nounwind uwtable
+define void @bar() local_unnamed_addr #0 {
+bb:
+  %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+  %tmp3 = add nsw i32 %tmp2, 1
+  store i32 %tmp3, i32* @g, align 4, !tbaa !2
+  ret void
+}
+
+; Function Attrs: norecurse nounwind uwtable
+define i32 @main() local_unnamed_addr #1 {
+bb:
+  store i32 0, i32* @g, align 4, !tbaa !2
+  %tmp = load i32, i32* @c, align 4, !tbaa !2
+  %tmp1 = icmp sgt i32 %tmp, 0
+  br i1 %tmp1, label %bb2_1, label %bb84
+
+bb2_1:
+  br label %bb2
+
+bb2:                                              ; preds = %bb39, %bb
+  %tmp3 = phi i32 [ %tmp40, %bb39 ], [ %tmp, %bb2_1 ]
+  %tmp5 = phi i32 [ %tmp43, %bb39 ], [ 0, %bb2_1 ]
+  %tmp7 = icmp sgt i32 %tmp3, 0
+  br i1 %tmp7, label %bb14_1, label %bb39
+
+bb8:                                              ; preds = %bb39
+; PROMO-LABEL: bb8
+; PROMO: load {{.*}} @__profc_main{{.*}}
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_main{{.*}}
+; PROMO-NEXT: load {{.*}} @__profc_main{{.*}}
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_main{{.*}}
+; PROMO-NEXT: load {{.*}} @__profc_main{{.*}}
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_main{{.*}}
+; PROMO-NEXT: load {{.*}} @__profc_main{{.*}}
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_main{{.*}}
+; PROMO-NEXT: load {{.*}} @__profc_main{{.*}}
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_main{{.*}}
+
+  %tmp13 = icmp sgt i32 %tmp40, 0
+  br i1 %tmp13, label %bb45, label %bb84
+
+bb14_1:
+  br label %bb14
+
+bb14:                                             ; preds = %bb29, %bb2
+  %tmp15 = phi i32 [ %tmp30, %bb29 ], [ %tmp3, %bb14_1 ]
+  %tmp16 = phi i64 [ %tmp31, %bb29 ], [ 0, %bb14_1 ]
+  %tmp17 = phi i64 [ %tmp32, %bb29 ], [ 0, %bb14_1 ]
+  %tmp18 = phi i32 [ %tmp33, %bb29 ], [ 0, %bb14_1 ]
+  %tmp19 = icmp sgt i32 %tmp15, 0
+  br i1 %tmp19, label %bb20_split, label %bb29
+
+bb20_split:                                             
+ br label %bb20
+
+bb20:                                             ; preds = %bb20, %bb14
+  %tmp21 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb20_split ]
+  %tmp22 = phi i32 [ %tmp24, %bb20 ], [ 0, %bb20_split ]
+  %tmp23 = add nuw i64 %tmp21, 1
+  tail call void @bar()
+  %tmp24 = add nuw nsw i32 %tmp22, 1
+  %tmp25 = load i32, i32* @c, align 4, !tbaa !2
+  %tmp26 = icmp slt i32 %tmp24, %tmp25
+  br i1 %tmp26, label %bb20, label %bb27
+
+bb27:                                             ; preds = %bb20
+  %tmp28 = add i64 %tmp23, %tmp16
+  br label %bb29
+
+bb29:                                             ; preds = %bb27, %bb14
+  %tmp30 = phi i32 [ %tmp25, %bb27 ], [ %tmp15, %bb14 ]
+  %tmp31 = phi i64 [ %tmp28, %bb27 ], [ %tmp16, %bb14 ]
+  %tmp32 = add nuw i64 %tmp17, 1
+  %tmp33 = add nuw nsw i32 %tmp18, 1
+  %tmp34 = icmp slt i32 %tmp33, %tmp30
+  br i1 %tmp34, label %bb14, label %bb35
+
+bb35:                                             ; preds = %bb29
+  %tmp36 = insertelement <2 x i64> undef, i64 %tmp31, i32 0
+  br label %bb39
+
+bb39:                                             ; preds = %bb35, %bb2
+  %tmp40 = phi i32 [ %tmp30, %bb35 ], [ %tmp3, %bb2 ]
+  %tmp43 = add nuw nsw i32 %tmp5, 1
+  %tmp44 = icmp slt i32 %tmp43, %tmp40
+  br i1 %tmp44, label %bb2, label %bb8
+
+bb45:                                             ; preds = %bb67, %bb8
+  %tmp46 = phi i32 [ %tmp68, %bb67 ], [ %tmp40, %bb8 ]
+  %tmp47 = phi i64 [ %tmp69, %bb67 ], [ 0, %bb8 ]
+  %tmp48 = phi i64 [ %tmp70, %bb67 ], [ 0, %bb8 ]
+  %tmp49 = phi i32 [ %tmp71, %bb67 ], [ 0, %bb8 ]
+  %tmp50 = icmp sgt i32 %tmp46, 0
+  br i1 %tmp50, label %bb57, label %bb67
+
+bb51:                                             ; preds = %bb67
+  %tmp56 = icmp sgt i32 %tmp68, 0
+  br i1 %tmp56, label %bb73, label %bb84
+
+bb57:                                             ; preds = %bb57, %bb45
+  %tmp58 = phi i64 [ %tmp60, %bb57 ], [ 0, %bb45 ]
+  %tmp59 = phi i32 [ %tmp61, %bb57 ], [ 0, %bb45 ]
+  %tmp60 = add nuw i64 %tmp58, 1
+  tail call void @bar()
+  %tmp61 = add nuw nsw i32 %tmp59, 1
+  %tmp62 = load i32, i32* @c, align 4, !tbaa !2
+  %tmp63 = mul nsw i32 %tmp62, 10
+  %tmp64 = icmp slt i32 %tmp61, %tmp63
+  br i1 %tmp64, label %bb57, label %bb65
+
+bb65:                                             ; preds = %bb57
+  %tmp66 = add i64 %tmp60, %tmp47
+  br label %bb67
+
+bb67:                                             ; preds = %bb65, %bb45
+  %tmp68 = phi i32 [ %tmp62, %bb65 ], [ %tmp46, %bb45 ]
+  %tmp69 = phi i64 [ %tmp66, %bb65 ], [ %tmp47, %bb45 ]
+  %tmp70 = add nuw i64 %tmp48, 1
+  %tmp71 = add nuw nsw i32 %tmp49, 1
+  %tmp72 = icmp slt i32 %tmp71, %tmp68
+  br i1 %tmp72, label %bb45, label %bb51
+
+bb73:                                             ; preds = %bb73, %bb51
+  %tmp74 = phi i64 [ %tmp76, %bb73 ], [ 0, %bb51 ]
+  %tmp75 = phi i32 [ %tmp77, %bb73 ], [ 0, %bb51 ]
+  %tmp76 = add nuw i64 %tmp74, 1
+  tail call void @bar()
+  %tmp77 = add nuw nsw i32 %tmp75, 1
+  %tmp78 = load i32, i32* @c, align 4, !tbaa !2
+  %tmp79 = mul nsw i32 %tmp78, 100
+  %tmp80 = icmp slt i32 %tmp77, %tmp79
+  br i1 %tmp80, label %bb73, label %bb81
+
+bb81:                                             ; preds = %bb73
+  br label %bb84
+
+bb84:                                             ; preds = %bb81, %bb51, %bb8, %bb
+  ret i32 0
+}
+
+attributes #0 = { noinline }
+attributes #1 = { norecurse nounwind uwtable } 
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 307355)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
diff --git a/test/Transforms/SimplifyCFG/implied-and-or.ll b/test/Transforms/SimplifyCFG/implied-and-or.ll
new file mode 100644
index 000000000000..e615f302feef
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/implied-and-or.ll
@@ -0,0 +1,183 @@
+; RUN: opt %s -S -simplifycfg | FileCheck %s
+
+declare void @foo()
+declare void @bar()
+
+
+; CHECK-LABEL: @test_and1
+; CHECK: taken:
+; CHECK-NOT: cmp3
+; CHECK: call void @bar()
+; CHECK-NEXT: call void @foo()
+; CHECK: ret
+define void @test_and1(i32 %a, i32 %b) {
+entry:
+  %cmp1 = icmp eq i32 %a, 0
+  %cmp2 = icmp eq i32 %b, 0
+  %and = and i1 %cmp1, %cmp2
+  br i1 %and, label %taken, label %end
+
+taken:
+  call void @bar()
+  %cmp3 = icmp eq i32 %a, 0  ;; <-- implied true
+  br i1 %cmp3, label %if.then, label %end
+
+if.then:
+  call void @foo()
+  br label %end
+
+end:
+  ret void
+}
+
+; We can't infer anything if the result of the 'and' is false
+; CHECK-LABEL: @test_and2
+; CHECK: taken:
+; CHECK:   call void @bar()
+; CHECK:   %cmp3
+; CHECK:   br i1 %cmp3
+; CHECK: if.then:
+; CHECK:   call void @foo()
+; CHECK: ret
+define void @test_and2(i32 %a, i32 %b) {
+entry:
+  %cmp1 = icmp eq i32 %a, 0
+  %cmp2 = icmp eq i32 %b, 0
+  %and = and i1 %cmp1, %cmp2
+  br i1 %and, label %end, label %taken
+
+taken:
+  call void @bar()
+  %cmp3 = icmp eq i32 %a, 0
+  br i1 %cmp3, label %if.then, label %end
+
+if.then:
+  call void @foo()
+  br label %end
+
+end:
+  ret void
+}
+
+; CHECK-LABEL: @test_or1
+; CHECK: taken:
+; CHECK-NOT: cmp3
+; CHECK: call void @bar()
+; CHECK-NEXT: call void @foo()
+; CHECK: ret
+define void @test_or1(i32 %a, i32 %b) {
+entry:
+  %cmp1 = icmp eq i32 %a, 0
+  %cmp2 = icmp eq i32 %b, 0
+  %or = or i1 %cmp1, %cmp2
+  br i1 %or, label %end, label %taken
+
+taken:
+  call void @bar()
+  %cmp3 = icmp ne i32 %a, 0   ;; <-- implied true
+  br i1 %cmp3, label %if.then, label %end
+
+if.then:
+  call void @foo()
+  br label %end
+
+end:
+  ret void
+}
+
+; We can't infer anything if the result of the 'or' is true
+; CHECK-LABEL: @test_or2
+; CHECK:   call void @bar()
+; CHECK:   %cmp3
+; CHECK:   br i1 %cmp3
+; CHECK: if.then:
+; CHECK:   call void @foo()
+; CHECK: ret
+define void @test_or2(i32 %a, i32 %b) {
+entry:
+  %cmp1 = icmp eq i32 %a, 0
+  %cmp2 = icmp eq i32 %b, 0
+  %or = or i1 %cmp1, %cmp2
+  br i1 %or, label %taken, label %end
+
+taken:
+  call void @bar()
+  %cmp3 = icmp eq i32 %a, 0
+  br i1 %cmp3, label %if.then, label %end
+
+if.then:
+  call void @foo()
+  br label %end
+
+end:
+  ret void
+}
+
+; We can recurse a tree of 'and' or 'or's.
+; CHECK-LABEL: @test_and_recurse1
+; CHECK: taken:
+; CHECK-NEXT:  call void @bar()
+; CHECK-NEXT:  call void @foo()
+; CHECK-NEXT:  br label %end
+; CHECK: ret
+define void @test_and_recurse1(i32 %a, i32 %b, i32 %c) {
+entry:
+  %cmpa = icmp eq i32 %a, 0
+  %cmpb = icmp eq i32 %b, 0
+  %cmpc = icmp eq i32 %c, 0
+  %and1 = and i1 %cmpa, %cmpb
+  %and2 = and i1 %and1, %cmpc
+  br i1 %and2, label %taken, label %end
+
+taken:
+  call void @bar()
+  %cmp3 = icmp eq i32 %a, 0
+  br i1 %cmp3, label %if.then, label %end
+
+if.then:
+  call void @foo()
+  br label %end
+
+end:
+  ret void
+}
+
+; Check to make sure we don't recurse too deep.
+; CHECK-LABEL: @test_and_recurse2
+; CHECK: taken:
+; CHECK-NEXT:  call void @bar()
+; CHECK-NEXT:  %cmp3 = icmp eq i32 %a, 0
+; CHECK-NEXT:  br i1 %cmp3, label %if.then, label %end
+; CHECK: ret
+define void @test_and_recurse2(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
+                               i32 %g, i32 %h) {
+entry:
+  %cmpa = icmp eq i32 %a, 0
+  %cmpb = icmp eq i32 %b, 0
+  %cmpc = icmp eq i32 %c, 0
+  %cmpd = icmp eq i32 %d, 0
+  %cmpe = icmp eq i32 %e, 0
+  %cmpf = icmp eq i32 %f, 0
+  %cmpg = icmp eq i32 %g, 0
+  %cmph = icmp eq i32 %h, 0
+  %and1 = and i1 %cmpa, %cmpb
+  %and2 = and i1 %and1, %cmpc
+  %and3 = and i1 %and2, %cmpd
+  %and4 = and i1 %and3, %cmpe
+  %and5 = and i1 %and4, %cmpf
+  %and6 = and i1 %and5, %cmpg
+  %and7 = and i1 %and6, %cmph
+  br i1 %and7, label %taken, label %end
+
+taken:
+  call void @bar()
+  %cmp3 = icmp eq i32 %a, 0 ; <-- can be implied true
+  br i1 %cmp3, label %if.then, label %end
+
+if.then:
+  call void @foo()
+  br label %end
+
+end:
+  ret void
+}
diff --git a/test/Transforms/SimplifyCFG/sink-common-code.ll b/test/Transforms/SimplifyCFG/sink-common-code.ll
index 0f7bfa8516c9..513da477607b 100644
--- a/test/Transforms/SimplifyCFG/sink-common-code.ll
+++ b/test/Transforms/SimplifyCFG/sink-common-code.ll
@@ -818,6 +818,30 @@ merge:
 ; CHECK: right:
 ; CHECK-NEXT:   %val1 = call i32 @call_target() [ "deopt"(i32 20) ]
 
+%T = type {i32, i32}
+
+define i32 @test_insertvalue(i1 zeroext %flag, %T %P) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %t1 = insertvalue %T %P, i32 0, 0
+  br label %if.end
+
+if.else:
+  %t2 = insertvalue %T %P, i32 1, 0
+  br label %if.end
+
+if.end:
+  %t = phi %T [%t1, %if.then], [%t2, %if.else]
+  ret i32 1
+}
+
+; CHECK-LABEL: @test_insertvalue
+; CHECK: select
+; CHECK: insertvalue
+; CHECK-NOT: insertvalue
+
 ; CHECK: ![[TBAA]] = !{![[TYPE:[0-9]]], ![[TYPE]], i64 0}
 ; CHECK: ![[TYPE]] = !{!"float", ![[TEXT:[0-9]]]}
 ; CHECK: ![[TEXT]] = !{!"an example type tree"}
diff --git a/test/Transforms/Sink/fence.ll b/test/Transforms/Sink/fence.ll
index aa237d8192b6..09aa565d88f8 100644
--- a/test/Transforms/Sink/fence.ll
+++ b/test/Transforms/Sink/fence.ll
@@ -5,9 +5,9 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test1(i32* ()*) {
 entry:
   %1 = call i32* %0() #0
-  fence singlethread seq_cst
+  fence syncscope("singlethread") seq_cst
   %2 = load i32, i32* %1, align 4
-  fence singlethread seq_cst
+  fence syncscope("singlethread") seq_cst
   %3 = icmp eq i32 %2, 0
   br i1 %3, label %fail, label %pass
 
@@ -20,9 +20,9 @@ pass:                                             ; preds = %fail, %top
 
 ; CHECK-LABEL: @test1(
 ; CHECK:  %[[call:.*]] = call i32* %0()
-; CHECK:  fence singlethread seq_cst
+; CHECK:  fence syncscope("singlethread") seq_cst
 ; CHECK:  load i32, i32* %[[call]], align 4
-; CHECK:  fence singlethread seq_cst
+; CHECK:  fence syncscope("singlethread") seq_cst
 
 
 attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/ThinLTOBitcodeWriter/pr33536.ll b/test/Transforms/ThinLTOBitcodeWriter/pr33536.ll
new file mode 100644
index 000000000000..661d0739401a
--- /dev/null
+++ b/test/Transforms/ThinLTOBitcodeWriter/pr33536.ll
@@ -0,0 +1,37 @@
+; Test for a bug specific to the new pass manager where we may build a domtree
+; to make more precise AA queries for functions.
+;
+; RUN: opt -aa-pipeline=default -passes='no-op-module' -debug-pass-manager -thinlto-bc -o %t %s
+; RUN: llvm-modextract -b -n 0 -o - %t | llvm-dis | FileCheck --check-prefix=M0 %s
+; RUN: llvm-modextract -b -n 1 -o - %t | llvm-dis | FileCheck --check-prefix=M1 %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.hoge = type { %struct.widget }
+%struct.widget = type { i32 (...)** }
+
+; M0: @global = local_unnamed_addr global
+; M1-NOT: @global
+@global = local_unnamed_addr global %struct.hoge { %struct.widget { i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @global.1, i32 0, inrange i32 0, i32 2) to i32 (...)**) } }, align 8
+
+; M0: @global.1 = external unnamed_addr constant
+; M1: @global.1 = linkonce_odr unnamed_addr constant
+@global.1 = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @global.4 to i8*), i8* bitcast (i32 (%struct.widget*)* @quux to i8*)] }, align 8, !type !0
+
+; M0: @global.2 = external global
+; M1-NOT: @global.2
+@global.2 = external global i8*
+
+; M0: @global.3 = linkonce_odr constant
+; M1-NOT: @global.3
+@global.3 = linkonce_odr constant [22 x i8] c"zzzzzzzzzzzzzzzzzzzzz\00"
+
+; M0: @global.4 = linkonce_odr constant
+; M1: @global.4 = external constant
+@global.4 = linkonce_odr constant { i8*, i8* }{ i8* bitcast (i8** getelementptr inbounds (i8*, i8** @global.2, i64 2) to i8*), i8* getelementptr inbounds ([22 x i8], [22 x i8]* @global.3, i32 0, i32 0) }
+
+@llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer
+
+declare i32 @quux(%struct.widget*) unnamed_addr
+
+!0 = !{i64 16, !"yyyyyyyyyyyyyyyyyyyyyyyyy"}