vendor/llvm/llvm-trunk-r304149

author: Dimitry Andric <dim@FreeBSD.org> 2017-05-29 16:25:25 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-05-29 16:25:25 +0000
commit: ab44ce3d598882e51a25eb82eb7ae6308de85ae6 (patch)
tree: 568d786a59d49bef961dcb9bd09d422701b9da5b /test/Transforms
parent: b5630dbadf9a2a06754194387d6b0fd9962a67f1 (diff)
38 files changed, 2542 insertions, 265 deletions
diff --git a/test/Transforms/Coroutines/coro-debug.ll b/test/Transforms/Coroutines/coro-debug.ll
new file mode 100644
index 000000000000..4da545499f94
--- /dev/null
+++ b/test/Transforms/Coroutines/coro-debug.ll
@@ -0,0 +1,142 @@
+; Tests that debug information is sane after coro-split
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+source_filename = "simple-repro.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind
+define i8* @f(i32 %x) #0 !dbg !6 {
+entry:
+  %x.addr = alloca i32, align 4
+  %coro_hdl = alloca i8*, align 8
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !13), !dbg !14
+  call void @llvm.dbg.declare(metadata i8** %coro_hdl, metadata !15, metadata !13), !dbg !16
+  %0 = call token @llvm.coro.id(i32 0, i8* null, i8* bitcast (i8* (i32)* @f to i8*), i8* null), !dbg !16
+  %1 = call i64 @llvm.coro.size.i64(), !dbg !16
+  %call = call i8* @malloc(i64 %1), !dbg !16
+  %2 = call i8* @llvm.coro.begin(token %0, i8* %call) #7, !dbg !16
+  store i8* %2, i8** %coro_hdl, align 8, !dbg !16
+  %3 = call i8 @llvm.coro.suspend(token none, i1 false), !dbg !17
+  %conv = sext i8 %3 to i32, !dbg !17
+  call void @coro.devirt.trigger(i8* null)
+  switch i32 %conv, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+  ], !dbg !17
+
+sw.bb:                                            ; preds = %entry
+  br label %sw.epilog, !dbg !18
+
+sw.bb1:                                           ; preds = %entry
+  br label %coro_Cleanup, !dbg !18
+
+sw.default:                                       ; preds = %entry
+  br label %coro_Suspend, !dbg !18
+
+sw.epilog:                                        ; preds = %sw.bb
+  %4 = load i32, i32* %x.addr, align 4, !dbg !20
+  %add = add nsw i32 %4, 1, !dbg !21
+  store i32 %add, i32* %x.addr, align 4, !dbg !22
+  br label %coro_Cleanup, !dbg !23
+
+coro_Cleanup:                                     ; preds = %sw.epilog, %sw.bb1
+  %5 = load i8*, i8** %coro_hdl, align 8, !dbg !24
+  %6 = call i8* @llvm.coro.free(token %0, i8* %5), !dbg !24
+  call void @free(i8* %6), !dbg !24
+  br label %coro_Suspend, !dbg !24
+
+coro_Suspend:                                     ; preds = %coro_Cleanup, %sw.default
+  %7 = call i1 @llvm.coro.end(i8* null, i1 false) #7, !dbg !24
+  %8 = load i8*, i8** %coro_hdl, align 8, !dbg !24
+  ret i8* %8, !dbg !24
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: argmemonly nounwind readonly
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #2
+
+declare i8* @malloc(i64) #3
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.coro.size.i64() #4
+
+; Function Attrs: nounwind
+declare i8* @llvm.coro.begin(token, i8* writeonly) #5
+
+; Function Attrs: nounwind
+declare i8 @llvm.coro.suspend(token, i1) #5
+
+declare void @free(i8*) #3
+
+; Function Attrs: argmemonly nounwind readonly
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2
+
+; Function Attrs: nounwind
+declare i1 @llvm.coro.end(i8*, i1) #5
+
+; Function Attrs: alwaysinline
+define private void @coro.devirt.trigger(i8*) #6 {
+entry:
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #2
+
+attributes #0 = { noinline nounwind "coroutine.presplit"="1" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { argmemonly nounwind readonly }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind readnone }
+attributes #5 = { nounwind }
+attributes #6 = { alwaysinline }
+attributes #7 = { noduplicate }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (http://llvm.org/git/clang.git 97b002238b11ff30d94d0516d6a0515db5725fd8) (http://llvm.org/git/llvm.git 0cb060ba567f1aa5b4b04e86665f88e4632b528a)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "<stdin>", directory: "C:\5CGitHub\5Cllvm\5Cbuild\5CDebug\5Cbin")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 5.0.0 (http://llvm.org/git/clang.git 97b002238b11ff30d94d0516d6a0515db5725fd8) (http://llvm.org/git/llvm.git 0cb060ba567f1aa5b4b04e86665f88e4632b528a)"}
+!6 = distinct !DISubprogram(name: "f", linkageName: "flink", scope: !7, file: !7, line: 55, type: !8, isLocal: false, isDefinition: true, scopeLine: 55, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!7 = !DIFile(filename: "simple-repro.c", directory: "C:\5CGitHub\5Cllvm\5Cbuild\5CDebug\5Cbin")
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !11}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !DILocalVariable(name: "x", arg: 1, scope: !6, file: !7, line: 55, type: !11)
+!13 = !DIExpression()
+!14 = !DILocation(line: 55, column: 13, scope: !6)
+!15 = !DILocalVariable(name: "coro_hdl", scope: !6, file: !7, line: 56, type: !10)
+!16 = !DILocation(line: 56, column: 3, scope: !6)
+!17 = !DILocation(line: 58, column: 5, scope: !6)
+!18 = !DILocation(line: 58, column: 5, scope: !19)
+!19 = distinct !DILexicalBlock(scope: !6, file: !7, line: 58, column: 5)
+!20 = !DILocation(line: 59, column: 9, scope: !6)
+!21 = !DILocation(line: 59, column: 10, scope: !6)
+!22 = !DILocation(line: 59, column: 7, scope: !6)
+!23 = !DILocation(line: 59, column: 5, scope: !6)
+!24 = !DILocation(line: 62, column: 3, scope: !6)
+
+; CHECK: define i8* @f(i32 %x) #0 !dbg ![[ORIG:[0-9]+]]
+; CHECK: define internal fastcc void @f.resume(%f.Frame* %FramePtr) #0 !dbg ![[RESUME:[0-9]+]]
+; CHECK: define internal fastcc void @f.destroy(%f.Frame* %FramePtr) #0 !dbg ![[DESTROY:[0-9]+]]
+; CHECK: define internal fastcc void @f.cleanup(%f.Frame* %FramePtr) #0 !dbg ![[CLEANUP:[0-9]+]]
+
+; CHECK: ![[ORIG]] = distinct !DISubprogram(name: "f", linkageName: "flink"
+; CHECK: !DILocalVariable(name: "x", arg: 1, scope: ![[ORIG]]
+
+; CHECK: ![[RESUME]] = distinct !DISubprogram(name: "f", linkageName: "flink"
+; CHECK: !DILocalVariable(name: "x", arg: 1, scope: ![[RESUME]]
+
+; CHECK: ![[DESTROY]] = distinct !DISubprogram(name: "f", linkageName: "flink"
+
+; CHECK: ![[CLEANUP]] = distinct !DISubprogram(name: "f", linkageName: "flink"
diff --git a/test/Transforms/Coroutines/coro-frame.ll b/test/Transforms/Coroutines/coro-frame.ll
index 001012fcd0c9..826d3a04fa1e 100644
--- a/test/Transforms/Coroutines/coro-frame.ll
+++ b/test/Transforms/Coroutines/coro-frame.ll
@@ -1,8 +1,11 @@
 ; Check that we can handle spills of the result of the invoke instruction
 ; RUN: opt < %s -coro-split -S | FileCheck %s
 
-define i8* @f() "coroutine.presplit"="1" personality i32 0 {
+define i8* @f(i64 %this) "coroutine.presplit"="1" personality i32 0 {
 entry:
+  %this.addr = alloca i64
+  store i64 %this, i64* %this.addr
+  %this1 = load i64, i64* %this.addr
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %size = call i32 @llvm.coro.size.i32()
   %alloc = call i8* @malloc(i32 %size)
@@ -15,6 +18,7 @@ cont:
                                 i8 1, label %cleanup]
 resume:
   call double @print(double %r)
+  call void @print2(i64 %this1)
   br label %cleanup
 
 cleanup:
@@ -30,12 +34,12 @@ pad:
 }
 
 ; See if the float was added to the frame
-; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, double }
+; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i64, double }
 
 ; See if the float was spilled into the frame
 ; CHECK-LABEL: @f(
 ; CHECK: %r = call double @print(
-; CHECK: %r.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK: %r.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
 ; CHECK: store double %r, double* %r.spill.addr
 ; CHECK: ret i8* %hdl
 
@@ -58,4 +62,5 @@ declare i1 @llvm.coro.end(i8*, i1)
 
 declare noalias i8* @malloc(i32)
 declare double @print(double)
+declare void @print2(i64)
 declare void @free(i8*)
diff --git a/test/Transforms/Coroutines/coro-materialize.ll b/test/Transforms/Coroutines/coro-materialize.ll
new file mode 100644
index 000000000000..95e8a049ad2f
--- /dev/null
+++ b/test/Transforms/Coroutines/coro-materialize.ll
@@ -0,0 +1,52 @@
+; Verifies that we materialize instructions across suspend points
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+define i8* @f(i32 %n) "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  %inc1 = add i32 %n, 1 ; defined before the first suspend, used in %resume2
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume1
+                                  i8 1, label %cleanup]
+resume1:
+  %inc2 = add i32 %inc1, 1 ; defined before the second suspend, used in %resume2
+  %sp2 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp2, label %suspend [i8 0, label %resume2
+                                  i8 1, label %cleanup]
+
+resume2:
+  call void @print(i32 %inc1) ; both values are consumed after the second suspend
+  call void @print(i32 %inc2)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; See that we only spilled one value
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i32 }
+; CHECK-LABEL: @f(
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @free(i8*)
diff --git a/test/Transforms/EarlyCSE/const-speculation.ll b/test/Transforms/EarlyCSE/const-speculation.ll
new file mode 100644
index 000000000000..5b7f2f5b6982
--- /dev/null
+++ b/test/Transforms/EarlyCSE/const-speculation.ll
@@ -0,0 +1,39 @@
+; RUN: opt -early-cse -S %s | FileCheck %s
+
+%mystruct = type { i32 }
+
+; @var is global so that *every* GEP argument is Constant.
+@var = external global %mystruct
+
+; The control flow makes the dominance-tree walk consider the final icmp before
+; it gets to simplify the purely constant one (%tst). That icmp uses the select
+; (through the phi), so the select gets considered next. Finally the select
+; simplification looks at the %tst icmp, and we don't want it to speculate about
+; what happens if "i32 0" is actually "i32 1": broken universes are automatic UB.
+;
+; In this case doing the speculation would create an invalid GEP(@var, 0, 1) and
+; crash.
+
+define i1 @test_constant_speculation() {
+; CHECK-LABEL: define i1 @test_constant_speculation
+entry:
+  br i1 undef, label %end, label %select
+
+select:
+; CHECK: select:
+; CHECK-NOT: icmp
+; CHECK-NOT: getelementptr
+; CHECK-NOT: select
+
+  %tst = icmp eq i32 1, 0
+  %elt = getelementptr %mystruct, %mystruct* @var, i64 0, i32 0
+  %sel = select i1 %tst, i32* null, i32* %elt
+  br label %end
+
+end:
+; CHECK: end:
+; CHECK: %tmp = phi i32* [ null, %entry ], [ getelementptr inbounds (%mystruct, %mystruct* @var, i64 0, i32 0), %select ]
+  %tmp = phi i32* [null, %entry], [%sel, %select]
+  %res = icmp eq i32* %tmp, null
+  ret i1 %res
+}
diff --git a/test/Transforms/GVN/PRE/phi-translate-2.ll b/test/Transforms/GVN/PRE/phi-translate-2.ll
new file mode 100644
index 000000000000..b2993657c7f5
--- /dev/null
+++ b/test/Transforms/GVN/PRE/phi-translate-2.ll
@@ -0,0 +1,105 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@a = common global [100 x i64] zeroinitializer, align 16
+@b = common global [100 x i64] zeroinitializer, align 16
+@g1 = common global i64 0, align 8
+@g2 = common global i64 0, align 8
+@g3 = common global i64 0, align 8
+declare i64 @goo(...) local_unnamed_addr #1
+
+define void @test1(i64 %a, i64 %b, i64 %c, i64 %d) {
+entry:
+  %mul = mul nsw i64 %b, %a
+  store i64 %mul, i64* @g1, align 8
+  %t0 = load i64, i64* @g2, align 8
+  %cmp = icmp sgt i64 %t0, 3
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %mul2 = mul nsw i64 %d, %c
+  store i64 %mul2, i64* @g2, align 8
+  br label %if.end
+
+; Check phi-translate works and mul is removed.
+; CHECK-LABEL: @test1(
+; CHECK: if.end:
+; CHECK: %[[MULPHI:.*]] = phi i64 [ {{.*}}, %if.then ], [ %mul, %entry ]
+; CHECK-NOT: = mul
+; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8
+if.end:                                           ; preds = %if.then, %entry
+  %b.addr.0 = phi i64 [ %d, %if.then ], [ %b, %entry ]
+  %a.addr.0 = phi i64 [ %c, %if.then ], [ %a, %entry ]
+  %mul3 = mul nsw i64 %a.addr.0, %b.addr.0
+  store i64 %mul3, i64* @g3, align 8
+  ret void
+}
+
+define void @test2(i64 %i) {
+entry:
+  %arrayidx = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i
+  %t0 = load i64, i64* %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i
+  %t1 = load i64, i64* %arrayidx1, align 8
+  %mul = mul nsw i64 %t1, %t0
+  store i64 %mul, i64* @g1, align 8
+  %cmp = icmp sgt i64 %mul, 3
+  br i1 %cmp, label %if.then, label %if.end
+
+; Check phi-translate works for the phi generated by loadpre. A new mul will be
+; inserted in if.then block.
+; CHECK-LABEL: @test2(
+; CHECK: if.then:
+; CHECK: %[[MUL_THEN:.*]] = mul
+; CHECK: br label %if.end
+if.then:                                          ; preds = %entry
+  %call = tail call i64 (...) @goo() #2
+  store i64 %call, i64* @g2, align 8
+  br label %if.end
+
+; CHECK: if.end:
+; CHECK: %[[MULPHI:.*]] = phi i64 [ %[[MUL_THEN]], %if.then ], [ %mul, %entry ]
+; CHECK-NOT: = mul
+; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8
+if.end:                                           ; preds = %if.then, %entry
+  %i.addr.0 = phi i64 [ 3, %if.then ], [ %i, %entry ]
+  %arrayidx3 = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i.addr.0
+  %t2 = load i64, i64* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i.addr.0
+  %t3 = load i64, i64* %arrayidx4, align 8
+  %mul5 = mul nsw i64 %t3, %t2
+  store i64 %mul5, i64* @g3, align 8
+  ret void
+}
+
+; Check phi-translate doesn't go through backedge, which may lead to incorrect
+; pre transformation.
+; CHECK: for.end:
+; CHECK-NOT: %{{.*pre-phi}} = phi
+; CHECK: ret void
+define void @test3(i64 %N, i64* nocapture readonly %a) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %add, %for.body ]
+  %add = add nuw nsw i64 %i.0, 1
+  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %add
+  %tmp0 = load i64, i64* %arrayidx, align 8
+  %cmp = icmp slt i64 %i.0, %N
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %call = tail call i64 (...) @goo() #2
+  %add1 = sub nsw i64 0, %call
+  %tobool = icmp eq i64 %tmp0, %add1
+  br i1 %tobool, label %for.cond, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.cond
+  %i.0.lcssa = phi i64 [ %i.0, %for.body ], [ %i.0, %for.cond ]
+  %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %i.0.lcssa
+  %tmp1 = load i64, i64* %arrayidx2, align 8
+  store i64 %tmp1, i64* @g1, align 8
+  ret void
+}
+
diff --git a/test/Transforms/GVN/PRE/pre-gep-load.ll b/test/Transforms/GVN/PRE/pre-gep-load.ll
index 9eec8bb6455b..1b2b4d20d31d 100644
--- a/test/Transforms/GVN/PRE/pre-gep-load.ll
+++ b/test/Transforms/GVN/PRE/pre-gep-load.ll
@@ -37,7 +37,7 @@ sw.bb2:                                           ; preds = %if.end, %entry
   %3 = load double, double* %arrayidx5, align 8
 ; CHECK: sw.bb2:
 ; CHECK-NOT: sext
-; CHECK-NEXT: phi double [
+; CHECK: phi double [
 ; CHECK-NOT: load
   %sub6 = fsub double 3.000000e+00, %3
   br label %return
diff --git a/test/Transforms/GVN/PRE/pre-load.ll b/test/Transforms/GVN/PRE/pre-load.ll
index 685df24f62b6..ffff2b7f08e5 100644
--- a/test/Transforms/GVN/PRE/pre-load.ll
+++ b/test/Transforms/GVN/PRE/pre-load.ll
@@ -72,7 +72,7 @@ block4:
   %PRE = load i32, i32* %P3
   ret i32 %PRE
 ; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
 ; CHECK-NOT: load
 ; CHECK: ret i32
 }
@@ -104,7 +104,7 @@ block4:
   %PRE = load i32, i32* %P3
   ret i32 %PRE
 ; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
 ; CHECK-NOT: load
 ; CHECK: ret i32
 }
@@ -263,7 +263,7 @@ block4:
   %PRE = load i32, i32* %P3
   ret i32 %PRE
 ; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
 ; CHECK-NOT: load
 ; CHECK: ret i32
 }
diff --git a/test/Transforms/GVNSink/dither.ll b/test/Transforms/GVNSink/dither.ll
new file mode 100644
index 000000000000..9717021aca82
--- /dev/null
+++ b/test/Transforms/GVNSink/dither.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -S -gvn-sink | FileCheck %s
+
+; Because %tmp17 has flipped operands to its equivalents %tmp14 and %tmp7, we
+; can't sink the zext as we'd need a shuffling PHI in between.
+;
+; Just sinking the zext isn't profitable, so ensure nothing is sunk.
+
+; CHECK-LABEL: @hoge
+; CHECK-NOT: bb18.gvnsink.split
+define void @hoge() {
+bb:
+  br i1 undef, label %bb4, label %bb11
+
+bb4:                                              ; preds = %bb
+  br i1 undef, label %bb6, label %bb8
+
+bb6:                                              ; preds = %bb4
+  %tmp = zext i16 undef to i64
+  %tmp7 = add i64 %tmp, undef
+  br label %bb18
+
+bb8:                                              ; preds = %bb4
+  %tmp9 = zext i16 undef to i64
+  br label %bb18
+
+bb11:                                             ; preds = %bb
+  br i1 undef, label %bb12, label %bb15
+
+bb12:                                             ; preds = %bb11
+  %tmp13 = zext i16 undef to i64
+  %tmp14 = add i64 %tmp13, undef
+  br label %bb18
+
+bb15:                                             ; preds = %bb11
+  %tmp16 = zext i16 undef to i64
+  %tmp17 = add i64 undef, %tmp16
+  br label %bb18
+
+bb18:                                             ; preds = %bb15, %bb12, %bb8, %bb6
+  %tmp19 = phi i64 [ %tmp7, %bb6 ], [ undef, %bb8 ], [ %tmp14, %bb12 ], [ %tmp17, %bb15 ]
+  unreachable
+}
diff --git a/test/Transforms/GVNSink/indirect-call.ll b/test/Transforms/GVNSink/indirect-call.ll
new file mode 100644
index 000000000000..da98ed0819a6
--- /dev/null
+++ b/test/Transforms/GVNSink/indirect-call.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -gvn-sink -simplifycfg -simplifycfg-sink-common=false -S | FileCheck %s
+
+declare i8 @ext(i1)
+
+define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext) {
+entry:
+  %cmp = icmp uge i32 %blksA, %nblks
+  br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test1
+; CHECK: call i8 @ext
+; CHECK: call i8 %ext
+if.then:
+  %frombool1 = call i8 @ext(i1 %cmp)
+  br label %if.end
+
+if.else:
+  %frombool3 = call i8 %ext(i1 %cmp)
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext) {
+entry:
+  %cmp = icmp uge i32 %blksA, %nblks
+  br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test2
+; CHECK: call i8 %ext
+; CHECK-NOT: call
+if.then:
+  %frombool1 = call i8 %ext(i1 %cmp)
+  br label %if.end
+
+if.else:
+  %frombool3 = call i8 %ext(i1 %cmp)
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+define zeroext i1 @test3(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext1, i8(i1)* %ext2) {
+entry:
+  %cmp = icmp uge i32 %blksA, %nblks
+  br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test3
+; CHECK: %[[x:.*]] = select i1 %flag, i8 (i1)* %ext1, i8 (i1)* %ext2
+; CHECK: call i8 %[[x]](i1 %cmp)
+; CHECK-NOT: call
+if.then:
+  %frombool1 = call i8 %ext1(i1 %cmp)
+  br label %if.end
+
+if.else:
+  %frombool3 = call i8 %ext2(i1 %cmp)
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
diff --git a/test/Transforms/GVNSink/sink-common-code.ll b/test/Transforms/GVNSink/sink-common-code.ll
new file mode 100644
index 000000000000..d9e757cd10fc
--- /dev/null
+++ b/test/Transforms/GVNSink/sink-common-code.ll
@@ -0,0 +1,694 @@
+; RUN: opt < %s -gvn-sink -simplifycfg -simplifycfg-sink-common=false -S | FileCheck %s
+
+define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test1
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.else:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test2
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.else:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp uge i32 %blksA, %add
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+declare i32 @foo(i32, i32) nounwind readnone
+
+define i32 @test3(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+  %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+  br label %if.end
+
+if.else:
+  %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+  %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+  br label %if.end
+
+if.end:
+  %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+  %yy = phi i32 [ %y0, %if.then ], [ %y1, %if.else ]
+  %ret = add i32 %xx, %yy
+  ret i32 %ret
+}
+
+; CHECK-LABEL: test3
+; CHECK: select
+; CHECK: call
+; CHECK: call
+; CHECK: add
+; CHECK-NOT: br
+
+define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %a = add i32 %x, 5
+  store i32 %a, i32* %y
+  br label %if.end
+
+if.else:
+  %b = add i32 %x, 7
+  store i32 %b, i32* %y
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test4
+; CHECK: select
+; CHECK: store
+; CHECK-NOT: store
+
+define i32 @test5(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %a = add i32 %x, 5
+  store volatile i32 %a, i32* %y
+  br label %if.end
+
+if.else:
+  %b = add i32 %x, 7
+  store i32 %b, i32* %y
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test5
+; CHECK: store volatile
+; CHECK: store
+
+define i32 @test6(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %a = add i32 %x, 5
+  store volatile i32 %a, i32* %y
+  br label %if.end
+
+if.else:
+  %b = add i32 %x, 7
+  store volatile i32 %b, i32* %y
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test6
+; CHECK: select
+; CHECK: store volatile
+; CHECK-NOT: store
+
+define i32 @test7(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %z = load volatile i32, i32* %y
+  %a = add i32 %z, 5
+  store volatile i32 %a, i32* %y
+  br label %if.end
+
+if.else:
+  %w = load volatile i32, i32* %y
+  %b = add i32 %w, 7
+  store volatile i32 %b, i32* %y
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test7
+; CHECK-DAG: select
+; CHECK-DAG: load volatile
+; CHECK: store volatile
+; CHECK-NOT: load
+; CHECK-NOT: store
+
+; The extra store in %if.then means %z and %w are not equivalent.
+define i32 @test9(i1 zeroext %flag, i32 %x, i32* %y, i32* %p) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  store i32 7, i32* %p
+  %z = load volatile i32, i32* %y
+  store i32 6, i32* %p
+  %a = add i32 %z, 5
+  store volatile i32 %a, i32* %y
+  br label %if.end
+
+if.else:
+  %w = load volatile i32, i32* %y
+  %b = add i32 %w, 7
+  store volatile i32 %b, i32* %y
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test9
+; CHECK: add
+; CHECK: add
+
+%struct.anon = type { i32, i32 }
+
+; The GEP indexes a struct type so cannot have a variable last index.
+define i32 @test10(i1 zeroext %flag, i32 %x, i32* %y, %struct.anon* %s) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %dummy = add i32 %x, 5
+  %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0
+  store volatile i32 %x, i32* %gepa
+  br label %if.end
+
+if.else:
+  %dummy1 = add i32 %x, 6
+  %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+  store volatile i32 %x, i32* %gepb
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test10
+; CHECK: getelementptr
+; CHECK: store volatile
+; CHECK: getelementptr
+; CHECK: store volatile
+
+; The shufflevector's mask operand cannot be merged in a PHI.
+define i32 @test11(i1 zeroext %flag, i32 %w, <2 x i32> %x, <2 x i32> %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %dummy = add i32 %w, 5
+  %sv1 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 0, i32 1>
+  br label %if.end
+
+if.else:
+  %dummy1 = add i32 %w, 6
+  %sv2 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 1, i32 0>
+  br label %if.end
+
+if.end:
+  %p = phi <2 x i32> [ %sv1, %if.then ], [ %sv2, %if.else ]
+  ret i32 1
+}
+
+; CHECK-LABEL: test11
+; CHECK: shufflevector
+; CHECK: shufflevector
+
+; We can't common an intrinsic!
+define i32 @test12(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %dummy = add i32 %w, 5
+  %sv1 = call i32 @llvm.ctlz.i32(i32 %x)
+  br label %if.end
+
+if.else:
+  %dummy1 = add i32 %w, 6
+  %sv2 = call i32 @llvm.cttz.i32(i32 %x)
+  br label %if.end
+
+if.end:
+  %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ]
+  ret i32 1
+}
+
+declare i32 @llvm.ctlz.i32(i32 %x) readnone
+declare i32 @llvm.cttz.i32(i32 %x) readnone
+
+; CHECK-LABEL: test12
+; CHECK: call i32 @llvm.ctlz
+; CHECK: call i32 @llvm.cttz
+
+; The TBAA metadata should be properly combined.
+define i32 @test13(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %z = load volatile i32, i32* %y
+  %a = add i32 %z, 5
+  store volatile i32 %a, i32* %y, !tbaa !3
+  br label %if.end
+
+if.else:
+  %w = load volatile i32, i32* %y
+  %b = add i32 %w, 7
+  store volatile i32 %b, i32* %y, !tbaa !4
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+!0 = !{ !"an example type tree" }
+!1 = !{ !"int", !0 }
+!2 = !{ !"float", !0 }
+!3 = !{ !"const float", !2, i64 0 }
+!4 = !{ !"special float", !2, i64 1 }
+
+; CHECK-LABEL: test13
+; CHECK-DAG: select
+; CHECK-DAG: load volatile
+; CHECK: store volatile {{.*}}, !tbaa !0
+; CHECK-NOT: load
+; CHECK-NOT: store
+
+; The call should be commoned.
+define i32 @test13a(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %sv1 = call i32 @bar(i32 %x)
+  br label %if.end
+
+if.else:
+  %sv2 = call i32 @bar(i32 %y)
+  br label %if.end
+
+if.end:
+  %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ]
+  ret i32 1
+}
+declare i32 @bar(i32)
+
+; CHECK-LABEL: test13a
+; CHECK: %[[x:.*]] = select i1 %flag
+; CHECK: call i32 @bar(i32 %[[x]])
+
+; The load should be commoned.
+define i32 @test14(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %dummy = add i32 %x, 1
+  %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+  %sv1 = load i32, i32* %gepa
+  %cmp1 = icmp eq i32 %sv1, 56
+  br label %if.end
+
+if.else:
+  %dummy2 = add i32 %x, 4
+  %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+  %sv2 = load i32, i32* %gepb
+  %cmp2 = icmp eq i32 %sv2, 57
+  br label %if.end
+
+if.end:
+  %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ]
+  ret i32 1
+}
+
+; CHECK-LABEL: test14
+; CHECK: getelementptr
+; CHECK: load
+; CHECK-NOT: load
+
+; The load should be commoned.
+define i32 @test15(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %dummy = add i32 %x, 1
+  %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0
+  %sv1 = load i32, i32* %gepa
+  %ext1 = zext i32 %sv1 to i64
+  %cmp1 = icmp eq i64 %ext1, 56
+  br label %if.end
+
+if.else:
+  %dummy2 = add i32 %x, 4
+  %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+  %sv2 = load i32, i32* %gepb
+  %ext2 = zext i32 %sv2 to i64
+  %cmp2 = icmp eq i64 %ext2, 56
+  br label %if.end
+
+if.end:
+  %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ]
+  ret i32 1
+}
+
+; CHECK-LABEL: test15
+; CHECK: getelementptr
+; CHECK: load
+; CHECK-NOT: load
+
+define zeroext i1 @test_crash(i1 zeroext %flag, i32* %i4, i32* %m, i32* %n) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %tmp1 = load i32, i32* %i4
+  %tmp2 = add i32 %tmp1, -1
+  store i32 %tmp2, i32* %i4
+  br label %if.end
+
+if.else:
+  %tmp3 = load i32, i32* %m
+  %tmp4 = load i32, i32* %n
+  %tmp5 = add i32 %tmp3, %tmp4
+  store i32 %tmp5, i32* %i4
+  br label %if.end
+
+if.end:
+  ret i1 true
+}
+
+; CHECK-LABEL: test_crash
+; No checks for test_crash - just ensure it doesn't crash!
+
+define zeroext i1 @test16(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) {
+
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.else:
+  br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+; CHECK-LABEL: test16
+; CHECK: zext
+; CHECK: zext
+
+define zeroext i1 @test16a(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks, i8* %p) {
+
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  %b1 = sext i8 %frombool1 to i32
+  %b2 = trunc i32 %b1 to i8
+  store i8 %b2, i8* %p
+  br label %if.end
+
+if.else:
+  br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  %a1 = sext i8 %frombool3 to i32
+  %a2 = trunc i32 %a1 to i8
+  store i8 %a2, i8* %p
+  br label %if.end
+
+if.end:
+  ret i1 true
+}
+
+; CHECK-LABEL: test16a
+; CHECK: zext
+; CHECK-NOT: zext
+
+define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  switch i32 %flag, label %if.end [
+    i32 0, label %if.then
+    i32 1, label %if.then2
+  ]
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = call i8 @i1toi8(i1 %cmp)
+  %a1 = sext i8 %frombool1 to i32
+  %a2 = trunc i32 %a1 to i8
+  br label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = call i8 @i1toi8(i1 %cmp2)
+  %b1 = sext i8 %frombool3 to i32
+  %b2 = trunc i32 %b1 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %a2, %if.then ], [ %b2, %if.then2 ], [ 0, %entry ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+declare i8 @i1toi8(i1)
+
+; FIXME: DISABLED - we don't consider this profitable. We should
+;  - Consider argument setup/return mov'ing for calls, like InlineCost does.
+;  - Consider the removal of the %obeys.0 PHI (zero PHI movement overall)
+
+; DISABLED-CHECK-LABEL: test17
+; DISABLED-CHECK: if.then:
+; DISABLED-CHECK-NEXT: icmp uge
+; DISABLED-CHECK-NEXT: br label %[[x:.*]]
+
+; DISABLED-CHECK: if.then2:
+; DISABLED-CHECK-NEXT: add
+; DISABLED-CHECK-NEXT: icmp ule
+; DISABLED-CHECK-NEXT: br label %[[x]]
+
+; DISABLED-CHECK: [[x]]:
+; DISABLED-CHECK-NEXT: %[[y:.*]] = phi i1 [ %cmp
+; DISABLED-CHECK-NEXT: %[[z:.*]] = call i8 @i1toi8(i1 %[[y]])
+; DISABLED-CHECK-NEXT: br label %if.end
+
+; DISABLED-CHECK: if.end:
+; DISABLED-CHECK-NEXT: phi i8
+; DISABLED-CHECK-DAG: [ %[[z]], %[[x]] ]
+; DISABLED-CHECK-DAG: [ 0, %entry ]
+
+define zeroext i1 @test18(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  switch i32 %flag, label %if.then3 [
+    i32 0, label %if.then
+    i32 1, label %if.then2
+  ]
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.then3:
+  %add2 = add i32 %nblks, %blksA
+  %cmp3 = icmp ule i32 %add2, %blksA
+  %frombool4 = zext i1 %cmp3 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ %frombool4, %if.then3 ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+; CHECK-LABEL: test18
+; CHECK: if.end:
+; CHECK-NEXT: %[[x:.*]] = phi i1
+; CHECK-DAG: [ %cmp, %if.then ]
+; CHECK-DAG: [ %cmp2, %if.then2 ]
+; CHECK-DAG: [ %cmp3, %if.then3 ]
+; CHECK-NEXT: zext i1 %[[x]] to i8
+
+; The phi is confusing - both add instructions are used by it, but
+; not on their respective unconditional arcs. It should not be
+; optimized.
+define void @test_pr30292(i1 %cond, i1 %cond2, i32 %a, i32 %b) {
+entry:
+  %add1 = add i32 %a, 1
+  br label %succ
+
+one:
+  br i1 %cond, label %two, label %succ
+
+two:
+  call void @g()
+  %add2 = add i32 %a, 1
+  br label %succ
+
+succ:
+  %p = phi i32 [ 0, %entry ], [ %add1, %one ], [ %add2, %two ]
+  br label %one
+}
+declare void @g()
+
+; CHECK-LABEL: test_pr30292
+; CHECK: phi i32 [ 0, %entry ], [ %add1, %succ ], [ %add2, %two ]
+
+define zeroext i1 @test_pr30244(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) {
+
+entry:
+  %p = alloca i8
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  store i8 %frombool1, i8* %p
+  br label %if.end
+
+if.else:
+  br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  store i8 %frombool3, i8* %p
+  br label %if.end
+
+if.end:
+  ret i1 true
+}
+
+; CHECK-LABEL: @test_pr30244
+; CHECK: store
+; CHECK-NOT: store
+
+define i32 @test_pr30373a(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+  %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+  %z0 = lshr i32 %y0, 8
+  br label %if.end
+
+if.else:
+  %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+  %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+  %z1 = lshr exact i32 %y1, 8
+  br label %if.end
+
+if.end:
+  %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+  %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ]
+  %ret = add i32 %xx, %yy
+  ret i32 %ret
+}
+
+; CHECK-LABEL: test_pr30373a
+; CHECK: lshr
+; CHECK-NOT: exact
+; CHECK: }
+
+define i32 @test_pr30373b(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+  %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+  %z0 = lshr exact i32 %y0, 8
+  br label %if.end
+
+if.else:
+  %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+  %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+  %z1 = lshr i32 %y1, 8
+  br label %if.end
+
+if.end:
+  %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+  %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ]
+  %ret = add i32 %xx, %yy
+  ret i32 %ret
+}
+
+; CHECK-LABEL: test_pr30373b
+; CHECK: lshr
+; CHECK-NOT: exact
+; CHECK: }
+
+; CHECK: !0 = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"float", !2}
+; CHECK: !2 = !{!"an example type tree"}
diff --git a/test/Transforms/GVNSink/struct.ll b/test/Transforms/GVNSink/struct.ll
new file mode 100644
index 000000000000..2228cf2803ae
--- /dev/null
+++ b/test/Transforms/GVNSink/struct.ll
@@ -0,0 +1,71 @@
+; RUN: opt -gvn-sink -S < %s | FileCheck %s
+
+%struct = type {i32, i32}
+%struct2 = type { [ 2 x i32], i32 }
+
+; Struct indices cannot be variant.
+
+; CHECK-LABEL: @f() {
+; CHECK: getelementptr
+; CHECK: getelementptr
+define void @f() {
+bb:
+  br i1 undef, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp = getelementptr inbounds %struct, %struct* null, i64 0, i32 1
+  br label %bb4
+
+bb2:                                              ; preds = %bb
+  %tmp3 = getelementptr inbounds %struct, %struct* null, i64 0, i32 0
+  br label %bb4
+
+bb4:                                              ; preds = %bb2, %bb1
+  %tmp5 = phi i32 [ 1, %bb1 ], [ 0, %bb2 ]
+  ret void
+}
+
+; Struct indices cannot be variant.
+
+; CHECK-LABEL: @g() {
+; CHECK: getelementptr
+; CHECK: getelementptr
+define void @g() {
+bb:
+  br i1 undef, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp = getelementptr inbounds %struct2, %struct2* null, i64 0, i32 0, i32 1
+  br label %bb4
+
+bb2:                                              ; preds = %bb
+  %tmp3 = getelementptr inbounds %struct2, %struct2* null, i64 0, i32 0, i32 0
+  br label %bb4
+
+bb4:                                              ; preds = %bb2, %bb1
+  %tmp5 = phi i32 [ 1, %bb1 ], [ 0, %bb2 ]
+  ret void
+}
+
+
+; ... but the first parameter to a GEP can.
+
+; CHECK-LABEL: @h() {
+; CHECK: getelementptr
+; CHECK-NOT: getelementptr
+define void @h() {
+bb:
+  br i1 undef, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp = getelementptr inbounds %struct, %struct* null, i32 0, i32 0
+  br label %bb4
+
+bb2:                                              ; preds = %bb
+  %tmp3 = getelementptr inbounds %struct, %struct* null, i32 1, i32 0
+  br label %bb4
+
+bb4:                                              ; preds = %bb2, %bb1
+  %tmp5 = phi i32 [ 0, %bb1 ], [ 1, %bb2 ]
+  ret void
+}
+\ No newline at end of file
diff --git a/test/Transforms/GlobalDCE/externally_available.ll b/test/Transforms/GlobalDCE/externally_available.ll
index fca49b29ec8e..bc54db38cee0 100644
--- a/test/Transforms/GlobalDCE/externally_available.ll
+++ b/test/Transforms/GlobalDCE/externally_available.ll
@@ -1,12 +1,21 @@
 ; RUN: opt < %s -globaldce -S | FileCheck %s
 
+; test_global should not be emitted to the .s file.
+; CHECK-NOT: @test_global =
+@test_global = available_externally global i32 4
+
+; test_global2 is a normal global that references an available_externally function.
+; CHECK: @test_global2 =
+@test_global2 = global i32 ()* @test_function2
+
 ; test_function should not be emitted to the .s file.
-; CHECK-NOT: test_function
+; CHECK-NOT: define {{.*}} @test_function()
 define available_externally i32 @test_function() {
   ret i32 4
 }
 
-; test_global should not be emitted to the .s file.
-; CHECK-NOT: test_global
-@test_global = available_externally global i32 4
-
+; test_function2 isn't actually dead even though it's available externally.
+; CHECK: define available_externally i32 @test_function2()
+define available_externally i32 @test_function2() {
+  ret i32 4
+}
diff --git a/test/Transforms/Inline/prof-update-instr.ll b/test/Transforms/Inline/prof-update-instr.ll
new file mode 100644
index 000000000000..6650165cb904
--- /dev/null
+++ b/test/Transforms/Inline/prof-update-instr.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
+; Checks if inliner updates VP metadata for indirect call instructions
+; with instrumentation based profile.
+
+@func = global void ()* null
+@func2 = global void ()* null
+
+; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]]
+define void  @callee(i32 %n) !prof !15 {
+  %cond = icmp sle i32 %n, 10
+  br i1 %cond, label %cond_true, label %cond_false, !prof !20
+cond_true:
+; The %f2 call is dead when @callee is inlined with n = 15, so its VP data is not updated.
+  %f2 = load void ()*, void ()** @func2
+; CHECK: call void %f2(), !prof ![[COUNT_IND_CALLEE1:[0-9]*]]
+  call void %f2(), !prof !19
+  ret void
+cond_false:
+  %f = load void ()*, void ()** @func
+; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]]
+  call void %f(), !prof !18
+  ret void
+}
+
+; CHECK: define void @caller()
+define void @caller() !prof !21 {
+; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]]
+  call void @callee(i32 15)
+  ret void
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 2000}
+!8 = !{!"NumCounts", i64 2}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 1000}
+!16 = !{!"branch_weights", i64 2000}
+!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
+!19 = !{!"VP", i32 0, i64 200, i64 111, i64 100, i64 222, i64 60, i64 333, i64 40}
+!20 = !{!"branch_weights", i32 1000, i32 1000}
+!21 = !{!"function_entry_count", i64 400}
+attributes #0 = { alwaysinline }
+; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
+; CHECK: ![[COUNT_IND_CALLEE1]] = !{!"VP", i32 0, i64 200, i64 111, i64 100, i64 222, i64 60, i64 333, i64 40}
+; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12}
+; CHECK: ![[COUNT_IND_CALLER]] = !{!"VP", i32 0, i64 56, i64 111, i64 32, i64 222, i64 16, i64 333, i64 8}
diff --git a/test/Transforms/Inline/prof-update.ll b/test/Transforms/Inline/prof-update-sample.ll
index 4a4471e8e17a..4a4471e8e17a 100644
--- a/test/Transforms/Inline/prof-update.ll
+++ b/test/Transforms/Inline/prof-update-sample.ll
diff --git a/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll b/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll
deleted file mode 100644
index 76e30399a666..000000000000
--- a/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep add
-; RUN: opt < %s -instcombine -S | not grep mul
-; PR2330
-
-define i1 @f(i32 %x, i32 %y) nounwind {
-entry:
-  %A = add i32 %x, 5
-  %B = add i32 %y, 5
-  %C = icmp eq i32 %A, %B
-  ret i1 %C
-}
-
-define i1 @g(i32 %x, i32 %y) nounwind {
-entry:
-  %A = mul i32 %x, 5
-  %B = mul i32 %y, 5
-  %C = icmp eq i32 %A, %B
-  ret i1 %C
-}
diff --git a/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll b/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll
deleted file mode 100644
index b91457c79dea..000000000000
--- a/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll
+++ /dev/null
@@ -1,87 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-define i1 @test1(i8 %x, i8 %y) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[TMP:%.*]] = icmp ult i8 %x, %y
-; CHECK-NEXT:    ret i1 [[TMP]]
-;
-  %X = xor i8 %x, 128
-  %Y = xor i8 %y, 128
-  %tmp = icmp slt i8 %X, %Y
-  ret i1 %tmp
-}
-
-define i1 @test2(i8 %x, i8 %y) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[TMP:%.*]] = icmp slt i8 %x, %y
-; CHECK-NEXT:    ret i1 [[TMP]]
-;
-  %X = xor i8 %x, 128
-  %Y = xor i8 %y, 128
-  %tmp = icmp ult i8 %X, %Y
-  ret i1 %tmp
-}
-
-define i1 @test3(i8 %x) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[TMP:%.*]] = icmp sgt i8 %x, -114
-; CHECK-NEXT:    ret i1 [[TMP]]
-;
-  %X = xor i8 %x, 128
-  %tmp = icmp uge i8 %X, 15
-  ret i1 %tmp
-}
-
-define <2 x i1> @test3vec(<2 x i8> %x) {
-; CHECK-LABEL: @test3vec(
-; CHECK-NEXT:    [[TMP:%.*]] = icmp sgt <2 x i8> %x, <i8 -114, i8 -114>
-; CHECK-NEXT:    ret <2 x i1> [[TMP]]
-;
-  %X = xor <2 x i8> %x, <i8 128, i8 128>
-  %tmp = icmp uge <2 x i8> %X, <i8 15, i8 15>
-  ret <2 x i1> %tmp
-}
-
-define i1 @test4(i8 %x, i8 %y) {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[TMP:%.*]] = icmp ugt i8 %x, %y
-; CHECK-NEXT:    ret i1 [[TMP]]
-;
-  %X = xor i8 %x, 127
-  %Y = xor i8 %y, 127
-  %tmp = icmp slt i8 %X, %Y
-  ret i1 %tmp
-}
-
-define i1 @test5(i8 %x, i8 %y) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[TMP:%.*]] = icmp sgt i8 %x, %y
-; CHECK-NEXT:    ret i1 [[TMP]]
-;
-  %X = xor i8 %x, 127
-  %Y = xor i8 %y, 127
-  %tmp = icmp ult i8 %X, %Y
-  ret i1 %tmp
-}
-
-define i1 @test6(i8 %x) {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[TMP:%.*]] = icmp slt i8 %x, 113
-; CHECK-NEXT:    ret i1 [[TMP]]
-;
-  %X = xor i8 %x, 127
-  %tmp = icmp uge i8 %X, 15
-  ret i1 %tmp
-}
-
-define <2 x i1> @test6vec(<2 x i8> %x) {
-; CHECK-LABEL: @test6vec(
-; CHECK-NEXT:    [[TMP:%.*]] = icmp slt <2 x i8> %x, <i8 113, i8 113>
-; CHECK-NEXT:    ret <2 x i1> [[TMP]]
-;
-  %X = xor <2 x i8> %x, <i8 127, i8 127>
-  %tmp = icmp uge <2 x i8> %X, <i8 15, i8 15>
-  ret <2 x i1> %tmp
-}
-
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index f81f700e6cf4..490830af2d82 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -51,8 +51,8 @@ define i32* @test4(i32 %n) {
   ret i32* %A
 }
 
-; Allocas which are only used by GEPs, bitcasts, and stores (transitively)
-; should be deleted.
+; Allocas which are only used by GEPs, bitcasts, addrspacecasts, and stores
+; (transitively) should be deleted.
 define void @test5() {
 ; CHECK-LABEL: @test5(
 ; CHECK-NOT: alloca
@@ -62,6 +62,7 @@ define void @test5() {
 entry:
   %a = alloca { i32 }
   %b = alloca i32*
+  %c = alloca i32
   %a.1 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
   store i32 123, i32* %a.1
   store i32* %a.1, i32** %b
@@ -73,6 +74,8 @@ entry:
   store atomic i32 3, i32* %a.3 release, align 4
   %a.4 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
   store atomic i32 4, i32* %a.4 seq_cst, align 4
+  %c.1 = addrspacecast i32* %c to i32 addrspace(1)*
+  store i32 123, i32 addrspace(1)* %c.1
   ret void
 }
 
diff --git a/test/Transforms/InstCombine/bitcast-vec-canon.ll b/test/Transforms/InstCombine/bitcast-vec-canon.ll
index 97145221099e..a92a7b73fd7e 100644
--- a/test/Transforms/InstCombine/bitcast-vec-canon.ll
+++ b/test/Transforms/InstCombine/bitcast-vec-canon.ll
@@ -1,41 +1,40 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define double @a(<1 x i64> %y) {
+; CHECK-LABEL: @a(
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <1 x i64> %y to <1 x double>
+; CHECK-NEXT:    [[C:%.*]] = extractelement <1 x double> [[BC]], i32 0
+; CHECK-NEXT:    ret double [[C]]
+;
   %c = bitcast <1 x i64> %y to double
   ret double %c
- 
-; CHECK-LABEL: @a(
-; CHECK-NEXT:  bitcast <1 x i64> %y to <1 x double>
-; CHECK-NEXT:  extractelement <1 x double> {{.*}}, i32 0
-; CHECK-NEXT:  ret double
 }
 
 define i64 @b(<1 x i64> %y) {
+; CHECK-LABEL: @b(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> %y, i32 0
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
   %c = bitcast <1 x i64> %y to i64
   ret i64 %c
-
-; CHECK-LABEL: @b(
-; CHECK-NEXT:  extractelement <1 x i64> %y, i32 0
-; CHECK-NEXT:  ret i64
 }
 
 define <1 x i64> @c(double %y) {
+; CHECK-LABEL: @c(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double %y to i64
+; CHECK-NEXT:    [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+; CHECK-NEXT:    ret <1 x i64> [[C]]
+;
   %c = bitcast double %y to <1 x i64>
   ret <1 x i64> %c
-
-; CHECK-LABEL: @c(
-; CHECK-NEXT:  bitcast double %y to i64
-; CHECK-NEXT:  insertelement <1 x i64> undef, i64 {{.*}}, i32 0
-; CHECK-NEXT:  ret <1 x i64>
 }
 
 define <1 x i64> @d(i64 %y) {
+; CHECK-LABEL: @d(
+; CHECK-NEXT:    [[C:%.*]] = insertelement <1 x i64> undef, i64 %y, i32 0
+; CHECK-NEXT:    ret <1 x i64> [[C]]
+;
   %c = bitcast i64 %y to <1 x i64>
   ret <1 x i64> %c
-
-; CHECK-LABEL: @d(
-; CHECK-NEXT:  insertelement <1 x i64> undef, i64 %y, i32 0
-; CHECK-NEXT:  ret <1 x i64>
 }
 
-
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 2e7f30fee14d..4cf3f27ab014 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -70,6 +70,51 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
   ret <2 x i32> %t2
 }
 
+; PR26702 - https://bugs.llvm.org/show_bug.cgi?id=26702
+; Bitcast is canonicalized below logic, so we can see the not-not pattern.
+
+define <2 x i64> @is_negative(<4 x i32> %x) {
+; CHECK-LABEL: @is_negative(
+; CHECK-NEXT:    [[LOBIT:%.*]] = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[NOTNOT:%.*]] = bitcast <4 x i32> [[LOBIT]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[NOTNOT]]
+;
+  %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+  %not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %bc = bitcast <4 x i32> %not to <2 x i64>
+  %notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
+  ret <2 x i64> %notnot
+}
+
+; This variation has an extra bitcast at the end. This means that the 2nd xor
+; can be done in <4 x i32> to eliminate a bitcast regardless of canonicalization.
+
+define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) {
+; CHECK-LABEL: @is_negative_bonus_bitcast(
+; CHECK-NEXT:    [[LOBIT:%.*]] = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <4 x i32> [[LOBIT]]
+;
+  %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+  %not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %bc = bitcast <4 x i32> %not to <2 x i64>
+  %notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
+  %bc2 = bitcast <2 x i64> %notnot to <4 x i32>
+  ret <4 x i32> %bc2
+}
+
+; Negative test: bitcasts are canonicalized below bitwise logic. No changes here.
+
+define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) {
+; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant(
+; CHECK-NEXT:    [[A:%.*]] = and <4 x i4> %x, <i4 0, i4 -8, i4 0, i4 -8>
+; CHECK-NEXT:    [[B:%.*]] = bitcast <4 x i4> [[A]] to <2 x i8>
+; CHECK-NEXT:    ret <2 x i8> [[B]]
+;
+  %a = and <4 x i4> %x, <i4 0, i4 8, i4 0, i4 8>
+  %b = bitcast <4 x i4> %a to <2 x i8>
+  ret <2 x i8> %b
+}
+
 ; PR27925 - https://llvm.org/bugs/show_bug.cgi?id=27925
 
 define <4 x i32> @bitcasts_and_bitcast(<4 x i32> %a, <8 x i16> %b) {
diff --git a/test/Transforms/InstCombine/ctpop.ll b/test/Transforms/InstCombine/ctpop.ll
index 38612c92aaa4..6bc6f9731979 100644
--- a/test/Transforms/InstCombine/ctpop.ll
+++ b/test/Transforms/InstCombine/ctpop.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -instcombine | FileCheck %s
 
 declare i32 @llvm.ctpop.i32(i32)
@@ -5,8 +6,9 @@ declare i8 @llvm.ctpop.i8(i8)
 declare void @llvm.assume(i1)
 
 define i1 @test1(i32 %arg) {
-; CHECK: @test1
-; CHECK: ret i1 false
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret i1 false
+;
   %and = and i32 %arg, 15
   %cnt = call i32 @llvm.ctpop.i32(i32 %and)
   %res = icmp eq i32 %cnt, 9
@@ -14,8 +16,9 @@ define i1 @test1(i32 %arg) {
 }
 
 define i1 @test2(i32 %arg) {
-; CHECK: @test2
-; CHECK: ret i1 false
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    ret i1 false
+;
   %and = and i32 %arg, 1
   %cnt = call i32 @llvm.ctpop.i32(i32 %and)
   %res = icmp eq i32 %cnt, 2
@@ -23,9 +26,12 @@ define i1 @test2(i32 %arg) {
 }
 
 define i1 @test3(i32 %arg) {
-; CHECK: @test3
-; CHECK: ret i1 false
-  ;; Use an assume to make all the bits known without triggering constant 
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[ASSUME:%.*]] = icmp eq i32 [[ARG:%.*]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[ASSUME]])
+; CHECK-NEXT:    ret i1 false
+;
+  ;; Use an assume to make all the bits known without triggering constant
   ;; folding.  This is trying to hit a corner case where we have to avoid
   ;; taking the log of 0.
   %assume = icmp eq i32 %arg, 0
@@ -37,8 +43,11 @@ define i1 @test3(i32 %arg) {
 
 ; Negative test for when we know nothing
 define i1 @test4(i8 %arg) {
-; CHECK: @test4
-; CHECK: ret i1 %res
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i8 [[CNT]], 2
+; CHECK-NEXT:    ret i1 [[RES]]
+;
   %cnt = call i8 @llvm.ctpop.i8(i8 %arg)
   %res = icmp eq i8 %cnt, 2
   ret i1 %res
diff --git a/test/Transforms/InstCombine/icmp-xor-signbit.ll b/test/Transforms/InstCombine/icmp-xor-signbit.ll
new file mode 100644
index 000000000000..30a9668f37df
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-xor-signbit.ll
@@ -0,0 +1,228 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
+
+define i1 @slt_to_ult(i8 %x, i8 %y) {
+; CHECK-LABEL: @slt_to_ult(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 %x, %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a = xor i8 %x, 128
+  %b = xor i8 %y, 128
+  %cmp = icmp slt i8 %a, %b
+  ret i1 %cmp
+}
+
+; PR33138 - https://bugs.llvm.org/show_bug.cgi?id=33138
+
+define <2 x i1> @slt_to_ult_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @slt_to_ult_splat(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i8> %x, %y
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %a = xor <2 x i8> %x, <i8 128, i8 128>
+  %b = xor <2 x i8> %y, <i8 128, i8 128>
+  %cmp = icmp slt <2 x i8> %a, %b
+  ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @ult_to_slt(i8 %x, i8 %y) {
+; CHECK-LABEL: @ult_to_slt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a = xor i8 %x, 128
+  %b = xor i8 %y, 128
+  %cmp = icmp ult i8 %a, %b
+  ret i1 %cmp
+}
+
+define <2 x i1> @ult_to_slt_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @ult_to_slt_splat(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> %x, %y
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %a = xor <2 x i8> %x, <i8 128, i8 128>
+  %b = xor <2 x i8> %y, <i8 128, i8 128>
+  %cmp = icmp ult <2 x i8> %a, %b
+  ret <2 x i1> %cmp
+}
+
+; icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
+
+define i1 @slt_to_ugt(i8 %x, i8 %y) {
+; CHECK-LABEL: @slt_to_ugt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 %x, %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a = xor i8 %x, 127
+  %b = xor i8 %y, 127
+  %cmp = icmp slt i8 %a, %b
+  ret i1 %cmp
+}
+
+define <2 x i1> @slt_to_ugt_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @slt_to_ugt_splat(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i8> %x, %y
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %a = xor <2 x i8> %x, <i8 127, i8 127>
+  %b = xor <2 x i8> %y, <i8 127, i8 127>
+  %cmp = icmp slt <2 x i8> %a, %b
+  ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @ult_to_sgt(i8 %x, i8 %y) {
+; CHECK-LABEL: @ult_to_sgt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a = xor i8 %x, 127
+  %b = xor i8 %y, 127
+  %cmp = icmp ult i8 %a, %b
+  ret i1 %cmp
+}
+
+define <2 x i1> @ult_to_sgt_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @ult_to_sgt_splat(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> %x, %y
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %a = xor <2 x i8> %x, <i8 127, i8 127>
+  %b = xor <2 x i8> %y, <i8 127, i8 127>
+  %cmp = icmp ult <2 x i8> %a, %b
+  ret <2 x i1> %cmp
+}
+
+; icmp u/s (a ^ signmask), C --> icmp s/u a, C'
+
+define i1 @sge_to_ugt(i8 %x) {
+; CHECK-LABEL: @sge_to_ugt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 %x, -114
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a = xor i8 %x, 128
+  %cmp = icmp sge i8 %a, 15
+  ret i1 %cmp
+}
+
+define <2 x i1> @sge_to_ugt_splat(<2 x i8> %x) {
+; CHECK-LABEL: @sge_to_ugt_splat(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i8> %x, <i8 -114, i8 -114>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %a = xor <2 x i8> %x, <i8 128, i8 128>
+  %cmp = icmp sge <2 x i8> %a, <i8 15, i8 15>
+  ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @uge_to_sgt(i8 %x) {
+; CHECK-LABEL: @uge_to_sgt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, -114
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a = xor i8 %x, 128
+  %cmp = icmp uge i8 %a, 15
+  ret i1 %cmp
+}
+
+define <2 x i1> @uge_to_sgt_splat(<2 x i8> %x) {
+; CHECK-LABEL: @uge_to_sgt_splat(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> %x, <i8 -114, i8 -114>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %a = xor <2 x i8> %x, <i8 128, i8 128>
+  %cmp = icmp uge <2 x i8> %a, <i8 15, i8 15>
+  ret <2 x i1> %cmp
+}
+
+; icmp u/s (a ^ maxsignval), C --> icmp s/u' a, C'
+
+define i1 @sge_to_ult(i8 %x) {
+; CHECK-LABEL: @sge_to_ult(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 %x, 113
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a = xor i8 %x, 127
+  %cmp = icmp sge i8 %a, 15
+  ret i1 %cmp
+}
+
+define <2 x i1> @sge_to_ult_splat(<2 x i8> %x) {
+; CHECK-LABEL: @sge_to_ult_splat(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i8> %x, <i8 113, i8 113>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %a = xor <2 x i8> %x, <i8 127, i8 127>
+  %cmp = icmp sge <2 x i8> %a, <i8 15, i8 15>
+  ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @uge_to_slt(i8 %x) {
+; CHECK-LABEL: @uge_to_slt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, 113
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a = xor i8 %x, 127
+  %cmp = icmp uge i8 %a, 15
+  ret i1 %cmp
+}
+
+define <2 x i1> @uge_to_slt_splat(<2 x i8> %x) {
+; CHECK-LABEL: @uge_to_slt_splat(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> %x, <i8 113, i8 113>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %a = xor <2 x i8> %x, <i8 127, i8 127>
+  %cmp = icmp uge <2 x i8> %a, <i8 15, i8 15>
+  ret <2 x i1> %cmp
+}
+
+; PR33138, part 2: https://bugs.llvm.org/show_bug.cgi?id=33138
+; TODO: We could look through vector bitcasts for icmp folds,
+; or we could canonicalize bitcast ahead of logic ops with constants.
+
+define <8 x i1> @sgt_to_ugt_bitcasted_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @sgt_to_ugt_bitcasted_splat(
+; CHECK-NEXT:    [[A:%.*]] = xor <2 x i32> %x, <i32 -2139062144, i32 -2139062144>
+; CHECK-NEXT:    [[B:%.*]] = xor <2 x i32> %y, <i32 -2139062144, i32 -2139062144>
+; CHECK-NEXT:    [[C:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+; CHECK-NEXT:    [[D:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+; CHECK-NEXT:    [[E:%.*]] = icmp sgt <8 x i8> [[C]], [[D]]
+; CHECK-NEXT:    ret <8 x i1> [[E]]
+;
+  %a = xor <2 x i32> %x, <i32 2155905152, i32 2155905152> ; 0x80808080
+  %b = xor <2 x i32> %y, <i32 2155905152, i32 2155905152>
+  %c = bitcast <2 x i32> %a to <8 x i8>
+  %d = bitcast <2 x i32> %b to <8 x i8>
+  %e = icmp sgt <8 x i8> %c, %d
+  ret <8 x i1> %e
+}
+
+; TODO: This is false (little-endian). How should that be recognized?
+; I.e., should InstSimplify know this directly, should InstCombine canonicalize
+; this so InstSimplify can know this, or is that not something that we want
+; either pass to recognize?
+
+define <2 x i1> @negative_simplify_splat(<4 x i8> %x) {
+; CHECK-LABEL: @negative_simplify_splat(
+; CHECK-NEXT:    [[A:%.*]] = or <4 x i8> %x, <i8 0, i8 -128, i8 0, i8 -128>
+; CHECK-NEXT:    [[B:%.*]] = bitcast <4 x i8> [[A]] to <2 x i16>
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i16> [[B]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %a = or <4 x i8> %x, <i8 0, i8 128, i8 0, i8 128>
+  %b = bitcast <4 x i8> %a to <2 x i16>
+  %c = icmp sgt <2 x i16> %b, zeroinitializer
+  ret <2 x i1> %c
+}
+
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 6f657b190454..ed570da73c9e 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -2895,3 +2895,67 @@ define i1 @cmp_ult_rhs_dec(float %x, i32 %y) {
   %cmp = icmp ult i32 %conv, %dec
   ret i1 %cmp
 }
+
+define i1 @eq_add_constants(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_add_constants(
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 %x, %y
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %A = add i32 %x, 5
+  %B = add i32 %y, 5
+  %C = icmp eq i32 %A, %B
+  ret i1 %C
+}
+
+define i1 @eq_mul_constants(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_mul_constants(
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 %x, %y
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %A = mul i32 %x, 5
+  %B = mul i32 %y, 5
+  %C = icmp eq i32 %A, %B
+  ret i1 %C
+}
+
+define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @eq_mul_constants_splat(
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> %x, %y
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %A = mul <2 x i32> %x, <i32 5, i32 5>
+  %B = mul <2 x i32> %y, <i32 5, i32 5>
+  %C = icmp ne <2 x i32> %A, %B
+  ret <2 x i1> %C
+}
+
+; If the multiply constant has any trailing zero bits, we get something completely different.
+; We mask off the high bits of each input and then convert:
+; (X&Z) == (Y&Z) -> (X^Y) & Z == 0
+
+define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_mul_constants_with_tz(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %x, %y
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %A = mul i32 %x, 12
+  %B = mul i32 %y, 12
+  %C = icmp ne i32 %A, %B
+  ret i1 %C
+}
+
+define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @eq_mul_constants_with_tz_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> %x, %y
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1073741823, i32 1073741823>
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %A = mul <2 x i32> %x, <i32 12, i32 12>
+  %B = mul <2 x i32> %y, <i32 12, i32 12>
+  %C = icmp eq <2 x i32> %A, %B
+  ret <2 x i1> %C
+}
+
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
index 988ec2b71c50..68daac65ee6b 100644
--- a/test/Transforms/InstSimplify/call.ll
+++ b/test/Transforms/InstSimplify/call.ll
@@ -1,64 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 ; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
 
 declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
+declare {i8, i1} @llvm.sadd.with.overflow.i8(i8 %a, i8 %b)
 declare {i8, i1} @llvm.usub.with.overflow.i8(i8 %a, i8 %b)
 declare {i8, i1} @llvm.ssub.with.overflow.i8(i8 %a, i8 %b)
 declare {i8, i1} @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
+declare {i8, i1} @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
 
 define i1 @test_uadd1() {
 ; CHECK-LABEL: @test_uadd1(
+; CHECK-NEXT:    ret i1 true
+;
   %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 3)
   %overflow = extractvalue {i8, i1} %x, 1
   ret i1 %overflow
-; CHECK-NEXT: ret i1 true
 }
 
 define i8 @test_uadd2() {
 ; CHECK-LABEL: @test_uadd2(
+; CHECK-NEXT:    ret i8 42
+;
   %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 44)
   %result = extractvalue {i8, i1} %x, 0
   ret i8 %result
-; CHECK-NEXT: ret i8 42
+}
+
+define {i8, i1} @test_uadd3(i8 %v) {
+; CHECK-LABEL: @test_uadd3(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %result = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %v, i8 undef)
+  ret {i8, i1} %result
+}
+
+define {i8, i1} @test_uadd4(i8 %v) {
+; CHECK-LABEL: @test_uadd4(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %result = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 undef, i8 %v)
+  ret {i8, i1} %result
+}
+
+define i1 @test_sadd1() {
+; CHECK-LABEL: @test_sadd1(
+; CHECK-NEXT:    ret i1 true
+;
+  %x = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 126, i8 3)
+  %overflow = extractvalue {i8, i1} %x, 1
+  ret i1 %overflow
+}
+
+define i8 @test_sadd2() {
+; CHECK-LABEL: @test_sadd2(
+; CHECK-NEXT:    ret i8 -86
+;
+  %x = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 126, i8 44)
+  %result = extractvalue {i8, i1} %x, 0
+  ret i8 %result
+}
+
+define {i8, i1} @test_sadd3(i8 %v) {
+; CHECK-LABEL: @test_sadd3(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %result = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %v, i8 undef)
+  ret {i8, i1} %result
+}
+
+define {i8, i1} @test_sadd4(i8 %v) {
+; CHECK-LABEL: @test_sadd4(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %result = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 undef, i8 %v)
+  ret {i8, i1} %result
 }
 
 define {i8, i1} @test_usub1(i8 %V) {
 ; CHECK-LABEL: @test_usub1(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
   %x = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %V, i8 %V)
   ret {i8, i1} %x
-; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+}
+
+define {i8, i1} @test_usub2(i8 %V) {
+; CHECK-LABEL: @test_usub2(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %x = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %V, i8 undef)
+  ret {i8, i1} %x
+}
+
+define {i8, i1} @test_usub3(i8 %V) {
+; CHECK-LABEL: @test_usub3(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %x = call {i8, i1} @llvm.usub.with.overflow.i8(i8 undef, i8 %V)
+  ret {i8, i1} %x
 }
 
 define {i8, i1} @test_ssub1(i8 %V) {
 ; CHECK-LABEL: @test_ssub1(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
   %x = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %V, i8 %V)
   ret {i8, i1} %x
-; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+}
+
+define {i8, i1} @test_ssub2(i8 %V) {
+; CHECK-LABEL: @test_ssub2(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %x = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %V, i8 undef)
+  ret {i8, i1} %x
+}
+
+define {i8, i1} @test_ssub3(i8 %V) {
+; CHECK-LABEL: @test_ssub3(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %x = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 undef, i8 %V)
+  ret {i8, i1} %x
 }
 
 define {i8, i1} @test_umul1(i8 %V) {
 ; CHECK-LABEL: @test_umul1(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
   %x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %V, i8 0)
   ret {i8, i1} %x
-; CHECK-NEXT: ret { i8, i1 } zeroinitializer
+}
+
+define {i8, i1} @test_umul2(i8 %V) {
+; CHECK-LABEL: @test_umul2(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
+  %x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %V, i8 undef)
+  ret {i8, i1} %x
+}
+
+define {i8, i1} @test_umul3(i8 %V) {
+; CHECK-LABEL: @test_umul3(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
+  %x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 0, i8 %V)
+  ret {i8, i1} %x
+}
+
+define {i8, i1} @test_umul4(i8 %V) {
+; CHECK-LABEL: @test_umul4(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
+  %x = call {i8, i1} @llvm.umul.with.overflow.i8(i8 undef, i8 %V)
+  ret {i8, i1} %x
+}
+
+define {i8, i1} @test_smul1(i8 %V) {
+; CHECK-LABEL: @test_smul1(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
+  %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %V, i8 0)
+  ret {i8, i1} %x
+}
+
+define {i8, i1} @test_smul2(i8 %V) {
+; CHECK-LABEL: @test_smul2(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
+  %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %V, i8 undef)
+  ret {i8, i1} %x
+}
+
+define {i8, i1} @test_smul3(i8 %V) {
+; CHECK-LABEL: @test_smul3(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
+  %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 0, i8 %V)
+  ret {i8, i1} %x
+}
+
+define {i8, i1} @test_smul4(i8 %V) {
+; CHECK-LABEL: @test_smul4(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
+  %x = call {i8, i1} @llvm.smul.with.overflow.i8(i8 undef, i8 %V)
+  ret {i8, i1} %x
 }
 
 declare i256 @llvm.cttz.i256(i256 %src, i1 %is_zero_undef)
 
 define i256 @test_cttz() {
 ; CHECK-LABEL: @test_cttz(
+; CHECK-NEXT:    ret i256 1
+;
   %x = call i256 @llvm.cttz.i256(i256 10, i1 false)
   ret i256 %x
-; CHECK-NEXT: ret i256 1
 }
 
 declare i256 @llvm.ctpop.i256(i256 %src)
 
 define i256 @test_ctpop() {
 ; CHECK-LABEL: @test_ctpop(
+; CHECK-NEXT:    ret i256 2
+;
   %x = call i256 @llvm.ctpop.i256(i256 10)
   ret i256 %x
-; CHECK-NEXT: ret i256 2
 }
 
 ; Test a non-intrinsic that we know about as a library call.
@@ -66,14 +214,15 @@ declare float @fabs(float %x)
 
 define float @test_fabs_libcall() {
 ; CHECK-LABEL: @test_fabs_libcall(
+; CHECK-NEXT:    [[X:%.*]] = call float @fabs(float -4.200000e+01)
+; CHECK-NEXT:    ret float 4.200000e+01
+;
 
   %x = call float @fabs(float -42.0)
 ; This is still a real function call, so instsimplify won't nuke it -- other
 ; passes have to do that.
-; CHECK-NEXT: call float @fabs
 
   ret float %x
-; CHECK-NEXT: ret float 4.2{{0+}}e+01
 }
 
 
@@ -87,34 +236,35 @@ declare float @llvm.nearbyint.f32(float) nounwind readnone
 ; Test idempotent intrinsics
 define float @test_idempotence(float %a) {
 ; CHECK-LABEL: @test_idempotence(
+; CHECK-NEXT:    [[A0:%.*]] = call float @llvm.fabs.f32(float [[A:%.*]])
+; CHECK-NEXT:    [[B0:%.*]] = call float @llvm.floor.f32(float [[A]])
+; CHECK-NEXT:    [[C0:%.*]] = call float @llvm.ceil.f32(float [[A]])
+; CHECK-NEXT:    [[D0:%.*]] = call float @llvm.trunc.f32(float [[A]])
+; CHECK-NEXT:    [[E0:%.*]] = call float @llvm.rint.f32(float [[A]])
+; CHECK-NEXT:    [[F0:%.*]] = call float @llvm.nearbyint.f32(float [[A]])
+; CHECK-NEXT:    [[R0:%.*]] = fadd float [[A0]], [[B0]]
+; CHECK-NEXT:    [[R1:%.*]] = fadd float [[R0]], [[C0]]
+; CHECK-NEXT:    [[R2:%.*]] = fadd float [[R1]], [[D0]]
+; CHECK-NEXT:    [[R3:%.*]] = fadd float [[R2]], [[E0]]
+; CHECK-NEXT:    [[R4:%.*]] = fadd float [[R3]], [[F0]]
+; CHECK-NEXT:    ret float [[R4]]
+;
 
-; CHECK: fabs
-; CHECK-NOT: fabs
   %a0 = call float @llvm.fabs.f32(float %a)
   %a1 = call float @llvm.fabs.f32(float %a0)
 
-; CHECK: floor
-; CHECK-NOT: floor
   %b0 = call float @llvm.floor.f32(float %a)
   %b1 = call float @llvm.floor.f32(float %b0)
 
-; CHECK: ceil
-; CHECK-NOT: ceil
   %c0 = call float @llvm.ceil.f32(float %a)
   %c1 = call float @llvm.ceil.f32(float %c0)
 
-; CHECK: trunc
-; CHECK-NOT: trunc
   %d0 = call float @llvm.trunc.f32(float %a)
   %d1 = call float @llvm.trunc.f32(float %d0)
 
-; CHECK: rint
-; CHECK-NOT: rint
   %e0 = call float @llvm.rint.f32(float %a)
   %e1 = call float @llvm.rint.f32(float %e0)
 
-; CHECK: nearbyint
-; CHECK-NOT: nearbyint
   %f0 = call float @llvm.nearbyint.f32(float %a)
   %f1 = call float @llvm.nearbyint.f32(float %f0)
 
@@ -128,6 +278,17 @@ define float @test_idempotence(float %a) {
 }
 
 define i8* @operator_new() {
+; CHECK-LABEL: @operator_new(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @_Znwm(i64 8)
+; CHECK-NEXT:    br i1 false, label [[CAST_END:%.*]], label [[CAST_NOTNULL:%.*]]
+; CHECK:       cast.notnull:
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 4
+; CHECK-NEXT:    br label [[CAST_END]]
+; CHECK:       cast.end:
+; CHECK-NEXT:    [[CAST_RESULT:%.*]] = phi i8* [ [[ADD_PTR]], [[CAST_NOTNULL]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i8* [[CAST_RESULT]]
+;
 entry:
   %call = tail call noalias i8* @_Znwm(i64 8)
   %cmp = icmp eq i8* %call, null
@@ -141,8 +302,6 @@ cast.end:                                         ; preds = %cast.notnull, %entr
   %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
   ret i8* %cast.result
 
-; CHECK-LABEL: @operator_new
-; CHECK: br i1 false, label %cast.end, label %cast.notnull
 }
 
 declare nonnull noalias i8* @_Znwm(i64)
@@ -151,6 +310,18 @@ declare nonnull noalias i8* @_Znwm(i64)
 @_ZSt7nothrow = external global %"struct.std::nothrow_t"
 
 define i8* @operator_new_nothrow_t() {
+; CHECK-LABEL: @operator_new_nothrow_t(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @_ZnamRKSt9nothrow_t(i64 8, %"struct.std::nothrow_t"* @_ZSt7nothrow)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT:    br i1 [[CMP]], label [[CAST_END:%.*]], label [[CAST_NOTNULL:%.*]]
+; CHECK:       cast.notnull:
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 4
+; CHECK-NEXT:    br label [[CAST_END]]
+; CHECK:       cast.end:
+; CHECK-NEXT:    [[CAST_RESULT:%.*]] = phi i8* [ [[ADD_PTR]], [[CAST_NOTNULL]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i8* [[CAST_RESULT]]
+;
 entry:
   %call = tail call noalias i8* @_ZnamRKSt9nothrow_t(i64 8, %"struct.std::nothrow_t"* @_ZSt7nothrow)
   %cmp = icmp eq i8* %call, null
@@ -164,13 +335,23 @@ cast.end:                                         ; preds = %cast.notnull, %entr
   %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
   ret i8* %cast.result
 
-; CHECK-LABEL: @operator_new_nothrow_t
-; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull
 }
 
 declare i8* @_ZnamRKSt9nothrow_t(i64, %"struct.std::nothrow_t"*) nounwind
 
 define i8* @malloc_can_return_null() {
+; CHECK-LABEL: @malloc_can_return_null(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @malloc(i64 8)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT:    br i1 [[CMP]], label [[CAST_END:%.*]], label [[CAST_NOTNULL:%.*]]
+; CHECK:       cast.notnull:
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 4
+; CHECK-NEXT:    br label [[CAST_END]]
+; CHECK:       cast.end:
+; CHECK-NEXT:    [[CAST_RESULT:%.*]] = phi i8* [ [[ADD_PTR]], [[CAST_NOTNULL]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i8* [[CAST_RESULT]]
+;
 entry:
   %call = tail call noalias i8* @malloc(i64 8)
   %cmp = icmp eq i8* %call, null
@@ -184,38 +365,44 @@ cast.end:                                         ; preds = %cast.notnull, %entr
   %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
   ret i8* %cast.result
 
-; CHECK-LABEL: @malloc_can_return_null
-; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull
 }
 
 define i32 @call_null() {
+; CHECK-LABEL: @call_null(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 null()
+; CHECK-NEXT:    ret i32 undef
+;
 entry:
   %call = call i32 null()
   ret i32 %call
 }
-; CHECK-LABEL: define i32 @call_null(
-; CHECK: ret i32 undef
 
 define i32 @call_undef() {
+; CHECK-LABEL: @call_undef(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 undef()
+; CHECK-NEXT:    ret i32 undef
+;
 entry:
   %call = call i32 undef()
   ret i32 %call
 }
-; CHECK-LABEL: define i32 @call_undef(
-; CHECK: ret i32 undef
 
 @GV = private constant [8 x i32] [i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49]
 
 define <8 x i32> @partial_masked_load() {
 ; CHECK-LABEL: @partial_masked_load(
-; CHECK:         ret <8 x i32> <i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
+; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
+;
   %masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* bitcast (i32* getelementptr ([8 x i32], [8 x i32]* @GV, i64 0, i64 -2) to <8 x i32>*), i32 4, <8 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
   ret <8 x i32> %masked.load
 }
 
 define <8 x i32> @masked_load_undef_mask(<8 x i32>* %V) {
 ; CHECK-LABEL: @masked_load_undef_mask(
-; CHECK:         ret <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT:    ret <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>
+;
   %masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %V, i32 4, <8 x i1> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>)
   ret <8 x i32> %masked.load
 }
diff --git a/test/Transforms/InstSimplify/or.ll b/test/Transforms/InstSimplify/or.ll
index 2c5b6181bc6c..14b08af00646 100644
--- a/test/Transforms/InstSimplify/or.ll
+++ b/test/Transforms/InstSimplify/or.ll
@@ -159,7 +159,7 @@ define i399 @test4_apint(i399 %V, i399 %M) {
     %A = add i399 %V, %N
     %B = and i399 %A, %C1
     %D = and i399 %V, 274877906943
-    %R = or i399 %B, %D
+    %R = or i399 %D, %B
     ret i399 %R
 }
 
@@ -179,3 +179,42 @@ define i117 @test6_apint(i117 %X) {
     ret i117 %Y
 }
 
+; Test the case where integer BitWidth <= 64 && BitWidth % 2 != 0.
+; Vector version of test1_apint with the add commuted
+define <2 x i39> @test7_apint(<2 x i39> %V, <2 x i39> %M) {
+; CHECK-LABEL: @test7_apint(
+; CHECK-NEXT:    [[N:%.*]] = and <2 x i39> [[M:%.*]], <i39 -274877906944, i39 -274877906944>
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i39> [[N]], [[V:%.*]]
+; CHECK-NEXT:    ret <2 x i39> [[A]]
+;
+  ;; If we have: ((V + N) & C1) | (V & C2)
+  ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+  ;; replace with V+N.
+  %C1 = xor <2 x i39> <i39 274877906943, i39 274877906943>, <i39 -1, i39 -1> ;; C2 = 274877906943
+  %N = and <2 x i39> %M, <i39 274877906944, i39 274877906944>
+  %A = add <2 x i39> %N, %V
+  %B = and <2 x i39> %A, %C1
+  %D = and <2 x i39> %V, <i39 274877906943, i39 274877906943>
+  %R = or <2 x i39> %B, %D
+  ret <2 x i39> %R
+}
+
+; Test the case where Integer BitWidth > 64 && BitWidth <= 1024.
+; Vector version of test4_apint with the add and the or commuted
+define <2 x i399> @test8_apint(<2 x i399> %V, <2 x i399> %M) {
+; CHECK-LABEL: @test8_apint(
+; CHECK-NEXT:    [[N:%.*]] = and <2 x i399> [[M:%.*]], <i399 18446742974197923840, i399 18446742974197923840>
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i399> [[N]], [[V:%.*]]
+; CHECK-NEXT:    ret <2 x i399> [[A]]
+;
+  ;; If we have: ((V + N) & C1) | (V & C2)
+  ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+  ;; replace with V+N.
+  %C1 = xor <2 x i399> <i399 274877906943, i399 274877906943>, <i399 -1, i399 -1> ;; C2 = 274877906943
+  %N = and <2 x i399> %M, <i399 18446742974197923840, i399 18446742974197923840>
+  %A = add <2 x i399> %N, %V
+  %B = and <2 x i399> %A, %C1
+  %D = and <2 x i399> %V, <i399 274877906943, i399 274877906943>
+  %R = or <2 x i399> %D, %B
+  ret <2 x i399> %R
+}
diff --git a/test/Transforms/JumpThreading/assume.ll b/test/Transforms/JumpThreading/assume.ll
index 3a039676e172..f58ee299cba0 100644
--- a/test/Transforms/JumpThreading/assume.ll
+++ b/test/Transforms/JumpThreading/assume.ll
@@ -59,12 +59,12 @@ return:                                           ; preds = %entry, %if.then
 @g = external global i32
 
 ; Check that we do prove a fact using an assume within the block.
-; FIXME: We can fold the assume based on the semantics of assume.
-; CHECK-LABEL: @can_fold_assume
-; CHECK: %notnull = icmp ne i32* %array, null
-; CHECK-NEXT: call void @llvm.assume(i1 %notnull)
-; CHECK-NEXT: ret void
+; We can fold the assume based on the semantics of assume.
 define void @can_fold_assume(i32* %array) {
+; CHECK-LABEL: @can_fold_assume
+; CHECK-NOT: call void @llvm.assume
+; CHECK-NOT: br
+; CHECK: ret void
   %notnull = icmp ne i32* %array, null
   call void @llvm.assume(i1 %notnull)
   br i1 %notnull, label %normal, label %error
@@ -80,19 +80,128 @@ error:
 declare void @f(i1)
 declare void @exit()
 ; We can fold the assume but not the uses before the assume.
-define void @dont_fold_incorrectly(i32* %array) {
-; CHECK-LABEL:@dont_fold_incorrectly
+define void @cannot_fold_use_before_assume(i32* %array) {
+; CHECK-LABEL:@cannot_fold_use_before_assume
 ; CHECK: @f(i1 %notnull)
 ; CHECK-NEXT: exit()
-; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NOT: assume
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @f(i1 %notnull)
+  call void @exit()
+  call void @llvm.assume(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
+
+declare void @dummy(i1) nounwind argmemonly
+define void @can_fold_some_use_before_assume(i32* %array) {
+
+; CHECK-LABEL:@can_fold_some_use_before_assume
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: @dummy(i1 true)
+; CHECK-NOT: assume
 ; CHECK-NEXT: ret void
   %notnull = icmp ne i32* %array, null
   call void @f(i1 %notnull)
+  call void @dummy(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+
+}
+
+; FIXME: We can fold the assume and all uses before/after the assume,
+; because the trapping exit call is after the assume.
+define void @can_fold_assume_and_all_uses(i32* %array) {
+; CHECK-LABEL:@can_fold_assume_and_all_uses
+; CHECK: @dummy(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: exit()
+; CHECK-NEXT: %notnull2 = or i1 true, false
+; CHECK-NEXT: @f(i1 %notnull2)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @dummy(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
   call void @exit()
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  %notnull2 = or i1 %notnull, false
+  call void @f(i1 %notnull2)
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
+
+declare void @fz(i8)
+; FIXME: We can fold assume to true, and the use after assume, but we do not do so
+; currently, because of the function call after the assume.
+define void @can_fold_assume2(i32* %array) {
+
+; CHECK-LABEL:@can_fold_assume2
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: znotnull = zext i1 %notnull to i8
+; CHECK-NEXT: @f(i1 %notnull)
+; CHECK-NEXT: @f(i1 true)
+; CHECK-NEXT: @fz(i8 %znotnull)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @f(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
+  %znotnull = zext i1 %notnull to i8
+  call void @f(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  call void @f(i1 %notnull)
+  call void @fz(i8 %znotnull)
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
+
+declare void @llvm.experimental.guard(i1, ...)
+; FIXME: We can fold assume to true, but we do not do so
+; because of the guard following the assume.
+define void @can_fold_assume3(i32* %array){
+
+; CHECK-LABEL:@can_fold_assume3
+; CHECK: @f(i1 %notnull)
+; CHECK-NEXT: assume(i1 %notnull)
+; CHECK-NEXT: guard(i1 %notnull)
+; CHECK-NEXT: znotnull = zext i1 true to i8
+; CHECK-NEXT: @f(i1 true)
+; CHECK-NEXT: @fz(i8 %znotnull)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @f(i1 %notnull)
   call void @llvm.assume(i1 %notnull)
+  call void(i1, ...) @llvm.experimental.guard(i1 %notnull) [ "deopt"() ]
+  %znotnull = zext i1 %notnull to i8
   br i1 %notnull, label %normal, label %error
 
 normal:
+  call void @f(i1 %notnull)
+  call void @fz(i8 %znotnull)
   ret void
 
 error:
@@ -100,6 +209,26 @@ error:
   ret void
 }
 
+
+; can fold all uses and remove the cond
+define void @can_fold_assume4(i32* %array) {
+; CHECK-LABEL: can_fold_assume4
+; CHECK-NOT: notnull
+; CHECK: dummy(i1 true)
+; CHECK-NEXT: ret void
+  %notnull = icmp ne i32* %array, null
+  call void @exit()
+  call void @dummy(i1 %notnull)
+  call void @llvm.assume(i1 %notnull)
+  br i1 %notnull, label %normal, label %error
+
+normal:
+  ret void
+
+error:
+  store atomic i32 0, i32* @g unordered, align 4
+  ret void
+}
 ; Function Attrs: nounwind
 declare void @llvm.assume(i1) #1
 
diff --git a/test/Transforms/JumpThreading/fold-not-thread.ll b/test/Transforms/JumpThreading/fold-not-thread.ll
index f05169b31bc8..85cdcc0d9b33 100644
--- a/test/Transforms/JumpThreading/fold-not-thread.ll
+++ b/test/Transforms/JumpThreading/fold-not-thread.ll
@@ -133,10 +133,10 @@ L3:
   ret void
 }
 
-; FIXME: Make sure we can do the RAUW for %add...
+; Make sure we can do the RAUW for %add...
 ;
 ; CHECK-LABEL: @rauw_if_possible(
-; CHECK: call void @f4(i32 %add)
+; CHECK: call void @f4(i32 96)
 define void @rauw_if_possible(i32 %value) nounwind {
 entry:
   %cmp = icmp eq i32 %value, 32
diff --git a/test/Transforms/JumpThreading/guards.ll b/test/Transforms/JumpThreading/guards.ll
index c5f72b113efc..53175a7b7253 100644
--- a/test/Transforms/JumpThreading/guards.ll
+++ b/test/Transforms/JumpThreading/guards.ll
@@ -182,86 +182,89 @@ Exit:
   ret void
 }
 
-declare void @never_called()
+declare void @never_called(i1)
 
-; Assume the guard is always taken and we deoptimize, so we never reach the
-; branch below that guard. We should *never* change the behaviour of a guard from
-; `must deoptimize` to `may deoptimize`, since this affects the program
-; semantics.
+; LVI uses the guard to identify the value of %c2 in the branch as true; we cannot
+; replace that guard with guard(true & c1).
 define void @dont_fold_guard(i8* %addr, i32 %i, i32 %length) {
 ; CHECK-LABEL: dont_fold_guard
-; CHECK: experimental.guard(i1 %wide.chk)
-
-entry:
-  br label %BBPred
+; CHECK: %wide.chk = and i1 %c1, %c2
+; CHECK-NEXT: experimental.guard(i1 %wide.chk)
+; CHECK-NEXT: call void @never_called(i1 true)
+; CHECK-NEXT: ret void
+  %c1 = icmp ult i32 %i, %length
+  %c2 = icmp eq i32 %i, 0
+  %wide.chk = and i1 %c1, %c2
+  call void(i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+  br i1 %c2, label %BB1, label %BB2
 
-BBPred:
- %cond = icmp eq i8* %addr, null
- br i1 %cond, label %zero, label %not_zero
+BB1:
+  call void @never_called(i1 %c2)
+  ret void
 
-zero:
-  unreachable
+BB2:
+  ret void
+}
 
-not_zero:
+declare void @dummy(i1) nounwind argmemonly
+; Same as dont_fold_guard but there's a use immediately after the guard and before
+; the branch. We can fold that use.
+define void @dont_fold_guard2(i8* %addr, i32 %i, i32 %length) {
+; CHECK-LABEL: dont_fold_guard2
+; CHECK: %wide.chk = and i1 %c1, %c2
+; CHECK-NEXT: experimental.guard(i1 %wide.chk)
+; CHECK-NEXT: dummy(i1 true)
+; CHECK-NEXT: call void @never_called(i1 true)
+; CHECK-NEXT: ret void
   %c1 = icmp ult i32 %i, %length
   %c2 = icmp eq i32 %i, 0
   %wide.chk = and i1 %c1, %c2
   call void(i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
-  br i1 %c2, label %unreachedBB2, label %unreachedBB1
+  call void @dummy(i1 %c2)
+  br i1 %c2, label %BB1, label %BB2
 
-unreachedBB2:
-  call void @never_called()
+BB1:
+  call void @never_called(i1 %c2)
   ret void
 
-unreachedBB1:
+BB2:
   ret void
 }
 
-
 ; same as dont_fold_guard1 but condition %cmp is not an instruction.
 ; We cannot fold the guard under any circumstance.
 ; FIXME: We can merge unreachableBB2 into not_zero.
-define void @dont_fold_guard2(i8* %addr, i1 %cmp, i32 %i, i32 %length) {
-; CHECK-LABEL: dont_fold_guard2
+define void @dont_fold_guard3(i8* %addr, i1 %cmp, i32 %i, i32 %length) {
+; CHECK-LABEL: dont_fold_guard3
 ; CHECK: guard(i1 %cmp)
-
-entry:
-  br label %BBPred
-
-BBPred:
- %cond = icmp eq i8* %addr, null
- br i1 %cond, label %zero, label %not_zero
-
-zero:
-  unreachable
-
-not_zero:
   call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
-  br i1 %cmp, label %unreachedBB2, label %unreachedBB1
+  br i1 %cmp, label %BB1, label %BB2
 
-unreachedBB2:
-  call void @never_called()
+BB1:
+  call void @never_called(i1 %cmp)
   ret void
 
-unreachedBB1:
+BB2:
   ret void
 }
 
+declare void @f(i1)
 ; Same as dont_fold_guard1 but use switch instead of branch.
 ; triggers source code `ProcessThreadableEdges`.
-declare void @f(i1)
-define void @dont_fold_guard3(i1 %cmp1, i32 %i) nounwind {
-; CHECK-LABEL: dont_fold_guard3 
+define void @dont_fold_guard4(i1 %cmp1, i32 %i) nounwind {
+; CHECK-LABEL: dont_fold_guard4 
 ; CHECK-LABEL: L2:
 ; CHECK-NEXT: %cmp = icmp eq i32 %i, 0 
 ; CHECK-NEXT: guard(i1 %cmp)
-; CHECK-NEXT: @f(i1 %cmp)
+; CHECK-NEXT: dummy(i1 true)
+; CHECK-NEXT: @f(i1 true)
 ; CHECK-NEXT: ret void
 entry:
   br i1 %cmp1, label %L0, label %L3 
 L0:
   %cmp = icmp eq i32 %i, 0
   call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  call void @dummy(i1 %cmp)
   switch i1 %cmp, label %L3 [
     i1 false, label %L1
     i1 true, label %L2
diff --git a/test/Transforms/LoopIdiom/pr33114.ll b/test/Transforms/LoopIdiom/pr33114.ll
new file mode 100644
index 000000000000..fa44d8e31e7c
--- /dev/null
+++ b/test/Transforms/LoopIdiom/pr33114.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Check that we're not crashing while looking at the recurrence variable.
+; RUN: opt -S -loop-idiom %s | FileCheck %s
+
+define void @tinkywinky() {
+; CHECK-LABEL: @tinkywinky(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[PH:%.*]]
+; CHECK:       ph:
+; CHECK-NEXT:    [[MYPHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[PATATINO:%.*]] = ashr i32 [[MYPHI]], undef
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[PATATINO]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[EXIT_LOOPEXIT:%.*]], label [[IF_END]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 true, label %exit, label %ph
+
+ph:
+  %myphi = phi i32 [ 1, %entry ]
+  br label %if.end
+
+if.end:
+  %patatino = ashr i32 %myphi, undef
+  %tobool = icmp eq i32 %patatino, 0
+  br i1 %tobool, label %exit, label %if.end
+
+exit:
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll b/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll
index 3adb8bcf514d..00c3222b0051 100644
--- a/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll
@@ -25,7 +25,7 @@ L2:                                               ; preds = %idxend.8
 if6:                                              ; preds = %idxend.8
   %r2 = add i64 %0, -1
   %r3 = load i64, i64* %1, align 8
-; CHECK-NOT:  %r2
+; CHECK:  %r2 = add i64 %0, -1
 ; CHECK:  %r3 = load i64
   br label %ib
 
@@ -36,13 +36,11 @@ ib:                                               ; preds = %if6
   %r4 = mul i64 %r3, %r0
   %r5 = add i64 %r2, %r4
   %r6 = icmp ult i64 %r5, undef
-; CHECK:  [[MUL1:%[0-9]+]] = mul i64 %lsr.iv, %r3
-; CHECK:  [[ADD1:%[0-9]+]] = add i64 [[MUL1]], -1
-; CHECK:  add i64 %{{.}}, [[ADD1]]
-; CHECK:  %r6
+; CHECK:  %r4 = mul i64 %r3, %lsr.iv
+; CHECK:  %r5 = add i64 %r2, %r4
+; CHECK:  %r6 = icmp ult i64 %r5, undef
+; CHECK:  %r7 = getelementptr i64, i64* undef, i64 %r5
   %r7 = getelementptr i64, i64* undef, i64 %r5
   store i64 1, i64* %r7, align 8
-; CHECK:  [[MUL2:%[0-9]+]] = mul i64 %lsr.iv, %r3
-; CHECK:  [[ADD2:%[0-9]+]] = add i64 [[MUL2]], -1
   br label %L
 }
diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
index aa688d999e60..a7731bfcec56 100644
--- a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
@@ -1,5 +1,14 @@
+; REQUIRES: x86
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 
+; Strength reduction analysis here relies on IV Users analysis, which
+; only finds users among instructions with types that are treated as
+; legal by the data layout. When running this test on pure non-x86
+; configs (for example, ARM 64), it gets confused with the target
+; triple and uses a default data layout instead. This default layout
+; does not have any legal types (even i32), so the transformation
+; does not happen.
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx"
 
@@ -7,16 +16,23 @@ target triple = "x86_64-apple-macosx"
 ;
 ; SCEV expander cannot expand quadratic recurrences outside of the
 ; loop. This recurrence depends on %sub.us, so can't be expanded.
+; We cannot fold SCEVUnknown (sub.us) with recurrences since it is
+; declared after the loop.
 ;
 ; CHECK-LABEL: @test2
 ; CHECK-LABEL: test2.loop:
-; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ]
-; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216
+; CHECK:  %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
+; CHECK:  %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ]
+; CHECK:  %lsr.iv.next = add nsw i32 %lsr.iv, 1
+; CHECK:  %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
 ;
 ; CHECK-LABEL: for.end:
-; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
-; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us
-; CHECK: %f = ashr i32 %sext.us, 24
+; CHECK:  %tobool.us = icmp eq i32 %lsr.iv.next2, 0
+; CHECK:  %sub.us = select i1 %tobool.us, i32 0, i32 0
+; CHECK:  %1 = sub i32 0, %sub.us
+; CHECK:  %2 = add i32 %1, %lsr.iv.next
+; CHECK:  %sext.us = mul i32 %lsr.iv.next2, %2
+; CHECK:  %f = ashr i32 %sext.us, 24
 ; CHECK: ret i32 %f
 define i32 @test2() {
 entry:
diff --git a/test/Transforms/LoopStrengthReduce/nonintegral.ll b/test/Transforms/LoopStrengthReduce/nonintegral.ll
new file mode 100644
index 000000000000..5648e3aa74af
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/nonintegral.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+; Address space 10 is non-integral. The optimizer is not allowed to use
+; ptrtoint/inttoptr instructions. Make sure that this doesn't happen.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) {
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+; How exactly SCEV chooses to materialize isn't all that important, as
+; long as it doesn't try to round-trip through integers. As of this writing,
+; it emits a byte-wise gep, which is fine.
+; CHECK: getelementptr i64, i64 addrspace(10)* {{.*}}, i64 {{.*}}
+top:
+  br label %L86
+
+L86:                                              ; preds = %L86, %top
+  %i.0 = phi i64 [ 0, %top ], [ %tmp, %L86 ]
+  %tmp = add i64 %i.0, 1
+  br i1 undef, label %L86, label %if29
+
+if29:                                             ; preds = %L86
+  %tmp1 = shl i64 %tmp, 1
+  %tmp2 = add i64 %tmp1, -2
+  br label %if31
+
+if31:                                             ; preds = %if38, %if29
+  %"#temp#1.sroa.0.022" = phi i64 [ 0, %if29 ], [ %tmp3, %if38 ]
+  br label %L119
+
+L119:                                             ; preds = %L119, %if31
+  %i5.0 = phi i64 [ %"#temp#1.sroa.0.022", %if31 ], [ %tmp3, %L119 ]
+  %tmp3 = add i64 %i5.0, 1
+  br i1 undef, label %L119, label %if38
+
+if38:                                             ; preds = %L119
+  %tmp4 = add i64 %tmp2, %i5.0
+  %tmp5 = getelementptr i64, i64 addrspace(10)* %arg, i64 %tmp4
+  %tmp6 = load i64, i64 addrspace(10)* %tmp5
+  br i1 undef, label %done, label %if31
+
+done:                                             ; preds = %if38
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index fbf55fd81d23..cbf177c0d4b9 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -25,6 +25,8 @@ define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 %radix, %struct.Vector2*
 entry:
   %buffer = alloca [33 x i16], align 16
   %add.ptr = getelementptr inbounds [33 x i16], [33 x i16]* %buffer, i64 0, i64 33
+  %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
+  %sub.ptr.rhs.cast = ptrtoint i16* %add.ptr to i64
   br label %do.body
 
 do.body:                                          ; preds = %do.body, %entry
@@ -46,8 +48,6 @@ do.body:                                          ; preds = %do.body, %entry
 do.end:                                           ; preds = %do.body
   %xap.0 = inttoptr i64 %0 to i1*
   %cap.0 = ptrtoint i1* %xap.0 to i64
-  %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
-  %sub.ptr.rhs.cast = ptrtoint i16* %incdec.ptr to i64
   %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
   %sub.ptr.div39 = lshr exact i64 %sub.ptr.sub, 1
   %conv11 = trunc i64 %sub.ptr.div39 to i32
diff --git a/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
new file mode 100644
index 000000000000..a7f414b8694b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
@@ -0,0 +1,26 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -S -debug-only=loop-vectorize 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: all_scalar
+; CHECK:       LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2
+; CHECK:       LV: Found an estimated cost of 2 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2
+; CHECK:       LV: Not considering vector loop of width 2 because it will not generate any vector instructions
+;
+define void @all_scalar(i64* %a, i64 %n) { ; stores zero to every second i64 of %a until the index equals %n
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+  %tmp0 = getelementptr i64, i64* %a, i64 %i
+  store i64 0, i64* %tmp0, align 1 ; the only memory access in the loop
+  %i.next = add nuw nsw i64 %i, 2 ; index advances by 2 each iteration
+  %cond = icmp eq i64 %i.next, %n
+  br i1 %cond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/SystemZ/addressing.ll b/test/Transforms/LoopVectorize/SystemZ/addressing.ll
new file mode 100644
index 000000000000..1f7a6d29c57c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/SystemZ/addressing.ll
@@ -0,0 +1,72 @@
+; RUN: opt -S  -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize -dce \
+; RUN:   -instcombine -force-vector-width=2  < %s | FileCheck %s
+;
+; Test that the loop vectorizer does not generate vector addresses that must
+; then always be extracted.
+
+; Check that the addresses for a scalarized memory access are not extracted
+; from a vector register.
+define i32 @foo(i32* nocapture %A) { ; stores 4 to A[4*i] for i = 0..9999
+;CHECK-LABEL: @foo(
+;CHECK:  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+;CHECK:  %0 = shl nsw i64 %index, 2
+;CHECK:  %1 = shl i64 %index, 2
+;CHECK:  %2 = or i64 %1, 4
+;CHECK:  %3 = getelementptr inbounds i32, i32* %A, i64 %0
+;CHECK:  %4 = getelementptr inbounds i32, i32* %A, i64 %2
+;CHECK:  store i32 4, i32* %3, align 4
+;CHECK:  store i32 4, i32* %4, align 4
+
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %0 = shl nsw i64 %indvars.iv, 2 ; element index is 4 * induction variable, so the stores are strided
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
+  store i32 4, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 10000 ; loop exits once the incremented counter reaches 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 undef
+}
+
+
+; Check that a load of an address is scalarized.
+define i32 @foo1(i32* nocapture noalias %A, i32** nocapture %PtrPtr) { ; A[i] = *PtrPtr[i] for i = 0..9999
+;CHECK-LABEL: @foo1(
+;CHECK:  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+;CHECK:  %0 = or i64 %index, 1
+;CHECK:  %1 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %index
+;CHECK:  %2 = getelementptr inbounds i32*, i32** %PtrPtr, i64 %0
+;CHECK:  %3 = load i32*, i32** %1, align 8
+;CHECK:  %4 = load i32*, i32** %2, align 8
+;CHECK:  %5 = load i32, i32* %3, align 4
+;CHECK:  %6 = load i32, i32* %4, align 4
+;CHECK:  %7 = insertelement <2 x i32> undef, i32 %5, i32 0
+;CHECK:  %8 = insertelement <2 x i32> %7, i32 %6, i32 1
+;CHECK:  %9 = getelementptr inbounds i32, i32* %A, i64 %index
+;CHECK:  %10 = bitcast i32* %9 to <2 x i32>*
+;CHECK:  store <2 x i32> %8, <2 x i32>* %10, align 4
+
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %ptr = getelementptr inbounds i32*, i32** %PtrPtr, i64 %indvars.iv
+  %el = load i32*, i32** %ptr ; the loaded value is itself an address, dereferenced on the next line
+  %v = load i32, i32* %el
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  store i32 %v, i32* %arrayidx, align 4 ; consecutive store into A
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 10000 ; loop exits once the incremented counter reaches 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index b2933c4b56f2..4dc62d86453f 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -11,38 +11,38 @@
 ;       break;
 ;   }
 ; }
+; File, line, and column should match those specified in the metadata
+; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
+; CHECK: remark: source.cpp:4:5: loop not vectorized
 
 ; void test_disabled(int *A, int Length) {
 ; #pragma clang loop vectorize(disable) interleave(disable)
 ;   for (int i = 0; i < Length; i++)
 ;     A[i] = i;
 ; }
+; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
 
 ; void test_array_bounds(int *A, int *B, int Length) {
 ; #pragma clang loop vectorize(enable)
 ;   for (int i = 0; i < Length; i++)
 ;     A[i] = A[B[i]];
 ; }
-
-; File, line, and column should match those specified in the metadata
-; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
-; CHECK: remark: source.cpp:4:5: loop not vectorized
-; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
 ; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
 ; CHECK: remark: source.cpp:19:5: loop not vectorized
 ; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization
 
-; CHECK: _Z4testPii
-; CHECK-NOT: x i32>
-; CHECK: ret
-
-; CHECK: _Z13test_disabledPii
-; CHECK-NOT: x i32>
-; CHECK: ret
-
-; CHECK: _Z17test_array_boundsPiS_i
-; CHECK-NOT: x i32>
-; CHECK: ret
+; int foo();
+; int test_multiple_failures(int *A) {
+;   int k = 0;
+; #pragma clang loop vectorize(enable) interleave(enable)
+;   for (int i = 0; i < 1000; i+=A[i]) {
+;     if (A[i])
+;       k = foo();
+;   }
+;   return k;
+; }
+; CHECK: remark: source.cpp:29:7: loop not vectorized: control flow cannot be substituted for a select
+; CHECK: remark: source.cpp:27:3: loop not vectorized
 
 ; YAML:       --- !Analysis
 ; YAML-NEXT: Pass:            loop-vectorize
@@ -98,6 +98,41 @@
 ; YAML-NEXT:   - String:          'loop not vectorized: '
 ; YAML-NEXT:   - String:          failed explicitly specified loop vectorization
 ; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass:            loop-vectorize
+; YAML-NEXT: Name:            NoCFGForSelect
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 29, Column: 7 }
+; YAML-NEXT: Function:        test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not vectorized: '
+; YAML-NEXT:   - String:          control flow cannot be substituted for a select
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass:            loop-vectorize
+; YAML-NEXT: Name:            NonReductionValueUsedOutsideLoop
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function:        test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not vectorized: '
+; YAML-NEXT:   - String:          value that could not be identified as reduction is used outside the loop
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass:            loop-vectorize
+; YAML-NEXT: Name:            CantComputeNumberOfIterations
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function:        test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not vectorized: '
+; YAML-NEXT:   - String:          could not determine number of loop iterations
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Missed
+; YAML-NEXT: Pass:            loop-vectorize
+; YAML-NEXT: Name:            MissedDetails
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function:        test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          loop not vectorized
+; YAML-NEXT: ...
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -124,6 +159,10 @@ for.end:                                          ; preds = %for.body, %entry
   ret void, !dbg !24
 }
 
+; CHECK: _Z4testPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
 ; Function Attrs: nounwind optsize ssp uwtable
 define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 !dbg !7 {
 entry:
@@ -144,6 +183,10 @@ for.end:                                          ; preds = %for.body, %entry
   ret void, !dbg !31
 }
 
+; CHECK: _Z13test_disabledPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
 ; Function Attrs: nounwind optsize ssp uwtable
 define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 !dbg !8 {
 entry:
@@ -174,6 +217,45 @@ for.end:                                          ; preds = %for.end.loopexit, %
   ret void, !dbg !36
 }
 
+; CHECK: _Z17test_array_boundsPiS_i
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+; Function Attrs: nounwind uwtable
+define i32 @test_multiple_failures(i32* nocapture readonly %A) #0 !dbg !46 { ; loop steps i by A[i], calls foo() when A[i] != 0, returns the last k
+entry:
+  br label %for.body, !dbg !38
+
+for.body:                                         ; preds = %entry, %for.inc
+  %i.09 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
+  %k.09 = phi i32 [ 0, %entry ], [ %k.1, %for.inc ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.09, !dbg !40
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !40
+  %tobool = icmp eq i32 %0, 0, !dbg !40
+  br i1 %tobool, label %for.inc, label %if.then, !dbg !40 ; skip the call when A[i] == 0
+
+if.then:                                          ; preds = %for.body
+  %call = tail call i32 (...) @foo(), !dbg !41
+  %.pre = load i32, i32* %arrayidx, align 4 ; A[i] is loaded again after the call
+  br label %for.inc, !dbg !42
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %1 = phi i32 [ %.pre, %if.then ], [ 0, %for.body ], !dbg !43 ; step: reloaded A[i], or 0 on the path where A[i] was 0
+  %k.1 = phi i32 [ %call, %if.then ], [ %k.09, %for.body ] ; k is replaced by foo()'s result only on the if.then path
+  %add = add nsw i32 %1, %i.09, !dbg !44
+  %cmp = icmp slt i32 %add, 1000, !dbg !45
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !38
+
+for.cond.cleanup:                                 ; preds = %for.inc
+  ret i32 %k.1, !dbg !39 ; %k.1 is used outside the loop
+}
+
+declare i32 @foo(...)
+
+; CHECK: test_multiple_failure
+; CHECK-NOT: x i32>
+; CHECK: ret
+
 attributes #0 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
@@ -216,3 +298,13 @@ attributes #0 = { nounwind }
 !34 = !{!34, !15}
 !35 = !DILocation(line: 19, column: 5, scope: !33)
 !36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, variables: !2)
diff --git a/test/Transforms/NewGVN/pr32403.ll b/test/Transforms/NewGVN/pr32403.ll
index 2552e0e66ab9..505d31a9463e 100644
--- a/test/Transforms/NewGVN/pr32403.ll
+++ b/test/Transforms/NewGVN/pr32403.ll
@@ -17,7 +17,8 @@ define void @reorder_ref_pic_list() local_unnamed_addr {
 ; CHECK-NEXT:    [[INC_I:%.*]] = add nsw i32 [[REFIDXLX_0]], 1
 ; CHECK-NEXT:    br label [[FOR_BODY8_I:%.*]]
 ; CHECK:       for.body8.i:
-; CHECK-NEXT:    br i1 undef, label [[FOR_INC24_I:%.*]], label [[IF_THEN17_I:%.*]]
+; CHECK-NEXT:    [[NIDX_052_I:%.*]] = phi i32 [ [[INC_I]], [[IF_THEN13]] ], [ [[NIDX_052_I]], [[FOR_INC24_I:%.*]] ]
+; CHECK-NEXT:    br i1 undef, label [[FOR_INC24_I]], label [[IF_THEN17_I:%.*]]
 ; CHECK:       if.then17.i:
 ; CHECK-NEXT:    br label [[FOR_INC24_I]]
 ; CHECK:       for.inc24.i:
diff --git a/test/Transforms/NewGVN/pr32836.ll b/test/Transforms/NewGVN/pr32836.ll
new file mode 100644
index 000000000000..623f216101bf
--- /dev/null
+++ b/test/Transforms/NewGVN/pr32836.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -newgvn %s | FileCheck %s
+
+%struct.anon = type { i32 }
+@b = external global %struct.anon
+define void @tinkywinky(i1 %patatino) { ; NewGVN input; the expected output is given by the check lines below
+; CHECK-LABEL: @tinkywinky(
+; CHECK-NEXT:    store i32 8, i32* null
+; CHECK-NEXT:    br i1 [[PATATINO:%.*]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[L:%.*]]
+; CHECK:       L:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* null
+; CHECK-NEXT:    [[BF_LOAD1:%.*]] = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0)
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i32 [[TMP1]], 536870911
+; CHECK-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -536870912
+; CHECK-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+; CHECK-NEXT:    store i32 [[BF_SET]], i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0)
+; CHECK-NEXT:    br label [[LOR_END:%.*]]
+; CHECK:       lor.end:
+; CHECK-NEXT:    br label [[L]]
+;
+  store i32 8, i32* null
+  br i1 %patatino, label %if.end, label %if.then
+if.then:
+  store i32 8, i32* null ; same store as in the entry block; the expected output has no store in if.then
+  br label %L
+L:
+  br label %if.end
+if.end:
+  %tmp1 = load i32, i32* null
+  %bf.load1 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @b, i64 0, i32 0)
+  %bf.value = and i32 %tmp1, 536870911 ; keeps the low 29 bits of %tmp1
+  %bf.clear = and i32 %bf.load1, -536870912 ; keeps the high 3 bits of %bf.load1
+  %bf.set = or i32 %bf.clear, %bf.value
+  store i32 %bf.set, i32* getelementptr (%struct.anon, %struct.anon* @b, i64 0, i32 0)
+  br label %lor.end
+lor.end:
+  %bf.load4 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @b, i64 0, i32 0) ; the three values in this block have no users; the expected lor.end holds only the branch
+  %tmp4 = and i32 %bf.load4, 536870911
+  %or = or i32 0, %tmp4
+  br label %L
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll b/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
index 3ac3c5138ae7..a97e3f81a8ef 100644
--- a/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
+++ b/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
@@ -382,3 +382,64 @@ loop_exit2:
 ; CHECK-NEXT:    %[[R:.*]] = add i32 %[[R1]], %[[R2]]
 ; CHECK-NEXT:    ret i32 %[[R]]
 }
+
+; This test, extracted from the LLVM test suite, has an interesting dominator
+; tree to update as there are edges to sibling domtree nodes within child
+; domtree nodes of the unswitched node.
+define void @xgets(i1 %cond1, i1* %cond2.ptr) { ; loop branching on the argument %cond1; the expected output tests it in the preheader instead
+; CHECK-LABEL: @xgets(
+entry:
+  br label %for.cond.preheader
+; CHECK:       entry:
+; CHECK-NEXT:    br label %for.cond.preheader
+
+for.cond.preheader:
+  br label %for.cond
+; CHECK:       for.cond.preheader:
+; CHECK-NEXT:    br i1 %cond1, label %for.cond.preheader.split, label %if.end17.thread.loopexit
+;
+; CHECK:       for.cond.preheader.split:
+; CHECK-NEXT:    br label %for.cond
+
+for.cond:
+  br i1 %cond1, label %land.lhs.true, label %if.end17.thread.loopexit ; %cond1 is a function argument; the expected for.cond branches unconditionally
+; CHECK:       for.cond:
+; CHECK-NEXT:    br label %land.lhs.true
+
+land.lhs.true:
+  br label %if.then20
+; CHECK:       land.lhs.true:
+; CHECK-NEXT:    br label %if.then20
+
+if.then20:
+  %cond2 = load volatile i1, i1* %cond2.ptr ; volatile load, re-read on every iteration
+  br i1 %cond2, label %if.then23, label %if.else ; this branch is expected to stay inside the loop unchanged
+; CHECK:       if.then20:
+; CHECK-NEXT:    %[[COND2:.*]] = load volatile i1, i1* %cond2.ptr
+; CHECK-NEXT:    br i1 %[[COND2]], label %if.then23, label %if.else
+
+if.else:
+  br label %for.cond ; loop back edge
+; CHECK:       if.else:
+; CHECK-NEXT:    br label %for.cond
+
+if.end17.thread.loopexit:
+  br label %if.end17.thread
+; CHECK:       if.end17.thread.loopexit:
+; CHECK-NEXT:    br label %if.end17.thread
+
+if.end17.thread:
+  br label %cleanup
+; CHECK:       if.end17.thread:
+; CHECK-NEXT:    br label %cleanup
+
+if.then23:
+  br label %cleanup
+; CHECK:       if.then23:
+; CHECK-NEXT:    br label %cleanup
+
+cleanup:
+  ret void
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+}
author	Dimitry Andric <dim@FreeBSD.org>	2017-05-29 16:25:25 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-05-29 16:25:25 +0000
commit	ab44ce3d598882e51a25eb82eb7ae6308de85ae6 (patch)
tree	568d786a59d49bef961dcb9bd09d422701b9da5b /test/Transforms
parent	b5630dbadf9a2a06754194387d6b0fd9962a67f1 (diff)