13 files changed, 810 insertions, 21 deletions
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
index c6959d72961d1..33fd978277d4c 100644
--- a/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -1,6 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
+; add nsw (xor X, signbit), signbit --> X
+
+define <2 x i32> @add_nsw_signbit(<2 x i32> %x) {
+; CHECK-LABEL: @add_nsw_signbit(
+; CHECK-NEXT:    ret <2 x i32> %x
+;
+  %y = xor <2 x i32> %x, <i32 -2147483648, i32 -2147483648>
+  %z = add nsw <2 x i32> %y, <i32 -2147483648, i32 -2147483648>
+  ret <2 x i32> %z
+}
+
+; add nuw (xor X, signbit), signbit --> X
+
+define <2 x i5> @add_nuw_signbit(<2 x i5> %x) {
+; CHECK-LABEL: @add_nuw_signbit(
+; CHECK-NEXT:    ret <2 x i5> %x
+;
+  %y = xor <2 x i5> %x, <i5 -16, i5 -16>
+  %z = add nuw <2 x i5> %y, <i5 -16, i5 -16>
+  ret <2 x i5> %z
+}
+
 define i64 @pow2(i32 %x) {
 ; CHECK-LABEL: @pow2(
 ; CHECK-NEXT:    [[NEGX:%.*]] = sub i32 0, %x
diff --git a/test/Transforms/InstSimplify/addsub.ll b/test/Transforms/InstSimplify/addsub.ll
new file mode 100644
index 0000000000000..2f19a4d205e77
--- /dev/null
+++ b/test/Transforms/InstSimplify/addsub.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i1 @test1(i1 %a) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret i1 true
+;
+  %b = xor i1 %a, true
+  %res = sub i1 %a, %b
+  ret i1 %res
+}
+
+define <2 x i1> @test2(<2 x i1> %a) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    ret <2 x i1> <i1 true, i1 true>
+;
+  %b = xor <2 x i1> %a, <i1 true, i1 true>
+  %res = sub <2 x i1> %a, %b
+  ret <2 x i1> %res
+}
+
+define i1 @test5(i1 %a) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    ret i1 false
+;
+  %res = add i1 %a, %a
+  ret i1 %res
+}
+
+define <2 x i1> @test6(<2 x i1> %a) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
+;
+  %res = add <2 x i1> %a, %a
+  ret <2 x i1> %res
+}
+
+define i1 @test7(i1 %a) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    ret i1 [[A:%.*]]
+;
+  %c = xor i1 %a, true
+  %res = add i1 %c, true
+  ret i1 %res
+}
+
+; TODO: simplify this to %a
+define i1 @test8(i1 %a) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[C:%.*]] = add i1 [[A:%.*]], true
+; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %c = add i1 %a, true
+  %res = xor i1 %c, true
+  ret i1 %res
+}
+
+define i1 @test9(i1 %a) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    ret i1 [[A:%.*]]
+;
+  %c = xor i1 %a, true
+  %res = sub i1 %c, true
+  ret i1 %res
+}
+
+; TODO: simplify this to %a
+define i1 @test10(i1 %a) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[C:%.*]] = sub i1 [[A:%.*]], true
+; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %c = sub i1 %a, true
+  %res = xor i1 %c, true
+  ret i1 %res
+}
diff --git a/test/Transforms/InstSimplify/assume.ll b/test/Transforms/InstSimplify/assume.ll
index 2487a9c8bb154..66f2120f2928b 100644
--- a/test/Transforms/InstSimplify/assume.ll
+++ b/test/Transforms/InstSimplify/assume.ll
@@ -1,5 +1,10 @@
 ; NOTE: Assertions have been autogenerated by update_test_checks.py
-; RUN: opt -instsimplify -S < %s | FileCheck %s
+; RUN: opt -instsimplify -S < %s 2>&1 -pass-remarks-analysis=.* | FileCheck %s
+
+; Verify that remarks are emitted for the 2nd and 3rd tests.
+
+; CHECK: remark: /tmp/s.c:1:13: Detected conflicting code assumptions.
+; CHECK: remark: /tmp/s.c:4:10: Detected conflicting code assumptions.
 
 define void @test1() {
 ; CHECK-LABEL: @test1(
@@ -10,5 +15,58 @@ define void @test1() {
 
 }
 
+; The alloca guarantees that the low bits of %a are zero because of alignment.
+; The assume says the opposite. The assume is processed last, so its value
+; becomes the return value. There's no way to win (we can't undo transforms
+; that happened based on half-truths), so just don't crash.
+
+define i64 @PR31809() !dbg !7 {
+; CHECK-LABEL: @PR31809(
+; CHECK-NEXT:    ret i64 3
+;
+  %a = alloca i32
+  %t1 = ptrtoint i32* %a to i64, !dbg !9
+  %cond = icmp eq i64 %t1, 3
+  call void @llvm.assume(i1 %cond)
+  ret i64 %t1
+}
+
+; Similar to above: there's no way to know which assumption is truthful,
+; so just don't crash. The second icmp+assume gets processed later, so that
+; determines the return value.
+
+define i8 @conflicting_assumptions(i8 %x) !dbg !10 {
+; CHECK-LABEL: @conflicting_assumptions(
+; CHECK-NEXT:    call void @llvm.assume(i1 false)
+; CHECK-NEXT:    [[COND2:%.*]] = icmp eq i8 %x, 4
+; CHECK-NEXT:    call void @llvm.assume(i1 [[COND2]])
+; CHECK-NEXT:    ret i8 5
+;
+  %add = add i8 %x, 1, !dbg !11
+  %cond1 = icmp eq i8 %x, 3
+  call void @llvm.assume(i1 %cond1)
+  %cond2 = icmp eq i8 %x, 4
+  call void @llvm.assume(i1 %cond2)
+  ret i8 %add
+}
+
 declare void @llvm.assume(i1) nounwind
 
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 4.0.0 (trunk 282540) (llvm/trunk 282542)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
+!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"PIC Level", i32 2}
+!6 = !{!"clang version 4.0.0 (trunk 282540) (llvm/trunk 282542)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, variables: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 1, column: 13, scope: !7)
+!10 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, unit: !0, variables: !2)
+!11 = !DILocation(line: 4, column: 10, scope: !10)
+!12 = !DILocation(line: 4, column: 3, scope: !10)
+
diff --git a/test/Transforms/InstSimplify/bitreverse.ll b/test/Transforms/InstSimplify/bitreverse.ll
new file mode 100644
index 0000000000000..d87b68831fe5b
--- /dev/null
+++ b/test/Transforms/InstSimplify/bitreverse.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -S -instsimplify | FileCheck %s
+
+declare i32 @llvm.bitreverse.i32(i32)
+
+; CHECK-LABEL: @test1(
+; CHECK: ret i1 false
+define i1 @test1(i32 %arg) {
+  %a = or i32 %arg, 1
+  %b = call i32 @llvm.bitreverse.i32(i32 %a)
+  %res = icmp eq i32 %b, 0
+  ret i1 %res
+}
+
+; CHECK-LABEL: @test2(
+; CHECK: ret i1 false
+define i1 @test2(i32 %arg) {
+  %a = or i32 %arg, 1024
+  %b = call i32 @llvm.bitreverse.i32(i32 %a)
+  %res = icmp eq i32 %b, 0
+  ret i1 %res
+}
+
+; CHECK-LABEL: @test3(
+; CHECK: ret i1 false
+define i1 @test3(i32 %arg) {
+  %a = and i32 %arg, 1
+  %b = call i32 @llvm.bitreverse.i32(i32 %a)
+  %and = and i32 %b, 1
+  %res = icmp eq i32 %and, 1
+  ret i1 %res
+}
diff --git a/test/Transforms/InstSimplify/div.ll b/test/Transforms/InstSimplify/div.ll
index b8ce34aaa37e6..f096719359dcd 100644
--- a/test/Transforms/InstSimplify/div.ll
+++ b/test/Transforms/InstSimplify/div.ll
@@ -1,10 +1,64 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
+; Division-by-zero is undef. UB in any vector lane means the whole op is undef.
+
+define <2 x i8> @sdiv_zero_elt_vec_constfold(<2 x i8> %x) {
+; CHECK-LABEL: @sdiv_zero_elt_vec_constfold(
+; CHECK-NEXT:    ret <2 x i8> undef
+;
+  %div = sdiv <2 x i8> <i8 1, i8 2>, <i8 0, i8 -42>
+  ret <2 x i8> %div
+}
+
+define <2 x i8> @udiv_zero_elt_vec_constfold(<2 x i8> %x) {
+; CHECK-LABEL: @udiv_zero_elt_vec_constfold(
+; CHECK-NEXT:    ret <2 x i8> undef
+;
+  %div = udiv <2 x i8> <i8 1, i8 2>, <i8 42, i8 0>
+  ret <2 x i8> %div
+}
+
+define <2 x i8> @sdiv_zero_elt_vec(<2 x i8> %x) {
+; CHECK-LABEL: @sdiv_zero_elt_vec(
+; CHECK-NEXT:    ret <2 x i8> undef
+;
+  %div = sdiv <2 x i8> %x, <i8 -42, i8 0>
+  ret <2 x i8> %div
+}
+
+define <2 x i8> @udiv_zero_elt_vec(<2 x i8> %x) {
+; CHECK-LABEL: @udiv_zero_elt_vec(
+; CHECK-NEXT:    ret <2 x i8> undef
+;
+  %div = udiv <2 x i8> %x, <i8 0, i8 42>
+  ret <2 x i8> %div
+}
+
+; Division-by-zero is undef. UB in any vector lane means the whole op is undef.
+; Thus, we can simplify this: if any element of 'y' is 0, we can do anything.
+; Therefore, assume that all elements of 'y' must be 1.
+
+define <2 x i1> @sdiv_bool_vec(<2 x i1> %x, <2 x i1> %y) {
+; CHECK-LABEL: @sdiv_bool_vec(
+; CHECK-NEXT:    ret <2 x i1> %x
+;
+  %div = sdiv <2 x i1> %x, %y
+  ret <2 x i1> %div
+}
+
+define <2 x i1> @udiv_bool_vec(<2 x i1> %x, <2 x i1> %y) {
+; CHECK-LABEL: @udiv_bool_vec(
+; CHECK-NEXT:    ret <2 x i1> %x
+;
+  %div = udiv <2 x i1> %x, %y
+  ret <2 x i1> %div
+}
+
 declare i32 @external()
 
 define i32 @div1() {
 ; CHECK-LABEL: @div1(
-; CHECK:         [[CALL:%.*]] = call i32 @external(), !range !0
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @external(), !range !0
 ; CHECK-NEXT:    ret i32 0
 ;
   %call = call i32 @external(), !range !0
diff --git a/test/Transforms/InstSimplify/fdiv.ll b/test/Transforms/InstSimplify/fdiv.ll
index bb7f443f42387..6643afd814711 100644
--- a/test/Transforms/InstSimplify/fdiv.ll
+++ b/test/Transforms/InstSimplify/fdiv.ll
@@ -1,9 +1,25 @@
-; NOTE: Assertions have been autogenerated by update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
+define float @fdiv_constant_fold() {
+; CHECK-LABEL: @fdiv_constant_fold(
+; CHECK-NEXT:    ret float 1.500000e+00
+;
+  %f = fdiv float 3.0, 2.0
+  ret float %f
+}
+
+define float @frem_constant_fold() {
+; CHECK-LABEL: @frem_constant_fold(
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %f = frem float 3.0, 2.0
+  ret float %f
+}
+
 define double @fdiv_of_undef(double %X) {
 ; CHECK-LABEL: @fdiv_of_undef(
-; CHECK:         ret double undef
+; CHECK-NEXT:    ret double undef
 ;
 ; undef / X -> undef
   %r = fdiv double undef, %X
@@ -12,7 +28,7 @@ define double @fdiv_of_undef(double %X) {
 
 define double @fdiv_by_undef(double %X) {
 ; CHECK-LABEL: @fdiv_by_undef(
-; CHECK:         ret double undef
+; CHECK-NEXT:    ret double undef
 ;
 ; X / undef -> undef
   %r = fdiv double %X, undef
diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index dfdb88dcc8580..e635032e6b712 100644
--- a/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -104,6 +104,7 @@ define float @PR22688(float %x) {
 }
 
 declare float @llvm.fabs.f32(float)
+declare float @llvm.sqrt.f32(float)
 
 ; CHECK-LABEL: @fabs_select_positive_constants(
 ; CHECK: %select = select i1 %cmp, float 1.000000e+00, float 2.000000e+00
@@ -195,3 +196,56 @@ define float @fabs_select_negnan_zero(float addrspace(1)* %out, i32 %c) {
   %fabs = call float @llvm.fabs.f32(float %select)
   ret float %fabs
 }
+
+; CHECK-LABEL: @fabs_sqrt(
+; CHECK: call float @llvm.sqrt.f32
+; CHECK: call float @llvm.fabs.f32
+define float @fabs_sqrt(float %a) {
+; The fabs can't be eliminated because llvm.sqrt.f32 may return -0 or NaN with
+; an arbitrary sign bit.
+  %sqrt = call float @llvm.sqrt.f32(float %a)
+  %fabs = call float @llvm.fabs.f32(float %sqrt)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_sqrt_nnan(
+; CHECK: call nnan float @llvm.sqrt.f32
+; CHECK: call float @llvm.fabs.f32
+define float @fabs_sqrt_nnan(float %a) {
+; The fabs can't be eliminated because the nnan sqrt may still return -0.
+  %sqrt = call nnan float @llvm.sqrt.f32(float %a)
+  %fabs = call float @llvm.fabs.f32(float %sqrt)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_sqrt_nsz(
+; CHECK: call nsz float @llvm.sqrt.f32
+; CHECK: call float @llvm.fabs.f32
+define float @fabs_sqrt_nsz(float %a) {
+; The fabs can't be eliminated because the nsz sqrt may still return NaN.
+  %sqrt = call nsz float @llvm.sqrt.f32(float %a)
+  %fabs = call float @llvm.fabs.f32(float %sqrt)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_sqrt_nnan_nsz(
+; CHECK: call nnan nsz float @llvm.sqrt.f32
+; CHECK-NOT: call float @llvm.fabs.f32
+define float @fabs_sqrt_nnan_nsz(float %a) {
+; The fabs can be eliminated because the sqrt is both nnan and nsz.
+  %sqrt = call nnan nsz float @llvm.sqrt.f32(float %a)
+  %fabs = call float @llvm.fabs.f32(float %sqrt)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_sqrt_nnan_fabs(
+; CHECK: call float @llvm.fabs.f32
+; CHECK: call nnan float @llvm.sqrt.f32
+; CHECK-NOT: call float @llvm.fabs.f32
+define float @fabs_sqrt_nnan_fabs(float %a) {
+; The second fabs can be eliminated: the sqrt is nnan and its operand cannot be -0.
+  %b = call float @llvm.fabs.f32(float %a)
+  %sqrt = call nnan float @llvm.sqrt.f32(float %b)
+  %fabs = call float @llvm.fabs.f32(float %sqrt)
+  ret float %fabs
+}
diff --git a/test/Transforms/InstSimplify/icmp-constant.ll b/test/Transforms/InstSimplify/icmp-constant.ll
index 85de1a45ea277..918722299b592 100644
--- a/test/Transforms/InstSimplify/icmp-constant.ll
+++ b/test/Transforms/InstSimplify/icmp-constant.ll
@@ -416,3 +416,158 @@ define <2 x i1> @tautological9_vec(<2 x i32> %x) {
   ret <2 x i1> %cmp
 }
 
+; The upper bound of the 'add' is 0.
+
+define i1 @add_nsw_neg_const1(i32 %x) {
+; CHECK-LABEL: @add_nsw_neg_const1(
+; CHECK-NEXT:    ret i1 false
+;
+  %add = add nsw i32 %x, -2147483647
+  %cmp = icmp sgt i32 %add, 0
+  ret i1 %cmp
+}
+
+; InstCombine can fold this, but not InstSimplify.
+
+define i1 @add_nsw_neg_const2(i32 %x) {
+; CHECK-LABEL: @add_nsw_neg_const2(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 %x, -2147483647
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[ADD]], -1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add nsw i32 %x, -2147483647
+  %cmp = icmp sgt i32 %add, -1
+  ret i1 %cmp
+}
+
+; The upper bound of the 'add' is 1 (move the constants to prove we're doing range-based analysis).
+
+define i1 @add_nsw_neg_const3(i32 %x) {
+; CHECK-LABEL: @add_nsw_neg_const3(
+; CHECK-NEXT:    ret i1 false
+;
+  %add = add nsw i32 %x, -2147483646
+  %cmp = icmp sgt i32 %add, 1
+  ret i1 %cmp
+}
+
+; InstCombine can fold this, but not InstSimplify.
+
+define i1 @add_nsw_neg_const4(i32 %x) {
+; CHECK-LABEL: @add_nsw_neg_const4(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 %x, -2147483646
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[ADD]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add nsw i32 %x, -2147483646
+  %cmp = icmp sgt i32 %add, 0
+  ret i1 %cmp
+}
+
+; The upper bound of the 'add' is 2147483647 - 42 = 2147483605 (move the constants again and try a different cmp predicate).
+
+define i1 @add_nsw_neg_const5(i32 %x) {
+; CHECK-LABEL: @add_nsw_neg_const5(
+; CHECK-NEXT:    ret i1 true
+;
+  %add = add nsw i32 %x, -42
+  %cmp = icmp ne i32 %add, 2147483606
+  ret i1 %cmp
+}
+
+; InstCombine can fold this, but not InstSimplify.
+
+define i1 @add_nsw_neg_const6(i32 %x) {
+; CHECK-LABEL: @add_nsw_neg_const6(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 %x, -42
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[ADD]], 2147483605
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add nsw i32 %x, -42
+  %cmp = icmp ne i32 %add, 2147483605
+  ret i1 %cmp
+}
+
+; The lower bound of the 'add' is -1.
+
+define i1 @add_nsw_pos_const1(i32 %x) {
+; CHECK-LABEL: @add_nsw_pos_const1(
+; CHECK-NEXT:    ret i1 false
+;
+  %add = add nsw i32 %x, 2147483647
+  %cmp = icmp slt i32 %add, -1
+  ret i1 %cmp
+}
+
+; InstCombine can fold this, but not InstSimplify.
+
+define i1 @add_nsw_pos_const2(i32 %x) {
+; CHECK-LABEL: @add_nsw_pos_const2(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 %x, 2147483647
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[ADD]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add nsw i32 %x, 2147483647
+  %cmp = icmp slt i32 %add, 0
+  ret i1 %cmp
+}
+
+; The lower bound of the 'add' is -2 (move the constants to prove we're doing range-based analysis).
+
+define i1 @add_nsw_pos_const3(i32 %x) {
+; CHECK-LABEL: @add_nsw_pos_const3(
+; CHECK-NEXT:    ret i1 false
+;
+  %add = add nsw i32 %x, 2147483646
+  %cmp = icmp slt i32 %add, -2
+  ret i1 %cmp
+}
+
+; InstCombine can fold this, but not InstSimplify.
+
+define i1 @add_nsw_pos_const4(i32 %x) {
+; CHECK-LABEL: @add_nsw_pos_const4(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 %x, 2147483646
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[ADD]], -1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add nsw i32 %x, 2147483646
+  %cmp = icmp slt i32 %add, -1
+  ret i1 %cmp
+}
+
+; The lower bound of the 'add' is -2147483648 + 42 = -2147483606 (move the constants again and change the cmp predicate).
+
+define i1 @add_nsw_pos_const5(i32 %x) {
+; CHECK-LABEL: @add_nsw_pos_const5(
+; CHECK-NEXT:    ret i1 false
+;
+  %add = add nsw i32 %x, 42
+  %cmp = icmp eq i32 %add, -2147483607
+  ret i1 %cmp
+}
+
+; InstCombine can fold this, but not InstSimplify.
+
+define i1 @add_nsw_pos_const6(i32 %x) {
+; CHECK-LABEL: @add_nsw_pos_const6(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 %x, 42
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[ADD]], -2147483606
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add nsw i32 %x, 42
+  %cmp = icmp eq i32 %add, -2147483606
+  ret i1 %cmp
+}
+
+; Verify that vectors work too.
+
+define <2 x i1> @add_nsw_pos_const5_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @add_nsw_pos_const5_splat_vec(
+; CHECK-NEXT:    ret <2 x i1> <i1 true, i1 true>
+;
+  %add = add nsw <2 x i32> %x, <i32 42, i32 42>
+  %cmp = icmp ne <2 x i32> %add, <i32 -2147483607, i32 -2147483607>
+  ret <2 x i1> %cmp
+}
+
diff --git a/test/Transforms/InstSimplify/mul.ll b/test/Transforms/InstSimplify/mul.ll
new file mode 100644
index 0000000000000..0bf8f699a6860
--- /dev/null
+++ b/test/Transforms/InstSimplify/mul.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define <2 x i1> @test1(<2 x i1> %a) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
+;
+  %b = and <2 x i1> %a, <i1 true, i1 false>
+  %res = mul <2 x i1> %b, <i1 false, i1 true>
+  ret <2 x i1> %res
+}
diff --git a/test/Transforms/InstSimplify/rem.ll b/test/Transforms/InstSimplify/rem.ll
index c73d34346ded5..b7f18f36b4b98 100644
--- a/test/Transforms/InstSimplify/rem.ll
+++ b/test/Transforms/InstSimplify/rem.ll
@@ -1,9 +1,63 @@
 ; NOTE: Assertions have been autogenerated by update_test_checks.py
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
+; Division-by-zero is undef. UB in any vector lane means the whole op is undef.
+
+define <2 x i8> @srem_zero_elt_vec_constfold(<2 x i8> %x) {
+; CHECK-LABEL: @srem_zero_elt_vec_constfold(
+; CHECK-NEXT:    ret <2 x i8> undef
+;
+  %rem = srem <2 x i8> <i8 1, i8 2>, <i8 0, i8 -42>
+  ret <2 x i8> %rem
+}
+
+define <2 x i8> @urem_zero_elt_vec_constfold(<2 x i8> %x) {
+; CHECK-LABEL: @urem_zero_elt_vec_constfold(
+; CHECK-NEXT:    ret <2 x i8> undef
+;
+  %rem = urem <2 x i8> <i8 1, i8 2>, <i8 42, i8 0>
+  ret <2 x i8> %rem
+}
+
+define <2 x i8> @srem_zero_elt_vec(<2 x i8> %x) {
+; CHECK-LABEL: @srem_zero_elt_vec(
+; CHECK-NEXT:    ret <2 x i8> undef
+;
+  %rem = srem <2 x i8> %x, <i8 -42, i8 0>
+  ret <2 x i8> %rem
+}
+
+define <2 x i8> @urem_zero_elt_vec(<2 x i8> %x) {
+; CHECK-LABEL: @urem_zero_elt_vec(
+; CHECK-NEXT:    ret <2 x i8> undef
+;
+  %rem = urem <2 x i8> %x, <i8 0, i8 42>
+  ret <2 x i8> %rem
+}
+
+; Division-by-zero is undef. UB in any vector lane means the whole op is undef.
+; Thus, we can simplify this: if any element of 'y' is 0, we can do anything.
+; Therefore, assume that all elements of 'y' must be 1.
+
+define <2 x i1> @srem_bool_vec(<2 x i1> %x, <2 x i1> %y) {
+; CHECK-LABEL: @srem_bool_vec(
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
+;
+  %rem = srem <2 x i1> %x, %y
+  ret <2 x i1> %rem
+}
+
+define <2 x i1> @urem_bool_vec(<2 x i1> %x, <2 x i1> %y) {
+; CHECK-LABEL: @urem_bool_vec(
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
+;
+  %rem = urem <2 x i1> %x, %y
+  ret <2 x i1> %rem
+}
+
 define i32 @select1(i32 %x, i1 %b) {
 ; CHECK-LABEL: @select1(
-; CHECK:         ret i32 0
+; CHECK-NEXT:    ret i32 0
 ;
   %rhs = select i1 %b, i32 %x, i32 1
   %rem = srem i32 %x, %rhs
@@ -12,7 +66,7 @@ define i32 @select1(i32 %x, i1 %b) {
 
 define i32 @select2(i32 %x, i1 %b) {
 ; CHECK-LABEL: @select2(
-; CHECK:         ret i32 0
+; CHECK-NEXT:    ret i32 0
 ;
   %rhs = select i1 %b, i32 %x, i32 1
   %rem = urem i32 %x, %rhs
@@ -21,40 +75,40 @@ define i32 @select2(i32 %x, i1 %b) {
 
 define i32 @rem1(i32 %x, i32 %n) {
 ; CHECK-LABEL: @rem1(
-; CHECK:         [[MOD:%.*]] = srem i32 %x, %n
+; CHECK-NEXT:    [[MOD:%.*]] = srem i32 %x, %n
 ; CHECK-NEXT:    ret i32 [[MOD]]
 ;
- %mod = srem i32 %x, %n
- %mod1 = srem i32 %mod, %n
- ret i32 %mod1
+  %mod = srem i32 %x, %n
+  %mod1 = srem i32 %mod, %n
+  ret i32 %mod1
 }
 
 define i32 @rem2(i32 %x, i32 %n) {
 ; CHECK-LABEL: @rem2(
-; CHECK:         [[MOD:%.*]] = urem i32 %x, %n
+; CHECK-NEXT:    [[MOD:%.*]] = urem i32 %x, %n
 ; CHECK-NEXT:    ret i32 [[MOD]]
 ;
- %mod = urem i32 %x, %n
- %mod1 = urem i32 %mod, %n
- ret i32 %mod1
+  %mod = urem i32 %x, %n
+  %mod1 = urem i32 %mod, %n
+  ret i32 %mod1
 }
 
 define i32 @rem3(i32 %x, i32 %n) {
 ; CHECK-LABEL: @rem3(
-; CHECK:         [[MOD:%.*]] = srem i32 %x, %n
+; CHECK-NEXT:    [[MOD:%.*]] = srem i32 %x, %n
 ; CHECK-NEXT:    [[MOD1:%.*]] = urem i32 [[MOD]], %n
 ; CHECK-NEXT:    ret i32 [[MOD1]]
 ;
- %mod = srem i32 %x, %n
- %mod1 = urem i32 %mod, %n
- ret i32 %mod1
+  %mod = srem i32 %x, %n
+  %mod1 = urem i32 %mod, %n
+  ret i32 %mod1
 }
 
 declare i32 @external()
 
 define i32 @rem4() {
 ; CHECK-LABEL: @rem4(
-; CHECK:         [[CALL:%.*]] = call i32 @external(), !range !0
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @external(), !range !0
 ; CHECK-NEXT:    ret i32 [[CALL]]
 ;
   %call = call i32 @external(), !range !0
diff --git a/test/Transforms/InstSimplify/select.ll b/test/Transforms/InstSimplify/select.ll
index 1acb5c469d373..cb2502cf63c9f 100644
--- a/test/Transforms/InstSimplify/select.ll
+++ b/test/Transforms/InstSimplify/select.ll
@@ -402,7 +402,8 @@ define i32* @select_icmp_pointers(i32* %x, i32* %y) {
   ret i32* %sel
 }
 
-; FIXME: If the condition is known, we don't need to select.
+; If the condition is known, we don't need to select, but we're not
+; doing this fold here to avoid compile-time cost.
 
 declare void @llvm.assume(i1)
 
diff --git a/test/Transforms/InstSimplify/shift-knownbits.ll b/test/Transforms/InstSimplify/shift-knownbits.ll
index f50ea0582c6c9..63b9b76fd22f7 100644
--- a/test/Transforms/InstSimplify/shift-knownbits.ll
+++ b/test/Transforms/InstSimplify/shift-knownbits.ll
@@ -145,3 +145,46 @@ define i1 @shl_i1(i1 %a, i1 %b) {
   ret i1 %shl
 }
 
+; A right-shift of a count-leading/trailing-zeros result simplifies to zero if every bit that can be set is shifted out.
+
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) nounwind readnone
+declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone
+
+define i32 @lshr_ctlz_zero_is_undef(i32 %x) {
+; CHECK-LABEL: @lshr_ctlz_zero_is_undef(
+; CHECK-NEXT:    ret i32 0
+;
+  %ct = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %sh = lshr i32 %ct, 5
+  ret i32 %sh
+}
+
+define i32 @lshr_cttz_zero_is_undef(i32 %x) {
+; CHECK-LABEL: @lshr_cttz_zero_is_undef(
+; CHECK-NEXT:    ret i32 0
+;
+  %ct = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %sh = lshr i32 %ct, 5
+  ret i32 %sh
+}
+
+define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec(
+; CHECK-NEXT:    ret <2 x i8> zeroinitializer
+;
+  %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
+  %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
+  ret <2 x i8> %sh
+}
+
+define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec(
+; CHECK-NEXT:    ret <2 x i8> zeroinitializer
+;
+  %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
+  %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
+  ret <2 x i8> %sh
+}
+
diff --git a/test/Transforms/InstSimplify/shufflevector.ll b/test/Transforms/InstSimplify/shufflevector.ll
new file mode 100644
index 0000000000000..c6d180da293f8
--- /dev/null
+++ b/test/Transforms/InstSimplify/shufflevector.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define <4 x i32> @const_folding(<4 x i32> %x) {
+; CHECK-LABEL: @const_folding(
+; CHECK-NEXT:    ret <4 x i32> zeroinitializer
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> zeroinitializer, <4 x i32> <i32 5, i32 4, i32 5, i32 4>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @const_folding1(<4 x i32> %x) {
+; CHECK-LABEL: @const_folding1(
+; CHECK-NEXT:    ret <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+;
+  %shuf = shufflevector <4 x i32> <i32 5, i32 4, i32 5, i32 4>, <4 x i32> %x, <4 x i32> zeroinitializer
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @const_folding_negative(<3 x i32> %x) {
+; CHECK-LABEL: @const_folding_negative(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x i32> [[X:%.*]], <3 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 5, i32 4>
+; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+;
+  %shuf = shufflevector <3 x i32> %x, <3 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 5, i32 4>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @splat_operand(<4 x i32> %x) {
+; CHECK-LABEL: @splat_operand(
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <4 x i32> [[SPLAT]]
+;
+  %splat = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shuf = shufflevector <4 x i32> %splat, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @splat_operand1(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @splat_operand1(
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <4 x i32> [[SPLAT]]
+;
+  %splat = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> zeroinitializer
+  %shuf = shufflevector <4 x i32> %splat, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @splat_operand2(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @splat_operand2(
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <4 x i32> [[SPLAT]]
+;
+  %splat = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shuf = shufflevector <4 x i32> %splat, <4 x i32> %y, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @splat_operand3(<4 x i32> %x) {
+; CHECK-LABEL: @splat_operand3(
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <4 x i32> [[SPLAT]]
+;
+  %splat = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shuf = shufflevector <4 x i32> zeroinitializer, <4 x i32> %splat, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
+  ret <4 x i32> %shuf
+}
+
+define <8 x i32> @splat_operand_negative(<4 x i32> %x) {
+; CHECK-LABEL: @splat_operand_negative(
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[SPLAT]], <4 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    ret <8 x i32> [[SHUF]]
+;
+  %splat = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shuf = shufflevector <4 x i32> %splat, <4 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x i32> %shuf
+}
+
+define <4 x i32> @splat_operand_negative2(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @splat_operand_negative2(
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[SPLAT]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 4, i32 1>
+; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+;
+  %splat = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shuf = shufflevector <4 x i32> %splat, <4 x i32> %y, <4 x i32> <i32 0, i32 3, i32 4, i32 1>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @splat_operand_negative3(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @splat_operand_negative3(
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[SPLAT]], <4 x i32> <i32 0, i32 3, i32 4, i32 1>
+; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+;
+  %splat = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shuf = shufflevector <4 x i32> %y, <4 x i32> %splat, <4 x i32> <i32 0, i32 3, i32 4, i32 1>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @splat_operand_negative4(<4 x i32> %x) {
+; CHECK-LABEL: @splat_operand_negative4(
+; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[SPLAT]], <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+;
+  %splat = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
+  %shuf = shufflevector <4 x i32> %splat, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @undef_mask(<4 x i32> %x) {
+; CHECK-LABEL: @undef_mask(
+; CHECK-NEXT:    ret <4 x i32> undef
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> undef
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @identity_mask_0(<4 x i32> %x) {
+; CHECK-LABEL: @identity_mask_0(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @identity_mask_1(<4 x i32> %x) {
+; CHECK-LABEL: @identity_mask_1(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> undef, <4 x i32> [[X:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+;
+  %shuf = shufflevector <4 x i32> undef, <4 x i32> %x, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @pseudo_identity_mask(<4 x i32> %x) {
+; CHECK-LABEL: @pseudo_identity_mask(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @const_operand(<4 x i32> %x) {
+; CHECK-LABEL: @const_operand(
+; CHECK-NEXT:    ret <4 x i32> <i32 42, i32 45, i32 44, i32 43>
+;
+  %shuf = shufflevector <4 x i32> <i32 42, i32 43, i32 44, i32 45>, <4 x i32> %x, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+  ret <4 x i32> %shuf
+}
+
+define <4 x i32> @merge(<4 x i32> %x) {
+; CHECK-LABEL: @merge(
+; CHECK-NEXT:    [[LOWER:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[UPPER:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[MERGED:%.*]] = shufflevector <2 x i32> [[UPPER]], <2 x i32> [[LOWER]], <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+; CHECK-NEXT:    ret <4 x i32> [[MERGED]]
+;
+  %lower = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 1, i32 0>
+  %upper = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %merged = shufflevector <2 x i32> %upper, <2 x i32> %lower, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+  ret <4 x i32> %merged
+}
+
+define <8 x double> @extract_and_concat(<8 x double> %x) {
+; CHECK-LABEL: @extract_and_concat(
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x double> [[X:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[S2:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[S3:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[S4:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[S5:%.*]] = shufflevector <2 x double> [[S1]], <2 x double> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[S6:%.*]] = shufflevector <2 x double> [[S3]], <2 x double> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[S7:%.*]] = shufflevector <4 x double> [[S5]], <4 x double> [[S6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x double> [[S7]]
+;
+  %s1 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 0, i32 1>
+  %s2 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 2, i32 3>
+  %s3 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 4, i32 5>
+  %s4 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %s5 = shufflevector <2 x double> %s1, <2 x double> %s2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s6 = shufflevector <2 x double> %s3, <2 x double> %s4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s7 = shufflevector <4 x double> %s5, <4 x double> %s6, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x double> %s7
+}
+
+; This case has intermediate lane crossings.
+
+define <8 x i64> @PR30630(<8 x i64> %x) {
+; CHECK-LABEL: @PR30630(
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x i64> [[X:%.*]], <8 x i64> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-NEXT:    [[S2:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-NEXT:    [[S3:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-NEXT:    [[S4:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-NEXT:    [[S5:%.*]] = shufflevector <2 x i64> [[S1]], <2 x i64> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[S6:%.*]] = shufflevector <2 x i64> [[S3]], <2 x i64> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[S7:%.*]] = shufflevector <4 x i64> [[S5]], <4 x i64> [[S6]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    ret <8 x i64> [[S7]]
+;
+  %s1 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
+  %s2 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 1, i32 5>
+  %s3 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 2, i32 6>
+  %s4 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 3, i32 7>
+  %s5 = shufflevector <2 x i64> %s1, <2 x i64> %s2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s6 = shufflevector <2 x i64> %s3, <2 x i64> %s4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s7 = shufflevector <4 x i64> %s5, <4 x i64> %s6, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+  ret <8 x i64> %s7
+}
+