vendor/llvm/llvm-trunk-r300422

author: Dimitry Andric <dim@FreeBSD.org> 2017-04-16 16:01:22 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-04-16 16:01:22 +0000
commit: 71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch)
tree: 5343938942df402b49ec7300a1c25a2d4ccd5821 /test/Transforms/InstCombine
parent: 31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff)
115 files changed, 9282 insertions, 2170 deletions
diff --git a/test/Transforms/InstCombine/2008-01-29-AddICmp.ll b/test/Transforms/InstCombine/2008-01-29-AddICmp.ll
deleted file mode 100644
index a33eb9c1ddd42..0000000000000
--- a/test/Transforms/InstCombine/2008-01-29-AddICmp.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-; PR1949
-
-define i1 @test1(i32 %a) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 %a, -5
-; CHECK-NEXT:    ret i1 [[C]]
-;
-  %b = add i32 %a, 4
-  %c = icmp ult i32 %b, 4
-  ret i1 %c
-}
-
-define <2 x i1> @test1vec(<2 x i32> %a) {
-; CHECK-LABEL: @test1vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> %a, <i32 -5, i32 -5>
-; CHECK-NEXT:    ret <2 x i1> [[C]]
-;
-  %b = add <2 x i32> %a, <i32 4, i32 4>
-  %c = icmp ult <2 x i32> %b, <i32 4, i32 4>
-  ret <2 x i1> %c
-}
-
-define i1 @test2(i32 %a) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 %a, 4
-; CHECK-NEXT:    ret i1 [[C]]
-;
-  %b = sub i32 %a, 4
-  %c = icmp ugt i32 %b, -5
-  ret i1 %c
-}
-
-define <2 x i1> @test2vec(<2 x i32> %a) {
-; CHECK-LABEL: @test2vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i32> %a, <i32 4, i32 4>
-; CHECK-NEXT:    ret <2 x i1> [[C]]
-;
-  %b = sub <2 x i32> %a, <i32 4, i32 4>
-  %c = icmp ugt <2 x i32> %b, <i32 -5, i32 -5>
-  ret <2 x i1> %c
-}
-
-define i1 @test3(i32 %a) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 %a, 2147483643
-; CHECK-NEXT:    ret i1 [[C]]
-;
-  %b = add i32 %a, 4
-  %c = icmp slt i32 %b, 2147483652
-  ret i1 %c
-}
-
-define <2 x i1> @test3vec(<2 x i32> %a) {
-; CHECK-LABEL: @test3vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i32> %a, <i32 2147483643, i32 2147483643>
-; CHECK-NEXT:    ret <2 x i1> [[C]]
-;
-  %b = add <2 x i32> %a, <i32 4, i32 4>
-  %c = icmp slt <2 x i32> %b, <i32 2147483652, i32 2147483652>
-  ret <2 x i1> %c
-}
-
-define i1 @test4(i32 %a) {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 %a, -4
-; CHECK-NEXT:    ret i1 [[C]]
-;
-  %b = add i32 %a, 2147483652
-  %c = icmp sge i32 %b, 4
-  ret i1 %c
-}
-
-define <2 x i1> @test4vec(<2 x i32> %a) {
-; CHECK-LABEL: @test4vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i32> %a, <i32 -4, i32 -4>
-; CHECK-NEXT:    ret <2 x i1> [[C]]
-;
-  %b = add <2 x i32> %a, <i32 2147483652, i32 2147483652>
-  %c = icmp sge <2 x i32> %b, <i32 4, i32 4>
-  ret <2 x i1> %c
-}
-
diff --git a/test/Transforms/InstCombine/2008-05-22-NegValVector.ll b/test/Transforms/InstCombine/2008-05-22-NegValVector.ll
index bf92faf2fec58..58259be8bc923 100644
--- a/test/Transforms/InstCombine/2008-05-22-NegValVector.ll
+++ b/test/Transforms/InstCombine/2008-05-22-NegValVector.ll
@@ -6,3 +6,9 @@ define <3 x i8> @f(<3 x i8> %a) {
   ret <3 x i8> %B
 }
 
+define <3 x i4> @g(<3 x i4> %a) {
+  %A = sub <3 x i4> zeroinitializer, %a
+  %B = mul <3 x i4> %A, <i4 5, i4 5, i4 5>
+  ret <3 x i4> %B
+}
+
diff --git a/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll b/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
deleted file mode 100644
index 0c0e55a0b2d9a..0000000000000
--- a/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; PR3103
-
-define i8 @test1(i8 %x, i8 %y) {
-; CHECK-LABEL: @test1(
-  %A = udiv i8 %x, %y
-; CHECK-NEXT: urem
-  %B = mul i8 %A, %y
-  %C = sub i8 %x, %B
-  ret i8 %C
-; CHECK-NEXT: ret
-}
-
-define i8 @test2(i8 %x, i8 %y) {
-; CHECK-LABEL: @test2(
-  %A = sdiv i8 %x, %y
-; CHECK-NEXT: srem
-  %B = mul i8 %A, %y
-  %C = sub i8 %x, %B
-  ret i8 %C
-; CHECK-NEXT: ret
-}
-
-define i8 @test3(i8 %x, i8 %y) {
-; CHECK-LABEL: @test3(
-  %A = udiv i8 %x, %y
-; CHECK-NEXT: urem
-  %B = mul i8 %A, %y
-  %C = sub i8 %B, %x
-; CHECK-NEXT: sub
-  ret i8 %C
-; CHECK-NEXT: ret
-}
-
-define i8 @test4(i8 %x) {
-; CHECK-LABEL: @test4(
-  %A = udiv i8 %x, 3
-; CHECK-NEXT: urem
-  %B = mul i8 %A, -3
-; CHECK-NEXT: sub
-  %C = sub i8 %x, %B
-; CHECK-NEXT: add
-  ret i8 %C
-; CHECK-NEXT: ret
-}
-
-define i32 @test5(i32 %x, i32 %y) {
-; CHECK-LABEL: @test5(
-; (((X / Y) * Y) / Y) -> X / Y
-  %div = sdiv i32 %x, %y
-; CHECK-NEXT: sdiv
-  %mul = mul i32 %div, %y
-  %r = sdiv i32 %mul, %y
-  ret i32 %r
-; CHECK-NEXT: ret
-}
-
-define i32 @test6(i32 %x, i32 %y) {
-; CHECK-LABEL: @test6(
-; (((X / Y) * Y) / Y) -> X / Y
-  %div = udiv i32 %x, %y
-; CHECK-NEXT: udiv
-  %mul = mul i32 %div, %y
-  %r = udiv i32 %mul, %y
-  ret i32 %r
-; CHECK-NEXT: ret
-}
diff --git a/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll b/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll
deleted file mode 100644
index 4d4797720c531..0000000000000
--- a/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep "ashr i32 %val, 31"
-; PR3851
-
-define i32 @foo2(i32 %val) nounwind {
-entry:
-	%shr = ashr i32 %val, 15		; <i32> [#uses=3]
-	%shr4 = ashr i32 %shr, 17		; <i32> [#uses=1]
-        ret i32 %shr4
- }
diff --git a/test/Transforms/InstCombine/2012-07-25-LoadPart.ll b/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
index 14fcf52fe9a78..71255ebbf81ff 100644
--- a/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
+++ b/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -default-data-layout="e-p:32:32:32" -instcombine -S | FileCheck %s --check-prefix=LE
-; RUN: opt < %s -default-data-layout="E-p:32:32:32" -instcombine -S | FileCheck %s --check-prefix=BE
+; RUN: opt < %s -data-layout="e-p:32:32:32" -instcombine -S | FileCheck %s --check-prefix=LE
+; RUN: opt < %s -data-layout="E-p:32:32:32" -instcombine -S | FileCheck %s --check-prefix=BE
 ; PR13442
 
 @test = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
diff --git a/test/Transforms/InstCombine/X86FsubCmpCombine.ll b/test/Transforms/InstCombine/X86FsubCmpCombine.ll
new file mode 100644
index 0000000000000..fde0692d00a25
--- /dev/null
+++ b/test/Transforms/InstCombine/X86FsubCmpCombine.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; The test checks the folding of cmp(sub(a,b),0) into cmp(a,b).
+
+define i8 @sub_compare_foldingPD128_safe(<2 x double> %a, <2 x double> %b){
+; CHECK-LABEL: @sub_compare_foldingPD128_safe(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB_SAFE:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[SUB_SAFE]], <2 x double> zeroinitializer, i32 5, i8 -1)
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  %sub.safe = fsub <2 x double> %a, %b
+  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.safe , <2 x double> zeroinitializer, i32 5, i8 -1)
+  ret i8 %0
+}
+
+
+define i8 @sub_compare_foldingPD128(<2 x double> %a, <2 x double> %b){
+; CHECK-LABEL: @sub_compare_foldingPD128(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5, i8 -1)
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  %sub.i = fsub ninf <2 x double> %a, %b
+  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.i , <2 x double> zeroinitializer, i32 5, i8 -1)
+  ret i8 %0
+}
+
+
+define i8 @sub_compare_foldingPD256(<4 x double> %a, <4 x double> %b){
+; CHECK-LABEL: @sub_compare_foldingPD256(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[A:%.*]], <4 x double> [[B:%.*]], i32 5, i8 -1)
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  %sub.i1 = fsub ninf <4 x double> %a, %b
+  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5, i8 -1)
+  ret i8 %0
+}
+
+
+define i8 @sub_compare_foldingPD512(<8 x double> %a, <8 x double> %b){
+; CHECK-LABEL: @sub_compare_foldingPD512(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 11, i8 -1, i32 4)
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  %sub.i2 = fsub ninf <8 x double> %a, %b
+  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %sub.i2, <8 x double> zeroinitializer, i32 11, i8 -1, i32 4)
+  ret i8 %0
+}
+
+
+define i8 @sub_compare_foldingPS128(<4 x float> %a, <4 x float> %b){
+; CHECK-LABEL: @sub_compare_foldingPS128(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 12, i8 -1)
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  %sub.i3 = fsub ninf <4 x float> %a, %b
+  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %sub.i3, <4 x float> zeroinitializer, i32 12, i8 -1)
+  ret i8 %0
+}
+
+
+define i8 @sub_compare_foldingPS256(<8 x float> %a, <8 x float> %b){
+; CHECK-LABEL: @sub_compare_foldingPS256(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], i32 5, i8 -1)
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  %sub.i4 = fsub ninf <8 x float> %a, %b
+  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %sub.i4, <8 x float> zeroinitializer, i32 5, i8 -1)
+  ret i8 %0
+}
+
+
+define i16 @sub_compare_foldingPS512(<16 x float> %a, <16 x float> %b){
+; CHECK-LABEL: @sub_compare_foldingPS512(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 11, i16 -1, i32 4)
+; CHECK-NEXT:    ret i16 [[TMP0]]
+;
+entry:
+  %sub.i5 = fsub ninf <16 x float> %a, %b
+  %0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %sub.i5, <16 x float> zeroinitializer, i32 11, i16 -1, i32 4)
+  ret i16 %0
+}
+
+
+
+define i8 @sub_compare_folding_swapPD128(<2 x double> %a, <2 x double> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPD128(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[B:%.*]], <2 x double> [[A:%.*]], i32 5, i8 -1)
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  %sub.i = fsub ninf <2 x double> %a, %b
+  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> zeroinitializer, <2 x double> %sub.i, i32 5, i8 -1)
+  ret i8 %0
+}
+
+
+define i8 @sub_compare_folding_swapPD256(<4 x double> %a, <4 x double> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPD256(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[B:%.*]], <4 x double> [[A:%.*]], i32 5, i8 -1)
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  %sub.i = fsub ninf <4 x double> %a, %b
+  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> %sub.i, i32 5, i8 -1)
+  ret i8 %0
+}
+
+
+define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPD512(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[B:%.*]], <8 x double> [[A:%.*]], i32 11, i8 -1, i32 4)
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  %sub.i = fsub ninf <8 x double> %a, %b
+  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> zeroinitializer, <8 x double> %sub.i, i32 11, i8 -1, i32 4)
+  ret i8 %0
+}
+
+
+define i8 @sub_compare_folding_swapPS128(<4 x float> %a, <4 x float> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPS128(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[B:%.*]], <4 x float> [[A:%.*]], i32 12, i8 -1)
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  %sub.i = fsub ninf <4 x float> %a, %b
+  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> zeroinitializer, <4 x float> %sub.i, i32 12, i8 -1)
+  ret i8 %0
+}
+
+
+define i8 @sub_compare_folding_swapPS256(<8 x float> %a, <8 x float> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPS256(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[B:%.*]], <8 x float> [[A:%.*]], i32 5, i8 -1)
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  %sub.i = fsub ninf <8 x float> %a, %b
+  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> %sub.i, i32 5, i8 -1)
+  ret i8 %0
+}
+
+
+define i16 @sub_compare_folding_swapPS512(<16 x float> %a, <16 x float> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPS512(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[B:%.*]], <16 x float> [[A:%.*]], i32 11, i16 -1, i32 4)
+; CHECK-NEXT:    ret i16 [[TMP0]]
+;
+entry:
+  %sub.i = fsub ninf <16 x float> %a, %b
+  %0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> zeroinitializer, <16 x float> %sub.i, i32 11, i16 -1, i32 4)
+  ret i16 %0
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, i8)
+declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, i8)
+declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i8, i32)
+declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, i8)
+declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8)
+declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32)
diff --git a/test/Transforms/InstCombine/add-sitofp.ll b/test/Transforms/InstCombine/add-sitofp.ll
index 3b5485e005284..2abfa436f6d33 100644
--- a/test/Transforms/InstCombine/add-sitofp.ll
+++ b/test/Transforms/InstCombine/add-sitofp.ll
@@ -1,6 +1,14 @@
-; RUN: opt < %s -instcombine -S | grep "add nuw nsw i32"
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
-define double @x(i32 %a, i32 %b) nounwind {
+define double @x(i32 %a, i32 %b) {
+; CHECK-LABEL: @x(
+; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[A:%.*]], 24
+; CHECK-NEXT:    [[N:%.*]] = and i32 [[M]], [[B:%.*]]
+; CHECK-NEXT:    [[ADDCONV:%.*]] = add nuw nsw i32 [[N]], 1
+; CHECK-NEXT:    [[P:%.*]] = sitofp i32 [[ADDCONV]] to double
+; CHECK-NEXT:    ret double [[P]]
+;
   %m = lshr i32 %a, 24
   %n = and i32 %m, %b
   %o = sitofp i32 %n to double
diff --git a/test/Transforms/InstCombine/add.ll b/test/Transforms/InstCombine/add.ll
index 39a746ab310b1..648305d134cd3 100644
--- a/test/Transforms/InstCombine/add.ll
+++ b/test/Transforms/InstCombine/add.ll
@@ -1,6 +1,32 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
+; TODO: This should be canonicalized to either a select or xor+zext.
+
+define i32 @select_0_or_1_from_bool(i1 %x) {
+; CHECK-LABEL: @select_0_or_1_from_bool(
+; CHECK-NEXT:    [[EXT:%.*]] = sext i1 %x to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[EXT]], 1
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %ext = sext i1 %x to i32
+  %add = add i32 %ext, 1
+  ret i32 %add
+}
+
+; TODO: This should be canonicalized to either a select or xor+zext.
+
+define <2 x i32> @select_0_or_1_from_bool_vec(<2 x i1> %x) {
+; CHECK-LABEL: @select_0_or_1_from_bool_vec(
+; CHECK-NEXT:    [[EXT:%.*]] = sext <2 x i1> %x to <2 x i32>
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw <2 x i32> [[EXT]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[ADD]]
+;
+  %ext = sext <2 x i1> %x to <2 x i32>
+  %add = add <2 x i32> %ext, <i32 1, i32 1>
+  ret <2 x i32> %add
+}
+
 define i32 @test1(i32 %A) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:    ret i32 %A
@@ -100,7 +126,7 @@ define i32 @test9(i32 %A) {
 define i1 @test10(i8 %A, i8 %b) {
 ; CHECK-LABEL: @test10(
 ; CHECK-NEXT:    [[B:%.*]] = sub i8 0, %b
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 %A, [[B]]
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[B]], %A
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %B = add i8 %A, %b
@@ -112,7 +138,7 @@ define i1 @test10(i8 %A, i8 %b) {
 define <2 x i1> @test10vec(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @test10vec(
 ; CHECK-NEXT:    [[C:%.*]] = sub <2 x i8> zeroinitializer, %b
-; CHECK-NEXT:    [[D:%.*]] = icmp ne <2 x i8> %a, [[C]]
+; CHECK-NEXT:    [[D:%.*]] = icmp ne <2 x i8> [[C]], %a
 ; CHECK-NEXT:    ret <2 x i1> [[D]]
 ;
   %c = add <2 x i8> %a, %b
@@ -244,14 +270,59 @@ define i32 @test19(i1 %C) {
   ret i32 %V
 }
 
+define <2 x i32> @test19vec(i1 %C) {
+; CHECK-LABEL: @test19vec(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1123, i32 1123>, <2 x i32> <i32 133, i32 133>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+  %V = add <2 x i32> %A, <i32 123, i32 123>
+  ret <2 x i32> %V
+}
+
+; This is an InstSimplify fold, but test it here to make sure that
+; InstCombine does not prevent the fold.
+; With NSW, add of sign bit -> or of sign bit.
+
 define i32 @test20(i32 %x) {
 ; CHECK-LABEL: @test20(
 ; CHECK-NEXT:    ret i32 %x
 ;
-  %tmp.2 = xor i32 %x, -2147483648
-  ;; Add of sign bit -> xor of sign bit.
-  %tmp.4 = add i32 %tmp.2, -2147483648
-  ret i32 %tmp.4
+  %y = xor i32 %x, -2147483648
+  %z = add nsw i32 %y, -2147483648
+  ret i32 %z
+}
+
+define i32 @xor_sign_bit(i32 %x) {
+; CHECK-LABEL: @xor_sign_bit(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 %x, -2147483606
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %xor = xor i32 %x, 2147483648
+  %add = add i32 %xor, 42
+  ret i32 %add
+}
+
+; No-wrap info allows converting the add to 'or'.
+
+define i8 @add_nsw_signbit(i8 %x) {
+; CHECK-LABEL: @add_nsw_signbit(
+; CHECK-NEXT:    [[Y:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    ret i8 [[Y]]
+;
+  %y = add nsw i8 %x, -128
+  ret i8 %y
+}
+
+; No-wrap info allows converting the add to 'or'.
+
+define i8 @add_nuw_signbit(i8 %x) {
+; CHECK-LABEL: @add_nuw_signbit(
+; CHECK-NEXT:    [[Y:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    ret i8 [[Y]]
+;
+  %y = add nuw i8 %x, 128
+  ret i8 %y
 }
 
 define i1 @test21(i32 %x) {
@@ -519,3 +590,99 @@ define i64 @test41(i32 %a) {
   %sub = add i64 %zext, -1
   ret i64 %sub
 }
+
+define i32 @test42(i1 %C) {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], i32 1123, i32 133
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %A = select i1 %C, i32 1000, i32 10
+  %V = add i32 123, %A
+  ret i32 %V
+}
+
+define <2 x i32> @test42vec(i1 %C) {
+; CHECK-LABEL: @test42vec(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1123, i32 1123>, <2 x i32> <i32 133, i32 133>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+  %V = add <2 x i32> <i32 123, i32 123>, %A
+  ret <2 x i32> %V
+}
+
+define <2 x i32> @test42vec2(i1 %C) {
+; CHECK-LABEL: @test42vec2(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1123, i32 2833>, <2 x i32> <i32 133, i32 363>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+  %V = add <2 x i32> <i32 123, i32 333>, %A
+  ret <2 x i32> %V
+}
+
+define i32 @test43(i1 %which) {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 1123, [[ENTRY:%.*]] ], [ 133, [[DELAY]] ]
+; CHECK-NEXT:    ret i32 [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+  %value = add i32 123, %A
+  ret i32 %value
+}
+
+define <2 x i32> @test43vec(i1 %which) {
+; CHECK-LABEL: @test43vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1123, i32 1123>, [[ENTRY:%.*]] ], [ <i32 133, i32 133>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+  %value = add <2 x i32> <i32 123, i32 123>, %A
+  ret <2 x i32> %value
+}
+
+define <2 x i32> @test43vec2(i1 %which) {
+; CHECK-LABEL: @test43vec2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1123, i32 2833>, [[ENTRY:%.*]] ], [ <i32 133, i32 363>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+  %value = add <2 x i32> <i32 123, i32 333>, %A
+  ret <2 x i32> %value
+}
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index 2ee0372e5e0af..f81f700e6cf42 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instcombine -S -default-data-layout="E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=CHECK -check-prefix=ALL
-; RUN: opt < %s -instcombine -S -default-data-layout="E-p:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=P32 -check-prefix=ALL
+; RUN: opt < %s -instcombine -S -data-layout="E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=CHECK -check-prefix=ALL
+; RUN: opt < %s -instcombine -S -data-layout="E-p:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=P32 -check-prefix=ALL
 ; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=NODL -check-prefix=ALL
 
 
diff --git a/test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll b/test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll
new file mode 100644
index 0000000000000..888f51bf939dd
--- /dev/null
+++ b/test/Transforms/InstCombine/amdgcn-demanded-vector-elts.ll
@@ -0,0 +1,322 @@
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.buffer.load
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @buffer_load_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  ret float %data
+}
+
+; CHECK-LABEL: @buffer_load_v1f32(
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <1 x float> %data
+define amdgpu_ps <1 x float> @buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  ret <1 x float> %data
+}
+
+; CHECK-LABEL: @buffer_load_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  ret <2 x float> %data
+}
+
+; CHECK-LABEL: @buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <4 x float> %data
+define amdgpu_ps <4 x float> @buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  ret <4 x float> %data
+}
+
+; CHECK-LABEL: @extract_elt0_buffer_load_v2f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %elt0 = extractelement <2 x float> %data, i32 0
+  ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_buffer_load_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %elt1 = extractelement <2 x float> %data, i32 1
+  ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_buffer_load_v4f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %elt0 = extractelement <4 x float> %data, i32 0
+  ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %elt1 = extractelement <4 x float> %data, i32 1
+  ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt2 = extractelement <4 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt2
+define amdgpu_ps float @extract_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %elt2 = extractelement <4 x float> %data, i32 2
+  ret float %elt2
+}
+
+; CHECK-LABEL: @extract_elt3_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt3 = extractelement <4 x float> %data, i32 3
+; CHECK-NEXT: ret float %elt3
+define amdgpu_ps float @extract_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %elt3 = extractelement <4 x float> %data, i32 3
+  ret float %elt3
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float>
+define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt2_elt3_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_elt3_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt2_elt3_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+  ret <3 x float> %shuf
+}
+
+; FIXME: The v4f32 load is not narrowed even though only elements 0 and 1 are used (via extractelement + insertvalue).
+; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v4f32_2(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0
+; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 1
+; CHECK-NEXT: %ins0 = insertvalue { float, float } undef, float %elt0, 0
+; CHECK-NEXT: %ins1 = insertvalue { float, float } %ins0, float %elt1, 1
+; CHECK-NEXT: ret { float, float } %ins1
+define amdgpu_ps { float, float } @extract_elt0_elt1_buffer_load_v4f32_2(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %elt0 = extractelement <4 x float> %data, i32 0
+  %elt1 = extractelement <4 x float> %data, i32 1
+  %ins0 = insertvalue { float, float } undef, float %elt0, 0
+  %ins1 = insertvalue { float, float } %ins0, float %elt1, 1
+  ret { float, float } %ins1
+}
+
+; CHECK-LABEL: @extract_elt0_buffer_load_v3f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %elt0 = extractelement <3 x float> %data, i32 0
+  ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %elt1 = extractelement <3 x float> %data, i32 1
+  ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt2 = extractelement <3 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt2
+define amdgpu_ps float @extract_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %elt2 = extractelement <3 x float> %data, i32 2
+  ret float %elt2
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+  ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @preserve_metadata_extract_elt0_buffer_load_v2f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false), !fpmath !0
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @preserve_metadata_extract_elt0_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false), !fpmath !0
+  %elt0 = extractelement <2 x float> %data, i32 0
+  ret float %elt0
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.buffer.load.format
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @buffer_load_format_v1f32(
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 true)
+; CHECK-NEXT: ret <1 x float> %data
+define amdgpu_ps <1 x float> @buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 true)
+  ret <1 x float> %data
+}
+
+; CHECK-LABEL: @extract_elt0_buffer_load_format_v2f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 true, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 true, i1 false)
+  %elt0 = extractelement <2 x float> %data, i32 0
+  ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x float> %shuf
+}
+
+; The initial insertion point is at the extractelement; the narrowed load is still expected ahead of the shuffle that re-widens it.
+; CHECK-LABEL: @extract01_bitcast_buffer_load_format_v4f32(
+; CHECK-NEXT: %tmp = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
+; CHECK-NEXT: %1 = shufflevector <2 x float> %tmp, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: %tmp1 = bitcast <4 x float> %1 to <2 x double>
+; CHECK-NEXT: %tmp2 = extractelement <2 x double> %tmp1, i32 0
+; CHECK-NEXT: ret double %tmp2
+define double @extract01_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
+  %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
+  %tmp1 = bitcast <4 x float> %tmp to <2 x double>
+  %tmp2 = extractelement <2 x double> %tmp1, i32 0
+  ret double %tmp2
+}
+
+; CHECK-LABEL: @extract0_bitcast_buffer_load_format_v4f32(
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
+; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
+; CHECK-NEXT: ret i32 %tmp2
+define i32 @extract0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
+  %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
+  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
+  %tmp2 = extractelement <4 x i32> %tmp1, i32 0
+  ret i32 %tmp2
+}
+
+; CHECK-LABEL: @extract_lo16_0_bitcast_buffer_load_format_v4f32(
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
+; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %tmp, i64 0
+; CHECK-NEXT: %tmp1 = bitcast <4 x float> %1 to <8 x i16>
+; CHECK-NEXT: %tmp2 = extractelement <8 x i16> %tmp1, i32 0
+; CHECK-NEXT: ret i16 %tmp2
+define i16 @extract_lo16_0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
+  %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
+  %tmp1 = bitcast <4 x float> %tmp to <8 x i16>
+  %tmp2 = extractelement <8 x i16> %tmp1, i32 0
+  ret i16 %tmp2
+}
+
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #1
+
+declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
+!0 = !{float 2.500000e+00}
diff --git a/test/Transforms/InstCombine/amdgcn-intrinsics.ll b/test/Transforms/InstCombine/amdgcn-intrinsics.ll
index a228968f25bce..deae5502bcdb8 100644
--- a/test/Transforms/InstCombine/amdgcn-intrinsics.ll
+++ b/test/Transforms/InstCombine/amdgcn-intrinsics.ll
@@ -7,6 +7,12 @@
 declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
 declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone
 
+; CHECK-LABEL: @test_constant_fold_rcp_f32_undef
+; CHECK-NEXT: ret float undef
+define float @test_constant_fold_rcp_f32_undef() nounwind {
+  %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone
+  ret float %val
+}
 
 ; CHECK-LABEL: @test_constant_fold_rcp_f32_1
 ; CHECK-NEXT: ret float 1.000000e+00
@@ -50,6 +56,18 @@ define double @test_constant_fold_rcp_f64_43() nounwind {
   ret double %val
 }
 
+; --------------------------------------------------------------------
+; llvm.amdgcn.rsq
+; --------------------------------------------------------------------
+
+declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone
+
+; CHECK-LABEL: @test_constant_fold_rsq_f32_undef
+; CHECK-NEXT: ret float undef
+define float @test_constant_fold_rsq_f32_undef() nounwind {
+  %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone
+  ret float %val
+}
 
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.frexp.mant
@@ -633,3 +651,888 @@ define float @cos_fabs_fneg_f32(float %x) {
   %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
   ret float %cos
 }
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.cvt.pkrtz
+; --------------------------------------------------------------------
+
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+
+; CHECK-LABEL: @vars_lhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
+define <2 x half> @vars_lhs_cvt_pkrtz(float %x, float %y) {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_lhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float %y)
+define <2 x half> @constant_lhs_cvt_pkrtz(float %y) {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float %y)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_rhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.000000e+00)
+define <2 x half> @constant_rhs_cvt_pkrtz(float %x) {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.0)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @undef_lhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
+define <2 x half> @undef_lhs_cvt_pkrtz(float %y) {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @undef_rhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
+define <2 x half> @undef_rhs_cvt_pkrtz(float %x) {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @undef_cvt_pkrtz(
+; CHECK: ret <2 x half> undef
+define <2 x half> @undef_cvt_pkrtz() {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_splat0_cvt_pkrtz(
+; CHECK: ret <2 x half> zeroinitializer
+define <2 x half> @constant_splat0_cvt_pkrtz() {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float 0.0)
+  ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_cvt_pkrtz(
+; CHECK: ret <2 x half> <half 0xH4000, half 0xH4400>
+define <2 x half> @constant_cvt_pkrtz() {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 2.0, float 4.0)
+  ret <2 x half> %cvt
+}
+
+; Test constant values where round-toward-zero changes the result (65535.0 is expected to fold to 0xH7BFF).
+; CHECK-LABEL: @constant_rtz_pkrtz(
+; CHECK: ret <2 x half> <half 0xH7BFF, half 0xH7BFF>
+define <2 x half> @constant_rtz_pkrtz() {
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0)
+  ret <2 x half> %cvt
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.ubfe
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone
+declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) nounwind readnone
+
+; CHECK-LABEL: @ubfe_var_i32(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
+define i32 @ubfe_var_i32(i32 %src, i32 %offset, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_clear_high_bits_constant_offset_i32(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 5, i32 %width)
+define i32 @ubfe_clear_high_bits_constant_offset_i32(i32 %src, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 133, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_clear_high_bits_constant_width_i32(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 5)
+define i32 @ubfe_clear_high_bits_constant_width_i32(i32 %src, i32 %offset) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 133)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_0(
+; CHECK-NEXT: ret i32 0
+define i32 @ubfe_width_0(i32 %src, i32 %offset) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 0)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_31(
+; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
+define i32 @ubfe_width_31(i32 %src, i32 %offset) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_32(
+; CHECK-NEXT: ret i32 0
+define i32 @ubfe_width_32(i32 %src, i32 %offset) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 32)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_33(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 1)
+define i32 @ubfe_width_33(i32 %src, i32 %offset) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 33)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_33(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 1, i32 %width)
+define i32 @ubfe_offset_33(i32 %src, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 33, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_0(
+; CHECK-NEXT: %1 = sub i32 32, %width
+; CHECK-NEXT: %2 = shl i32 %src, %1
+; CHECK-NEXT: %bfe = lshr i32 %2, %1
+; CHECK-NEXT: ret i32 %bfe
+define i32 @ubfe_offset_0(i32 %src, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_32(
+; CHECK-NEXT: %1 = sub i32 32, %width
+; CHECK-NEXT: %2 = shl i32 %src, %1
+; CHECK-NEXT: %bfe = lshr i32 %2, %1
+; CHECK-NEXT: ret i32 %bfe
+define i32 @ubfe_offset_32(i32 %src, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
+  ret i32 %bfe
+}
+
+; An in-range, nonzero constant offset with a variable width is not decomposed
+; into shifts, so the call is expected to be left unchanged.
+; CHECK-LABEL: @ubfe_offset_31(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
+; CHECK-NEXT: ret i32 %bfe
+define i32 @ubfe_offset_31(i32 %src, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_0_width_0(
+; CHECK-NEXT: ret i32 0
+define i32 @ubfe_offset_0_width_0(i32 %src) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 0)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_0_width_3(
+; CHECK-NEXT: and i32 %src, 7
+; CHECK-NEXT: ret
+define i32 @ubfe_offset_0_width_3(i32 %src) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_3_width_1(
+; CHECK-NEXT: %1 = lshr i32 %src, 3
+; CHECK-NEXT: and i32 %1, 1
+; CHECK-NEXT: ret i32
+define i32 @ubfe_offset_3_width_1(i32 %src) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 1)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_3_width_4(
+; CHECK-NEXT: %1 = lshr i32 %src, 3
+; CHECK-NEXT: and i32 %1, 15
+; CHECK-NEXT: ret i32
+define i32 @ubfe_offset_3_width_4(i32 %src) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 4)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_0_0_0(
+; CHECK-NEXT: ret i32 0
+define i32 @ubfe_0_0_0() {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_neg1_5_7(
+; CHECK-NEXT: ret i32 127
+define i32 @ubfe_neg1_5_7() {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 -1, i32 5, i32 7)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_undef_src_i32(
+; CHECK-NEXT: ret i32 undef
+define i32 @ubfe_undef_src_i32(i32 %offset, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 undef, i32 %offset, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_undef_offset_i32(
+; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
+define i32 @ubfe_undef_offset_i32(i32 %src, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_undef_width_i32(
+; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
+define i32 @ubfe_undef_width_i32(i32 %src, i32 %offset) {
+  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_33_width_4_i64(
+; CHECK-NEXT: %1 = lshr i64 %src, 33
+; CHECK-NEXT: %bfe = and i64 %1, 15
+define i64 @ubfe_offset_33_width_4_i64(i64 %src) {
+  %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 33, i32 4)
+  ret i64 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_0_i64(
+; CHECK-NEXT: %1 = sub i32 64, %width
+; CHECK-NEXT: %2 = zext i32 %1 to i64
+; CHECK-NEXT: %3 = shl i64 %src, %2
+; CHECK-NEXT: %bfe = lshr i64 %3, %2
+; CHECK-NEXT: ret i64 %bfe
+define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) {
+  %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
+  ret i64 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_32_width_32_i64(
+; CHECK-NEXT: %bfe = lshr i64 %src, 32
+; CHECK-NEXT: ret i64 %bfe
+define i64 @ubfe_offset_32_width_32_i64(i64 %src) {
+  %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 32, i32 32)
+  ret i64 %bfe
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.sbfe
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone
+declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone
+
+; An in-range, nonzero constant offset with a variable width is not decomposed
+; into shifts, so the call is expected to be left unchanged.
+; CHECK-LABEL: @sbfe_offset_31(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
+; CHECK-NEXT: ret i32 %bfe
+define i32 @sbfe_offset_31(i32 %src, i32 %width) {
+  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @sbfe_neg1_5_7(
+; CHECK-NEXT: ret i32 -1
+define i32 @sbfe_neg1_5_7() {
+  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 -1, i32 5, i32 7)
+  ret i32 %bfe
+}
+
+; CHECK-LABEL: @sbfe_offset_32_width_32_i64(
+; CHECK-NEXT: %bfe = ashr i64 %src, 32
+; CHECK-NEXT: ret i64 %bfe
+define i64 @sbfe_offset_32_width_32_i64(i64 %src) {
+  %bfe = call i64 @llvm.amdgcn.sbfe.i64(i64 %src, i32 32, i32 32)
+  ret i64 %bfe
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.exp
+; --------------------------------------------------------------------
+
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind inaccessiblememonly
+
+; Make sure InstCombine does not crash when the %tgt or %en operand is a variable; both calls are expected to be left unchanged.
+; CHECK-LABEL: @exp_invalid_inputs(
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
+define void @exp_invalid_inputs(i32 %tgt, i32 %en) {
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: @exp_disabled_inputs_to_undef(
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float 4.000000e+00, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float undef, float undef, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float %y, float undef, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float %z, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float %w, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.000000e+00, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.000000e+00, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.000000e+00, float undef, float undef, float 4.000000e+00, i1 false, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 false)
+define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w) {
+  ; enable src0..src3 constants
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+
+  ; enable src0..src3 variables
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float %y, float %z, float %w, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %x, float %y, float %z, float %w, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float %x, float %y, float %z, float %w, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float %x, float %y, float %z, float %w, i1 true, i1 false)
+
+  ; enable none
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %x, float %y, float %z, float %w, i1 true, i1 false)
+
+  ; enable different source combinations
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
+
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.exp.compr
+; --------------------------------------------------------------------
+
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind inaccessiblememonly
+
+; CHECK-LABEL: @exp_compr_invalid_inputs(
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
+define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en) {
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> undef, <2 x half> %zw, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw) {
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.fmed3
+; --------------------------------------------------------------------
+
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
+
+; CHECK-LABEL: @fmed3_f32(
+; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
+define float @fmed3_f32(float %x, float %y, float %z) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
+define float @fmed3_canonicalize_x_c0_c1_f32(float %x) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
+define float @fmed3_canonicalize_c0_x_c1_f32(float %x) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
+define float @fmed3_canonicalize_c0_c1_x_f32(float %x) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
+define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32(
+; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
+define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
+define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_undef_x_y_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_undef_x_y_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32(
+; CHECK: call nnan float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) {
+  %med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_x_undef_y_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_x_undef_y_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_x_y_undef_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_x_y_undef_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_qnan0_x_y_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_qnan0_x_y_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_x_qnan0_y_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_x_qnan0_y_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_x_y_qnan0_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_x_y_qnan0_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_qnan1_x_y_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_qnan1_x_y_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y)
+  ret float %med3
+}
+
+; This can return any of the qnans.
+; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32(
+; CHECK: ret float 0x7FF8002000000000
+define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src0_0_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src0_0_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src0_1_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src0_1_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src1_0_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src1_0_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src1_1_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src1_1_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src2_0_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src2_0_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src2_1_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src2_1_f32(float %x, float %y) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32(
+; CHECK: ret float %x
+define float @fmed3_x_qnan0_qnan1_f32(float %x) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32(
+; CHECK: ret float %x
+define float @fmed3_qnan0_x_qnan1_f32(float %x) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
+  ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32(
+; CHECK: ret float %x
+define float @fmed3_qnan0_qnan1_x_f32(float %x) {
+  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
+  ret float %med3
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.icmp
+; --------------------------------------------------------------------
+
+declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent
+declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent
+
+; Make sure there's no crash for invalid input
+; CHECK-LABEL: @invalid_nonconstant_icmp_code(
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
+define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
+  ret i64 %result
+}
+
+; CHECK-LABEL: @invalid_icmp_code(
+; CHECK: %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
+; CHECK: %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42)
+define i64 @invalid_icmp_code(i32 %a, i32 %b) {
+  %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31) ; 31 is one below the lowest code (32) used by the icmp tests that follow
+  %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42) ; 42 is one above the highest code (41) used by the icmp tests that follow
+  %or = or i64 %under, %over ; combine both results so neither call is dead; both calls are expected to be left untouched
+  ret i64 %or
+}
+
+; CHECK-LABEL: @icmp_constant_inputs_false(
+; CHECK: ret i64 0
+define i64 @icmp_constant_inputs_false() {
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 32)
+  ret i64 %result
+}
+
+; CHECK-LABEL: @icmp_constant_inputs_true(
+; CHECK: ret i64 -1
+define i64 @icmp_constant_inputs_true() {
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34)
+  ret i64 %result
+}
+
+; CHECK-LABEL: @icmp_constant_to_rhs_slt(
+; CHECK: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %x, i32 9, i32 38)
+define i64 @icmp_constant_to_rhs_slt(i32 %x) {
+  %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 %x, i32 40) ; constant is the first operand; the expected output moves it to the second and changes code 40 to 38 (presumably slt -> sgt in CmpInst::Predicate numbering - confirm)
+  ret i64 %result
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
+  %cmp = icmp eq i32 %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33)
+define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) {
+  %cmp = icmp ne i32 %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 41)
+define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
+  %cmp = icmp sle i32 %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34)
+define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) {
+  %cmp = icmp ugt i64 %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34)
+define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) {
+  %cmp = icmp ugt i64 %a, %b ; NOTE(review): the test name says "ult" but the compare is ugt, same as the preceding ugt_i64 test - confirm intent
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 0, i32 %zext.cmp, i32 33) ; the constant 0 is the first operand here (presumably the "swap" in the name); the sibling tests pass it second
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 1)
+define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) {
+  %cmp = fcmp oeq float %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14)
+define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) {
+  %cmp = fcmp une float %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 4)
+define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) {
+  %cmp = fcmp olt double %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32(
+; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) {
+  %cmp = icmp eq i32 %a, %b
+  %sext.cmp = sext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 0, i32 33)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33)
+define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
+  %cmp = icmp eq i32 %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39)
+define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) {
+  %cmp = icmp slt i32 %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14)
+define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) {
+  %cmp = fcmp oeq float %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 2)
+define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) {
+  %cmp = fcmp ule float %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 13)
+define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) {
+  %cmp = fcmp ogt float %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) {
+  %cmp = icmp eq i32 %a, %b
+  %zext.cmp = zext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 1, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32(
+; CHECK: %zext.cond = zext i1 %cond to i32
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 0, i32 33)
+define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) {
+  %zext.cond = zext i1 %cond to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 1, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32(
+; CHECK: %zext.cond = zext i1 %cond to i32
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32)
+define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) {
+  %zext.cond = zext i1 %cond to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32(
+; CHECK: %sext.cond = sext i1 %cond to i32
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32)
+define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) {
+  %sext.cond = sext i1 %cond to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32(
+; CHECK: %sext.cond = sext i1 %cond to i32
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 0, i32 33)
+define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) {
+  %sext.cond = sext i1 %cond to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 -1, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64(
+; CHECK: %sext.cond = sext i1 %cond to i64
+; CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 0, i32 33)
+define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) {
+  %sext.cond = sext i1 %cond to i64
+  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 -1, i32 32)
+  ret i64 %mask
+}
+
+; TODO: Should be able to fold to false
+; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32(
+; CHECK: %cmp = icmp eq i32 %a, %b
+; CHECK: %sext.cmp = sext i1 %cmp to i32
+; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32)
+define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) {
+  %cmp = icmp eq i32 %a, %b
+  %sext.cmp = sext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32(
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) {
+  %cmp = icmp eq i32 %a, %b
+  %sext.cmp = sext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32(
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39)
+define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) {
+  %cmp = icmp sge i32 %a, %b
+  %sext.cmp = sext i1 %cmp to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
+  ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 38)
+define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
+  %cmp = icmp sle i32 %a, %b
+  %not = xor i1 %cmp, true
+  %zext.cmp = zext i1 %not to i32
+  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  ret i64 %mask
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.fcmp
+; --------------------------------------------------------------------
+
+declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent
+
+; Make sure there's no crash for invalid input
+; CHECK-LABEL: @invalid_nonconstant_fcmp_code(
+; CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
+define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
+  %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
+  ret i64 %result
+}
+
+; CHECK-LABEL: @invalid_fcmp_code(
+; CHECK: %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
+; CHECK: %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16)
+define i64 @invalid_fcmp_code(float %a, float %b) {
+  %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1) ; presumably just below the valid fcmp predicate range (0..15) - confirm
+  %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16) ; presumably just above the valid fcmp predicate range (0..15) - confirm
+  %or = or i64 %under, %over ; combine both results so neither call is dead; both calls are expected to be left untouched
+  ret i64 %or
+}
+
+; CHECK-LABEL: @fcmp_constant_inputs_false(
+; CHECK: ret i64 0
+define i64 @fcmp_constant_inputs_false() {
+  %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 1)
+  ret i64 %result
+}
+
+; CHECK-LABEL: @fcmp_constant_inputs_true(
+; CHECK: ret i64 -1
+define i64 @fcmp_constant_inputs_true() {
+  %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4)
+  ret i64 %result
+}
+
+; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
+; CHECK: %result = call i64 @llvm.amdgcn.fcmp.f32(float %x, float 4.000000e+00, i32 2)
+define i64 @fcmp_constant_to_rhs_olt(float %x) {
+  %result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4) ; constant is the first operand; the expected output moves it to the second and changes code 4 to 2 (presumably olt -> ogt in CmpInst::Predicate numbering - confirm)
+  ret i64 %result
+}
diff --git a/test/Transforms/InstCombine/and-or-icmps.ll b/test/Transforms/InstCombine/and-or-icmps.ll
index 3903472e91190..e3aeee2931398 100644
--- a/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/test/Transforms/InstCombine/and-or-icmps.ll
@@ -39,15 +39,167 @@ define i1 @PR2330(i32 %a, i32 %b) {
   ret i1 %and
 }
 
-define i1 @test(i32 %tmp1030) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[TMP1030_OFF:%.*]] = add i32 %tmp1030, -39
-; CHECK-NEXT:    [[TMP1030_CMP:%.*]] = icmp ugt i32 [[TMP1030_OFF]], 1
-; CHECK-NEXT:    ret i1 [[TMP1030_CMP]]
-;
-  %tmp1037 = icmp ne i32 %tmp1030, 39
-  %tmp1039 = icmp ne i32 %tmp1030, 40
-  %tmp1042 = and i1 %tmp1037, %tmp1039
-  ret i1 %tmp1042
+; If the constants C1 and C2 differ by only one bit (C2 being the one with that bit set):
+; (X == C1 || X == C2) -> (X | (C1 ^ C2)) == C2
+; PR14708: https://bugs.llvm.org/show_bug.cgi?id=14708
+
+define i1 @or_eq_with_one_bit_diff_constants1(i32 %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants1(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 51
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp eq i32 %x, 50
+  %cmp2 = icmp eq i32 %x, 51
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+}
+
+; (X != C1 && X != C2) -> (X | (C1 ^ C2)) != C2
+
+define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) {
+; CHECK-LABEL: @and_ne_with_one_bit_diff_constants1(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 51
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp ne i32 %x, 51
+  %cmp2 = icmp ne i32 %x, 50
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+; The constants are not necessarily off-by-one, just off-by-one-bit.
+
+define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 32
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 97
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp eq i32 %x, 97
+  %cmp2 = icmp eq i32 %x, 65
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+}
+
+define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) {
+; CHECK-LABEL: @and_ne_with_one_bit_diff_constants2(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i19 %x, 128
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i19 [[TMP1]], 193
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp ne i19 %x, 65
+  %cmp2 = icmp ne i19 %x, 193
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+; Make sure the constants are treated as unsigned when comparing them.
+
+define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants3(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], -2
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp eq i8 %x, 254
+  %cmp2 = icmp eq i8 %x, 126
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+}
+
+define i1 @and_ne_with_one_bit_diff_constants3(i8 %x) {
+; CHECK-LABEL: @and_ne_with_one_bit_diff_constants3(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], -63
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp ne i8 %x, 65
+  %cmp2 = icmp ne i8 %x, 193
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+; Use an 'add' to eliminate an icmp if the constants are off-by-one (not off-by-one-bit).
+; (X == 13 | X == 14) -> X-13 <u 2
+
+define i1 @or_eq_with_diff_one(i8 %x) {
+; CHECK-LABEL: @or_eq_with_diff_one(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 %x, -13
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 2
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp eq i8 %x, 13
+  %cmp2 = icmp eq i8 %x, 14
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+}
+
+; (X != 40 & X != 39) -> X-39 >u 1
+
+define i1 @and_ne_with_diff_one(i32 %x) {
+; CHECK-LABEL: @and_ne_with_diff_one(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 %x, -39
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp ne i32 %x, 40
+  %cmp2 = icmp ne i32 %x, 39
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+; Make sure the constants are treated as signed when comparing them.
+; PR32524: https://bugs.llvm.org/show_bug.cgi?id=32524
+
+define i1 @or_eq_with_diff_one_signed(i32 %x) {
+; CHECK-LABEL: @or_eq_with_diff_one_signed(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 %x, 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 2
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp eq i32 %x, 0
+  %cmp2 = icmp eq i32 %x, -1
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+}
+
+define i1 @and_ne_with_diff_one_signed(i64 %x) {
+; CHECK-LABEL: @and_ne_with_diff_one_signed(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 %x, 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp ne i64 %x, -1
+  %cmp2 = icmp ne i64 %x, 0
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+; Vectors with splat constants get the same folds.
+
+define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec(
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> %x, <i32 32, i32 32>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 97, i32 97>
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
+;
+  %cmp1 = icmp eq <2 x i32> %x, <i32 97, i32 97>
+  %cmp2 = icmp eq <2 x i32> %x, <i32 65, i32 65>
+  %or = or <2 x i1> %cmp1, %cmp2
+  ret <2 x i1> %or
+}
+
+define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) {
+; CHECK-LABEL: @and_ne_with_diff_one_splatvec(
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> %x, <i32 -39, i32 -39>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
+;
+  %cmp1 = icmp ne <2 x i32> %x, <i32 40, i32 40>
+  %cmp2 = icmp ne <2 x i32> %x, <i32 39, i32 39>
+  %and = and <2 x i1> %cmp1, %cmp2
+  ret <2 x i1> %and
 }
 
diff --git a/test/Transforms/InstCombine/and.ll b/test/Transforms/InstCombine/and.ll
index e45012878ed5d..9a4d1e5758b30 100644
--- a/test/Transforms/InstCombine/and.ll
+++ b/test/Transforms/InstCombine/and.ll
@@ -176,7 +176,7 @@ define i8 @test16(i8 %A) {
 define i8 @test17(i8 %X, i8 %Y) {
 ; CHECK-LABEL: @test17(
 ; CHECK-NEXT:    [[Y_NOT:%.*]] = xor i8 %Y, -1
-; CHECK-NEXT:    [[D:%.*]] = or i8 %X, [[Y_NOT]]
+; CHECK-NEXT:    [[D:%.*]] = or i8 [[Y_NOT]], %X
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
   %B = xor i8 %X, -1
@@ -311,19 +311,6 @@ define <2 x i1> @test25vec(<2 x i32> %A) {
   ret <2 x i1> %D
 }
 
-define i1 @test26(i32 %A) {
-; CHECK-LABEL: @test26(
-; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 %A, -49
-; CHECK-NEXT:    [[A_CMP:%.*]] = icmp ugt i32 [[A_OFF]], 1
-; CHECK-NEXT:    ret i1 [[A_CMP]]
-;
-  %B = icmp ne i32 %A, 49
-  %C = icmp ne i32 %A, 50
-  ;; (A-49) > 1
-  %D = and i1 %B, %C
-  ret i1 %D
-}
-
 define i8 @test27(i8 %A) {
 ; CHECK-LABEL: @test27(
 ; CHECK-NEXT:    ret i8 0
@@ -382,6 +369,18 @@ define i32 @test31(i1 %X) {
   ret i32 %A
 }
 
+; Demanded bit analysis allows us to eliminate the add.
+
+define <2 x i32> @and_demanded_bits_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @and_demanded_bits_splat_vec(
+; CHECK-NEXT:    [[Z:%.*]] = and <2 x i32> %x, <i32 7, i32 7>
+; CHECK-NEXT:    ret <2 x i32> [[Z]]
+;
+  %y = add <2 x i32> %x, <i32 8, i32 8>
+  %z = and <2 x i32> %y, <i32 7, i32 7>
+  ret <2 x i32> %z
+}
+
 define i32 @test32(i32 %In) {
 ; CHECK-LABEL: @test32(
 ; CHECK-NEXT:    ret i32 0
@@ -405,6 +404,42 @@ define i32 @test33(i32 %b) {
   ret i32 %tmp.13
 }
 
+define i32 @test33b(i32 %b) {
+; CHECK-LABEL: @test33b(
+; CHECK-NEXT:    [[TMP_13:%.*]] = xor i32 [[B:%.*]], 1
+; CHECK-NEXT:    ret i32 [[TMP_13]]
+;
+  %tmp.4.mask = and i32 %b, 1
+  %tmp.10 = xor i32 %tmp.4.mask, 1
+  %tmp.12 = and i32 %b, -2
+  %tmp.13 = or i32 %tmp.10, %tmp.12
+  ret i32 %tmp.13
+}
+
+define <2 x i32> @test33vec(<2 x i32> %b) {
+; CHECK-LABEL: @test33vec(
+; CHECK-NEXT:    [[TMP_13:%.*]] = xor <2 x i32> [[B:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP_13]]
+;
+  %tmp.4.mask = and <2 x i32> %b, <i32 1, i32 1>
+  %tmp.10 = xor <2 x i32> %tmp.4.mask, <i32 1, i32 1>
+  %tmp.12 = and <2 x i32> %b, <i32 -2, i32 -2>
+  %tmp.13 = or <2 x i32> %tmp.12, %tmp.10
+  ret <2 x i32> %tmp.13
+}
+
+define <2 x i32> @test33vecb(<2 x i32> %b) {
+; CHECK-LABEL: @test33vecb(
+; CHECK-NEXT:    [[TMP_13:%.*]] = xor <2 x i32> [[B:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP_13]]
+;
+  %tmp.4.mask = and <2 x i32> %b, <i32 1, i32 1>
+  %tmp.10 = xor <2 x i32> %tmp.4.mask, <i32 1, i32 1>
+  %tmp.12 = and <2 x i32> %b, <i32 -2, i32 -2>
+  %tmp.13 = or <2 x i32> %tmp.10, %tmp.12
+  ret <2 x i32> %tmp.13
+}
+
 define i32 @test34(i32 %A, i32 %B) {
 ; CHECK-LABEL: @test34(
 ; CHECK-NEXT:    ret i32 %B
@@ -425,3 +460,156 @@ define <2 x i32> @PR24942(<2 x i32> %x) {
   ret <2 x i32> %and
 }
 
+define i64 @test35(i32 %X) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT:  %[[sub:.*]] = sub i32 0, %X
+; CHECK-NEXT:  %[[and:.*]] = and i32 %[[sub]], 240
+; CHECK-NEXT:  %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT:  ret i64 %[[cst]]
+  %zext = zext i32 %X to i64
+  %zsub = sub i64 0, %zext
+  %res = and i64 %zsub, 240
+  ret i64 %res
+}
+
+define i64 @test36(i32 %X) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT:  %[[sub:.*]] = add i32 %X, 7
+; CHECK-NEXT:  %[[and:.*]] = and i32 %[[sub]], 240
+; CHECK-NEXT:  %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT:  ret i64 %[[cst]]
+  %zext = zext i32 %X to i64
+  %zsub = add i64 %zext, 7
+  %res = and i64 %zsub, 240
+  ret i64 %res
+}
+
+define i64 @test37(i32 %X) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT:  %[[sub:.*]] = mul i32 %X, 7
+; CHECK-NEXT:  %[[and:.*]] = and i32 %[[sub]], 240
+; CHECK-NEXT:  %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT:  ret i64 %[[cst]]
+  %zext = zext i32 %X to i64
+  %zsub = mul i64 %zext, 7
+  %res = and i64 %zsub, 240
+  ret i64 %res
+}
+
+define i64 @test38(i32 %X) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT:  %[[and:.*]] = and i32 %X, 240
+; CHECK-NEXT:  %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT:  ret i64 %[[cst]]
+  %zext = zext i32 %X to i64
+  %zsub = xor i64 %zext, 7
+  %res = and i64 %zsub, 240
+  ret i64 %res
+}
+
+define i64 @test39(i32 %X) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT:  %[[and:.*]] = and i32 %X, 240
+; CHECK-NEXT:  %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT:  ret i64 %[[cst]]
+  %zext = zext i32 %X to i64
+  %zsub = or i64 %zext, 7
+  %res = and i64 %zsub, 240
+  ret i64 %res
+}
+
+define i32 @test40(i1 %C) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT:    [[A:%.*]] = select i1 [[C:%.*]], i32 104, i32 10
+; CHECK-NEXT:    ret i32 [[A]]
+;
+  %A = select i1 %C, i32 1000, i32 10
+  %V = and i32 %A, 123
+  ret i32 %V
+}
+
+define <2 x i32> @test40vec(i1 %C) {
+; CHECK-LABEL: @test40vec(
+; CHECK-NEXT:    [[A:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 104, i32 104>, <2 x i32> <i32 10, i32 10>
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+  %V = and <2 x i32> %A, <i32 123, i32 123>
+  ret <2 x i32> %V
+}
+
+define <2 x i32> @test40vec2(i1 %C) {
+; CHECK-LABEL: @test40vec2(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 104, i32 324>, <2 x i32> <i32 10, i32 12>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+  %V = and <2 x i32> %A, <i32 123, i32 333>
+  ret <2 x i32> %V
+}
+
+define i32 @test41(i1 %which) {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 104, [[ENTRY:%.*]] ], [ 10, [[DELAY]] ]
+; CHECK-NEXT:    ret i32 [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+  %value = and i32 %A, 123
+  ret i32 %value
+}
+
+define <2 x i32> @test41vec(i1 %which) {
+; CHECK-LABEL: @test41vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 104, i32 104>, [[ENTRY:%.*]] ], [ <i32 10, i32 10>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+  %value = and <2 x i32> %A, <i32 123, i32 123>
+  ret <2 x i32> %value
+}
+
+define <2 x i32> @test41vec2(i1 %which) {
+; CHECK-LABEL: @test41vec2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 104, i32 324>, [[ENTRY:%.*]] ], [ <i32 10, i32 12>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+  %value = and <2 x i32> %A, <i32 123, i32 333>
+  ret <2 x i32> %value
+}
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index 3d043b0864cd3..001ac58891e46 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -45,21 +45,6 @@ define <4 x i32> @test5(<4 x i32> %A) {
   ret <4 x i32> %2
 }
 
-; Check that we combine "if x!=0 && x!=-1" into "if x+1u>1"
-define i32 @test6(i64 %x) nounwind {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[X_OFF:%.*]] = add i64 %x, 1
-; CHECK-NEXT:    [[X_CMP:%.*]] = icmp ugt i64 [[X_OFF]], 1
-; CHECK-NEXT:    [[LAND_EXT:%.*]] = zext i1 [[X_CMP]] to i32
-; CHECK-NEXT:    ret i32 [[LAND_EXT]]
-;
-  %cmp1 = icmp ne i64 %x, -1
-  %not.cmp = icmp ne i64 %x, 0
-  %.cmp1 = and i1 %cmp1, %not.cmp
-  %land.ext = zext i1 %.cmp1 to i32
-  ret i32 %land.ext
-}
-
 define i1 @test7(i32 %i, i1 %b) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 %i, 0
@@ -110,6 +95,18 @@ define i64 @test9(i64 %x) {
   ret i64 %and
 }
 
+; combine -x & 1 into x & 1 (TODO: not yet done for vectors; the expected output below still keeps the sub)
+define <2 x i64> @test9vec(<2 x i64> %x) {
+; CHECK-LABEL: @test9vec(
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <2 x i64> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[SUB]], <i64 1, i64 1>
+; CHECK-NEXT:    ret <2 x i64> [[AND]]
+;
+  %sub = sub nsw <2 x i64> <i64 0, i64 0>, %x
+  %and = and <2 x i64> %sub, <i64 1, i64 1>
+  ret <2 x i64> %and
+}
+
 define i64 @test10(i64 %x) {
 ; CHECK-LABEL: @test10(
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 %x, 1
@@ -122,3 +119,63 @@ define i64 @test10(i64 %x) {
   ret i64 %add
 }
 
+; The add in this test is unnecessary because the LSBs of the LHS are 0 and the 'and' only consumes bits from those LSBs. It doesn't matter what happens to the upper bits.
+define i32 @test11(i32 %a, i32 %b) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT:    [[Z:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT:    [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT:    ret i32 [[W]]
+;
+  %x = shl i32 %a, 8
+  %y = add i32 %x, %b
+  %z = and i32 %y, 128
+  %w = mul i32 %z, %x ; to keep the shift from being removed
+  ret i32 %w
+}
+
+; The add in this test is unnecessary because the LSBs of the RHS are 0 and the 'and' only consumes bits from those LSBs. It doesn't matter what happens to the upper bits.
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT:    [[Z:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT:    [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT:    ret i32 [[W]]
+;
+  %x = shl i32 %a, 8
+  %y = add i32 %b, %x
+  %z = and i32 %y, 128
+  %w = mul i32 %z, %x ; to keep the shift from being removed
+  ret i32 %w
+}
+
+; The sub in this test is unnecessary because the LSBs of the RHS are 0 and the 'and' only consumes bits from those LSBs. It doesn't matter what happens to the upper bits.
+define i32 @test13(i32 %a, i32 %b) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT:    [[Z:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT:    [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT:    ret i32 [[W]]
+;
+  %x = shl i32 %a, 8
+  %y = sub i32 %b, %x
+  %z = and i32 %y, 128
+  %w = mul i32 %z, %x ; to keep the shift from being removed
+  ret i32 %w
+}
+
+; The sub in this test cannot be removed because we need to keep the negation of %b, but its LHS is replaced with 0 because the 'and' only consumes LSBs that are known zero in %x.
+define i32 @test14(i32 %a, i32 %b) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT:    [[Y:%.*]] = sub i32 0, [[B:%.*]]
+; CHECK-NEXT:    [[Z:%.*]] = and i32 [[Y]], 128
+; CHECK-NEXT:    [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT:    ret i32 [[W]]
+;
+  %x = shl i32 %a, 8
+  %y = sub i32 %x, %b
+  %z = and i32 %y, 128
+  %w = mul i32 %z, %x ; to keep the shift from being removed
+  ret i32 %w
+}
diff --git a/test/Transforms/InstCombine/apint-shift.ll b/test/Transforms/InstCombine/apint-shift.ll
index e1e6b7c48c479..f339de35d77cd 100644
--- a/test/Transforms/InstCombine/apint-shift.ll
+++ b/test/Transforms/InstCombine/apint-shift.ll
@@ -63,6 +63,8 @@ define i55 @test6(i55 %A) {
   ret i55 %C
 }
 
+; (X * C2) << C1 --> X * (C2 << C1)
+
 define i55 @test6a(i55 %A) {
 ; CHECK-LABEL: @test6a(
 ; CHECK-NEXT:    [[C:%.*]] = mul i55 %A, 6
@@ -73,6 +75,18 @@ define i55 @test6a(i55 %A) {
   ret i55 %C
 }
 
+; (X * C2) << C1 --> X * (C2 << C1)
+
+define <2 x i55> @test6a_vec(<2 x i55> %A) {
+; CHECK-LABEL: @test6a_vec(
+; CHECK-NEXT:    [[C:%.*]] = mul <2 x i55> %A, <i55 6, i55 48>
+; CHECK-NEXT:    ret <2 x i55> [[C]]
+;
+  %B = mul <2 x i55> %A, <i55 3, i55 12>
+  %C = shl <2 x i55> %B, <i55 1, i55 2>
+  ret <2 x i55> %C
+}
+
 define i29 @test7(i8 %X) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    ret i29 -1
@@ -101,14 +115,150 @@ define i17 @test9(i17 %A) {
   ret i17 %C
 }
 
-define i19 @test10(i19 %A) {
+; shl (lshr X, C), C --> and X, C'
+
+define i19 @test10(i19 %X) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[B:%.*]] = and i19 %A, -262144
-; CHECK-NEXT:    ret i19 [[B]]
+; CHECK-NEXT:    [[SH1:%.*]] = and i19 %X, -262144
+; CHECK-NEXT:    ret i19 [[SH1]]
+;
+  %sh1 = lshr i19 %X, 18
+  %sh2 = shl i19 %sh1, 18
+  ret i19 %sh2
+}
+
+; Two right shifts in the same direction:
+; lshr (lshr X, C1), C2 --> lshr X, C1 + C2
+
+define <2 x i19> @lshr_lshr_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @lshr_lshr_splat_vec(
+; CHECK-NEXT:    [[SH1:%.*]] = lshr <2 x i19> %X, <i19 5, i19 5>
+; CHECK-NEXT:    ret <2 x i19> [[SH1]]
+;
+  %sh1 = lshr <2 x i19> %X, <i19 3, i19 3>
+  %sh2 = lshr <2 x i19> %sh1, <i19 2, i19 2>
+  ret <2 x i19> %sh2
+}
+
+define i9 @multiuse_lshr_lshr(i9 %x) {
+; CHECK-LABEL: @multiuse_lshr_lshr(
+; CHECK-NEXT:    [[SH1:%.*]] = lshr i9 %x, 2
+; CHECK-NEXT:    [[SH2:%.*]] = lshr i9 %x, 5
+; CHECK-NEXT:    [[MUL:%.*]] = mul i9 [[SH1]], [[SH2]]
+; CHECK-NEXT:    ret i9 [[MUL]]
+;
+  %sh1 = lshr i9 %x, 2
+  %sh2 = lshr i9 %sh1, 3
+  %mul = mul i9 %sh1, %sh2
+  ret i9 %mul
+}
+
+define <2 x i9> @multiuse_lshr_lshr_splat(<2 x i9> %x) {
+; CHECK-LABEL: @multiuse_lshr_lshr_splat(
+; CHECK-NEXT:    [[SH1:%.*]] = lshr <2 x i9> %x, <i9 2, i9 2>
+; CHECK-NEXT:    [[SH2:%.*]] = lshr <2 x i9> %x, <i9 5, i9 5>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i9> [[SH1]], [[SH2]]
+; CHECK-NEXT:    ret <2 x i9> [[MUL]]
+;
+  %sh1 = lshr <2 x i9> %x, <i9 2, i9 2>
+  %sh2 = lshr <2 x i9> %sh1, <i9 3, i9 3>
+  %mul = mul <2 x i9> %sh1, %sh2
+  ret <2 x i9> %mul
+}
+
+; Two left shifts in the same direction:
+; shl (shl X, C1), C2 -->  shl X, C1 + C2
+
+define <2 x i19> @shl_shl_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @shl_shl_splat_vec(
+; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i19> %X, <i19 5, i19 5>
+; CHECK-NEXT:    ret <2 x i19> [[SH1]]
+;
+  %sh1 = shl <2 x i19> %X, <i19 3, i19 3>
+  %sh2 = shl <2 x i19> %sh1, <i19 2, i19 2>
+  ret <2 x i19> %sh2
+}
+
+define i42 @multiuse_shl_shl(i42 %x) {
+; CHECK-LABEL: @multiuse_shl_shl(
+; CHECK-NEXT:    [[SH1:%.*]] = shl i42 %x, 8
+; CHECK-NEXT:    [[SH2:%.*]] = shl i42 %x, 17
+; CHECK-NEXT:    [[MUL:%.*]] = mul i42 [[SH1]], [[SH2]]
+; CHECK-NEXT:    ret i42 [[MUL]]
+;
+  %sh1 = shl i42 %x, 8
+  %sh2 = shl i42 %sh1, 9
+  %mul = mul i42 %sh1, %sh2
+  ret i42 %mul
+}
+
+define <2 x i42> @multiuse_shl_shl_splat(<2 x i42> %x) {
+; CHECK-LABEL: @multiuse_shl_shl_splat(
+; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i42> %x, <i42 8, i42 8>
+; CHECK-NEXT:    [[SH2:%.*]] = shl <2 x i42> %x, <i42 17, i42 17>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i42> [[SH1]], [[SH2]]
+; CHECK-NEXT:    ret <2 x i42> [[MUL]]
+;
+  %sh1 = shl <2 x i42> %x, <i42 8, i42 8>
+  %sh2 = shl <2 x i42> %sh1, <i42 9, i42 9>
+  %mul = mul <2 x i42> %sh1, %sh2
+  ret <2 x i42> %mul
+}
+
+; Equal shift amounts in opposite directions become bitwise 'and':
+; lshr (shl X, C), C --> and X, C'
+
+define <2 x i19> @eq_shl_lshr_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @eq_shl_lshr_splat_vec(
+; CHECK-NEXT:    [[SH1:%.*]] = and <2 x i19> %X, <i19 65535, i19 65535>
+; CHECK-NEXT:    ret <2 x i19> [[SH1]]
+;
+  %sh1 = shl <2 x i19> %X, <i19 3, i19 3>
+  %sh2 = lshr <2 x i19> %sh1, <i19 3, i19 3>
+  ret <2 x i19> %sh2
+}
+
+; Equal shift amounts in opposite directions become bitwise 'and':
+; shl (lshr X, C), C --> and X, C'
+
+define <2 x i19> @eq_lshr_shl_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @eq_lshr_shl_splat_vec(
+; CHECK-NEXT:    [[SH1:%.*]] = and <2 x i19> %X, <i19 -8, i19 -8>
+; CHECK-NEXT:    ret <2 x i19> [[SH1]]
+;
+  %sh1 = lshr <2 x i19> %X, <i19 3, i19 3>
+  %sh2 = shl <2 x i19> %sh1, <i19 3, i19 3>
+  ret <2 x i19> %sh2
+}
+
+; In general, we would need an 'and' for this transform, but the masked-off bits are known zero.
+; shl (lshr X, C1), C2 --> lshr X, C1 - C2
+
+define <2 x i7> @lshr_shl_splat_vec(<2 x i7> %X) {
+; CHECK-LABEL: @lshr_shl_splat_vec(
+; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i7> %X, <i7 -8, i7 -8>
+; CHECK-NEXT:    [[SH1:%.*]] = lshr exact <2 x i7> [[MUL]], <i7 1, i7 1>
+; CHECK-NEXT:    ret <2 x i7> [[SH1]]
+;
+  %mul = mul <2 x i7> %X, <i7 -8, i7 -8>
+  %sh1 = lshr exact <2 x i7> %mul, <i7 3, i7 3>
+  %sh2 = shl nuw nsw <2 x i7> %sh1, <i7 2, i7 2>
+  ret <2 x i7> %sh2
+}
+
+; In general, we would need an 'and' for this transform, but the masked-off bits are known zero.
+; lshr (shl X, C1), C2 -->  shl X, C1 - C2
+
+define <2 x i7> @shl_lshr_splat_vec(<2 x i7> %X) {
+; CHECK-LABEL: @shl_lshr_splat_vec(
+; CHECK-NEXT:    [[DIV:%.*]] = udiv <2 x i7> %X, <i7 9, i7 9>
+; CHECK-NEXT:    [[SH1:%.*]] = shl nuw nsw <2 x i7> [[DIV]], <i7 1, i7 1>
+; CHECK-NEXT:    ret <2 x i7> [[SH1]]
 ;
-  %B = lshr i19 %A, 18
-  %C = shl i19 %B, 18
-  ret i19 %C
+  %div = udiv <2 x i7> %X, <i7 9, i7 9>
+  %sh1 = shl nuw <2 x i7> %div, <i7 3, i7 3>
+  %sh2 = lshr exact <2 x i7> %sh1, <i7 2, i7 2>
+  ret <2 x i7> %sh2
 }
 
 ; Don't hide the shl from scalar evolution. DAGCombine will get it.
@@ -125,14 +275,29 @@ define i23 @test11(i23 %A) {
   ret i23 %C
 }
 
-define i47 @test12(i47 %A) {
+; shl (ashr X, C), C --> and X, C'
+
+define i47 @test12(i47 %X) {
 ; CHECK-LABEL: @test12(
-; CHECK-NEXT:    [[B1:%.*]] = and i47 %A, -256
-; CHECK-NEXT:    ret i47 [[B1]]
+; CHECK-NEXT:    [[SH11:%.*]] = and i47 %X, -256
+; CHECK-NEXT:    ret i47 [[SH11]]
+;
+  %sh1 = ashr i47 %X, 8
+  %sh2 = shl i47 %sh1, 8
+  ret i47 %sh2
+}
+
+; FIXME: Same as above with vectors.
+
+define <2 x i47> @test12_splat_vec(<2 x i47> %X) {
+; CHECK-LABEL: @test12_splat_vec(
+; CHECK-NEXT:    [[SH1:%.*]] = ashr <2 x i47> %X, <i47 8, i47 8>
+; CHECK-NEXT:    [[SH2:%.*]] = shl nsw <2 x i47> [[SH1]], <i47 8, i47 8>
+; CHECK-NEXT:    ret <2 x i47> [[SH2]]
 ;
-  %B = ashr i47 %A, 8
-  %C = shl i47 %B, 8
-  ret i47 %C
+  %sh1 = ashr <2 x i47> %X, <i47 8, i47 8>
+  %sh2 = shl <2 x i47> %sh1, <i47 8, i47 8>
+  ret <2 x i47> %sh2
 }
 
 ; Don't hide the shl from scalar evolution. DAGCombine will get it.
@@ -330,6 +495,66 @@ define i11 @test23(i44 %A) {
   ret i11 %D
 }
 
+; Fold lshr (shl X, C), C -> and X, C' regardless of the number of uses of the shl.
+
+define i44 @shl_lshr_eq_amt_multi_use(i44 %A) {
+; CHECK-LABEL: @shl_lshr_eq_amt_multi_use(
+; CHECK-NEXT:    [[B:%.*]] = shl i44 %A, 33
+; CHECK-NEXT:    [[C:%.*]] = and i44 %A, 2047
+; CHECK-NEXT:    [[D:%.*]] = or i44 [[B]], [[C]]
+; CHECK-NEXT:    ret i44 [[D]]
+;
+  %B = shl i44 %A, 33
+  %C = lshr i44 %B, 33
+  %D = add i44 %B, %C
+  ret i44 %D
+}
+
+; Fold vector lshr (shl X, C), C -> and X, C' regardless of the number of uses of the shl.
+
+define <2 x i44> @shl_lshr_eq_amt_multi_use_splat_vec(<2 x i44> %A) {
+; CHECK-LABEL: @shl_lshr_eq_amt_multi_use_splat_vec(
+; CHECK-NEXT:    [[B:%.*]] = shl <2 x i44> %A, <i44 33, i44 33>
+; CHECK-NEXT:    [[C:%.*]] = and <2 x i44> %A, <i44 2047, i44 2047>
+; CHECK-NEXT:    [[D:%.*]] = or <2 x i44> [[B]], [[C]]
+; CHECK-NEXT:    ret <2 x i44> [[D]]
+;
+  %B = shl <2 x i44> %A, <i44 33, i44 33>
+  %C = lshr <2 x i44> %B, <i44 33, i44 33>
+  %D = add <2 x i44> %B, %C
+  ret <2 x i44> %D
+}
+
+; Fold shl (lshr X, C), C -> and X, C' regardless of the number of uses of the lshr.
+
+define i43 @lshr_shl_eq_amt_multi_use(i43 %A) {
+; CHECK-LABEL: @lshr_shl_eq_amt_multi_use(
+; CHECK-NEXT:    [[B:%.*]] = lshr i43 %A, 23
+; CHECK-NEXT:    [[C:%.*]] = and i43 %A, -8388608
+; CHECK-NEXT:    [[D:%.*]] = mul i43 [[B]], [[C]]
+; CHECK-NEXT:    ret i43 [[D]]
+;
+  %B = lshr i43 %A, 23
+  %C = shl i43 %B, 23
+  %D = mul i43 %B, %C
+  ret i43 %D
+}
+
+; Fold vector shl (lshr X, C), C -> and X, C' regardless of the number of uses of the lshr.
+
+define <2 x i43> @lshr_shl_eq_amt_multi_use_splat_vec(<2 x i43> %A) {
+; CHECK-LABEL: @lshr_shl_eq_amt_multi_use_splat_vec(
+; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i43> %A, <i43 23, i43 23>
+; CHECK-NEXT:    [[C:%.*]] = and <2 x i43> %A, <i43 -8388608, i43 -8388608>
+; CHECK-NEXT:    [[D:%.*]] = mul <2 x i43> [[B]], [[C]]
+; CHECK-NEXT:    ret <2 x i43> [[D]]
+;
+  %B = lshr <2 x i43> %A, <i43 23, i43 23>
+  %C = shl <2 x i43> %B, <i43 23, i43 23>
+  %D = mul <2 x i43> %B, %C
+  ret <2 x i43> %D
+}
+
 define i37 @test25(i37 %tmp.2, i37 %AA) {
 ; CHECK-LABEL: @test25(
 ; CHECK-NEXT:    [[TMP_3:%.*]] = and i37 %tmp.2, -131072
diff --git a/test/Transforms/InstCombine/apint-sub.ll b/test/Transforms/InstCombine/apint-sub.ll
index eb314ce3d1b25..1a4e62ff0d735 100644
--- a/test/Transforms/InstCombine/apint-sub.ll
+++ b/test/Transforms/InstCombine/apint-sub.ll
@@ -50,7 +50,7 @@ define i19 @test5(i19 %A, i19 %Bok, i19 %Cok) {
 define i57 @test6(i57 %A, i57 %B) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:    [[B_NOT:%.*]] = xor i57 %B, -1
-; CHECK-NEXT:    [[D:%.*]] = and i57 %A, [[B_NOT]]
+; CHECK-NEXT:    [[D:%.*]] = and i57 [[B_NOT]], %A
 ; CHECK-NEXT:    ret i57 [[D]]
 ;
   %C = and i57 %A, %B
diff --git a/test/Transforms/InstCombine/assume.ll b/test/Transforms/InstCombine/assume.ll
index 6e690426db99f..13fa6339e85aa 100644
--- a/test/Transforms/InstCombine/assume.ll
+++ b/test/Transforms/InstCombine/assume.ll
@@ -176,13 +176,13 @@ define i32 @icmp2(i32 %a) #0 {
   ret i32 %lnot.ext
 }
 
-; FIXME: If the 'not' of a condition is known true, then the condition must be false. 
+; If the 'not' of a condition is known true, then the condition must be false.
 
 define i1 @assume_not(i1 %cond) {
 ; CHECK-LABEL: @assume_not(
 ; CHECK-NEXT:    [[NOTCOND:%.*]] = xor i1 [[COND:%.*]], true
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[NOTCOND]])
-; CHECK-NEXT:    ret i1 [[COND]]
+; CHECK-NEXT:    ret i1 false
 ;
   %notcond = xor i1 %cond, true
   call void @llvm.assume(i1 %notcond)
diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll
index 1a91d11d8aeed..e940f0fcec75e 100644
--- a/test/Transforms/InstCombine/bitcast-bigendian.ll
+++ b/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -9,8 +9,8 @@ target triple = "powerpc64-unknown-linux-gnu"
 
 define float @test2(<2 x float> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> %A, i32 1
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i32> %B to <2 x float>
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 1
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i32> [[B:%.*]] to <2 x float>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[BC]], i32 1
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
 ; CHECK-NEXT:    ret float [[ADD]]
@@ -29,8 +29,8 @@ define float @test2(<2 x float> %A, <2 x i32> %B) {
 
 define float @test3(<2 x float> %A, <2 x i64> %B) {
 ; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> %A, i32 0
-; CHECK-NEXT:    [[BC2:%.*]] = bitcast <2 x i64> %B to <4 x float>
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x float>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BC2]], i32 1
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
 ; CHECK-NEXT:    ret float [[ADD]]
@@ -51,8 +51,8 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
 
 define <2 x i32> @test4(i32 %A, i32 %B){
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 %B, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %A, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[B:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A:%.*]], i32 1
 ; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %tmp38 = zext i32 %A to i64
@@ -65,8 +65,8 @@ define <2 x i32> @test4(i32 %A, i32 %B){
 
 define <2 x float> @test5(float %A, float %B) {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float %B, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float %A, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float [[B:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[A:%.*]], i32 1
 ; CHECK-NEXT:    ret <2 x float> [[TMP2]]
 ;
   %tmp37 = bitcast float %A to i32
@@ -81,9 +81,8 @@ define <2 x float> @test5(float %A, float %B) {
 
 define <2 x float> @test6(float %A){
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float %A, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float 4.200000e+01, i32 1
-; CHECK-NEXT:    ret <2 x float> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> <float undef, float 4.200000e+01>, float [[A:%.*]], i32 0
+; CHECK-NEXT:    ret <2 x float> [[TMP1]]
 ;
   %tmp23 = bitcast float %A to i32
   %tmp24 = zext i32 %tmp23 to i64
@@ -97,8 +96,8 @@ define <2 x float> @test6(float %A){
 
 define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
 ; CHECK-LABEL: @xor_bitcast_vec_to_vec(
-; CHECK-NEXT:    [[T21:%.*]] = xor <1 x i64> %a, <i64 4294967298>
-; CHECK-NEXT:    [[T2:%.*]] = bitcast <1 x i64> [[T21]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <1 x i64> [[A:%.*]], <i64 4294967298>
+; CHECK-NEXT:    [[T2:%.*]] = bitcast <1 x i64> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T2]]
 ;
   %t1 = bitcast <1 x i64> %a to <2 x i32>
@@ -110,8 +109,8 @@ define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
 
 define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
 ; CHECK-LABEL: @and_bitcast_vec_to_int(
-; CHECK-NEXT:    [[T21:%.*]] = and <2 x i32> %a, <i32 0, i32 3>
-; CHECK-NEXT:    [[T2:%.*]] = bitcast <2 x i32> [[T21]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 0, i32 3>
+; CHECK-NEXT:    [[T2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
 ; CHECK-NEXT:    ret i64 [[T2]]
 ;
   %t1 = bitcast <2 x i32> %a to i64
@@ -123,8 +122,8 @@ define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
 
 define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
 ; CHECK-LABEL: @or_bitcast_int_to_vec(
-; CHECK-NEXT:    [[T21:%.*]] = or i64 %a, 4294967298
-; CHECK-NEXT:    [[T2:%.*]] = bitcast i64 [[T21]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[A:%.*]], 4294967298
+; CHECK-NEXT:    [[T2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T2]]
 ;
   %t1 = bitcast i64 %a to <2 x i32>
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 08f49660f184c..2e7f30fee14d1 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -21,7 +21,7 @@ define i32 @test1(i64 %a) {
 
 define <2 x i32> @xor_two_vector_bitcasts(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: @xor_two_vector_bitcasts(
-; CHECK-NEXT:    [[T31:%.*]] = xor <1 x i64> %a, %b
+; CHECK-NEXT:    [[T31:%.*]] = xor <1 x i64> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[T3:%.*]] = bitcast <1 x i64> [[T31]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
@@ -35,8 +35,8 @@ define <2 x i32> @xor_two_vector_bitcasts(<1 x i64> %a, <1 x i64> %b) {
 
 define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
 ; CHECK-LABEL: @xor_bitcast_vec_to_vec(
-; CHECK-NEXT:    [[T21:%.*]] = xor <1 x i64> %a, <i64 8589934593>
-; CHECK-NEXT:    [[T2:%.*]] = bitcast <1 x i64> [[T21]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <1 x i64> [[A:%.*]], <i64 8589934593>
+; CHECK-NEXT:    [[T2:%.*]] = bitcast <1 x i64> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T2]]
 ;
   %t1 = bitcast <1 x i64> %a to <2 x i32>
@@ -48,8 +48,8 @@ define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
 
 define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
 ; CHECK-LABEL: @and_bitcast_vec_to_int(
-; CHECK-NEXT:    [[T21:%.*]] = and <2 x i32> %a, <i32 3, i32 0>
-; CHECK-NEXT:    [[T2:%.*]] = bitcast <2 x i32> [[T21]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 3, i32 0>
+; CHECK-NEXT:    [[T2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
 ; CHECK-NEXT:    ret i64 [[T2]]
 ;
   %t1 = bitcast <2 x i32> %a to i64
@@ -61,8 +61,8 @@ define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
 
 define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
 ; CHECK-LABEL: @or_bitcast_int_to_vec(
-; CHECK-NEXT:    [[T21:%.*]] = or i64 %a, 8589934593
-; CHECK-NEXT:    [[T2:%.*]] = bitcast i64 [[T21]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[A:%.*]], 8589934593
+; CHECK-NEXT:    [[T2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T2]]
 ;
   %t1 = bitcast i64 %a to <2 x i32>
@@ -74,8 +74,8 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
 
 define <4 x i32> @bitcasts_and_bitcast(<4 x i32> %a, <8 x i16> %b) {
 ; CHECK-LABEL: @bitcasts_and_bitcast(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> %b to <4 x i32>
-; CHECK-NEXT:    [[BC3:%.*]] = and <4 x i32> [[TMP1]], %a
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[BC3:%.*]] = and <4 x i32> [[TMP1]], [[A:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[BC3]]
 ;
   %bc1 = bitcast <4 x i32> %a to <2 x i64>
@@ -91,8 +91,8 @@ define <4 x i32> @bitcasts_and_bitcast(<4 x i32> %a, <8 x i16> %b) {
 
 define <4 x float> @bitcasts_and_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) {
 ; CHECK-LABEL: @bitcasts_and_bitcast_to_fp(
-; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x float> %a to <2 x i64>
-; CHECK-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> %b to <2 x i64>
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[B:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[BC2]], [[BC1]]
 ; CHECK-NEXT:    [[BC3:%.*]] = bitcast <2 x i64> [[AND]] to <4 x float>
 ; CHECK-NEXT:    ret <4 x float> [[BC3]]
@@ -108,8 +108,8 @@ define <4 x float> @bitcasts_and_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) {
 
 define i128 @bitcast_or_bitcast(i128 %a, <2 x i64> %b) {
 ; CHECK-LABEL: @bitcast_or_bitcast(
-; CHECK-NEXT:    [[BC1:%.*]] = bitcast i128 %a to <2 x i64>
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i64> [[BC1]], %b
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast i128 [[A:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i64> [[BC1]], [[B:%.*]]
 ; CHECK-NEXT:    [[BC2:%.*]] = bitcast <2 x i64> [[OR]] to i128
 ; CHECK-NEXT:    ret i128 [[BC2]]
 ;
@@ -123,8 +123,8 @@ define i128 @bitcast_or_bitcast(i128 %a, <2 x i64> %b) {
 
 define <4 x i32> @bitcast_xor_bitcast(<4 x i32> %a, i128 %b) {
 ; CHECK-LABEL: @bitcast_xor_bitcast(
-; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> %a to i128
-; CHECK-NEXT:    [[XOR:%.*]] = xor i128 [[BC1]], %b
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[A:%.*]] to i128
+; CHECK-NEXT:    [[XOR:%.*]] = xor i128 [[BC1]], [[B:%.*]]
 ; CHECK-NEXT:    [[BC2:%.*]] = bitcast i128 [[XOR]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[BC2]]
 ;
@@ -138,8 +138,8 @@ define <4 x i32> @bitcast_xor_bitcast(<4 x i32> %a, i128 %b) {
 
 define <4 x float> @bitcast_vector_select(<4 x float> %x, <2 x i64> %y, <4 x i1> %cmp) {
 ; CHECK-LABEL: @bitcast_vector_select(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %y to <4 x float>
-; CHECK-NEXT:    [[T7:%.*]] = select <4 x i1> %cmp, <4 x float> %x, <4 x float> [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x float>
+; CHECK-NEXT:    [[T7:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x float> [[X:%.*]], <4 x float> [[TMP1]]
 ; CHECK-NEXT:    ret <4 x float> [[T7]]
 ;
   %t4 = bitcast <4 x float> %x to <4 x i32>
@@ -151,8 +151,8 @@ define <4 x float> @bitcast_vector_select(<4 x float> %x, <2 x i64> %y, <4 x i1>
 
 define float @bitcast_scalar_select_of_scalars(float %x, i32 %y, i1 %cmp) {
 ; CHECK-LABEL: @bitcast_scalar_select_of_scalars(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 %y to float
-; CHECK-NEXT:    [[T7:%.*]] = select i1 %cmp, float %x, float [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[Y:%.*]] to float
+; CHECK-NEXT:    [[T7:%.*]] = select i1 [[CMP:%.*]], float [[X:%.*]], float [[TMP1]]
 ; CHECK-NEXT:    ret float [[T7]]
 ;
   %t4 = bitcast float %x to i32
@@ -166,8 +166,8 @@ define float @bitcast_scalar_select_of_scalars(float %x, i32 %y, i1 %cmp) {
 
 define float @bitcast_scalar_select_type_mismatch1(float %x, <4 x i8> %y, i1 %cmp) {
 ; CHECK-LABEL: @bitcast_scalar_select_type_mismatch1(
-; CHECK-NEXT:    [[T4:%.*]] = bitcast float %x to <4 x i8>
-; CHECK-NEXT:    [[T6:%.*]] = select i1 %cmp, <4 x i8> [[T4]], <4 x i8> %y
+; CHECK-NEXT:    [[T4:%.*]] = bitcast float [[X:%.*]] to <4 x i8>
+; CHECK-NEXT:    [[T6:%.*]] = select i1 [[CMP:%.*]], <4 x i8> [[T4]], <4 x i8> [[Y:%.*]]
 ; CHECK-NEXT:    [[T7:%.*]] = bitcast <4 x i8> [[T6]] to float
 ; CHECK-NEXT:    ret float [[T7]]
 ;
@@ -182,8 +182,8 @@ define float @bitcast_scalar_select_type_mismatch1(float %x, <4 x i8> %y, i1 %cm
 
 define <4 x i8> @bitcast_scalar_select_type_mismatch2(<4 x i8> %x, float %y, i1 %cmp) {
 ; CHECK-LABEL: @bitcast_scalar_select_type_mismatch2(
-; CHECK-NEXT:    [[T4:%.*]] = bitcast <4 x i8> %x to float
-; CHECK-NEXT:    [[T6:%.*]] = select i1 %cmp, float [[T4]], float %y
+; CHECK-NEXT:    [[T4:%.*]] = bitcast <4 x i8> [[X:%.*]] to float
+; CHECK-NEXT:    [[T6:%.*]] = select i1 [[CMP:%.*]], float [[T4]], float [[Y:%.*]]
 ; CHECK-NEXT:    [[T7:%.*]] = bitcast float [[T6]] to <4 x i8>
 ; CHECK-NEXT:    ret <4 x i8> [[T7]]
 ;
@@ -195,8 +195,8 @@ define <4 x i8> @bitcast_scalar_select_type_mismatch2(<4 x i8> %x, float %y, i1
 
 define <4 x float> @bitcast_scalar_select_of_vectors(<4 x float> %x, <2 x i64> %y, i1 %cmp) {
 ; CHECK-LABEL: @bitcast_scalar_select_of_vectors(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %y to <4 x float>
-; CHECK-NEXT:    [[T7:%.*]] = select i1 %cmp, <4 x float> %x, <4 x float> [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x float>
+; CHECK-NEXT:    [[T7:%.*]] = select i1 [[CMP:%.*]], <4 x float> [[X:%.*]], <4 x float> [[TMP1]]
 ; CHECK-NEXT:    ret <4 x float> [[T7]]
 ;
   %t4 = bitcast <4 x float> %x to <4 x i32>
@@ -210,9 +210,9 @@ define <4 x float> @bitcast_scalar_select_of_vectors(<4 x float> %x, <2 x i64> %
 
 define float @bitcast_vector_select_no_fold1(float %x, <2 x i16> %y, <4 x i1> %cmp) {
 ; CHECK-LABEL: @bitcast_vector_select_no_fold1(
-; CHECK-NEXT:    [[T4:%.*]] = bitcast float %x to <4 x i8>
-; CHECK-NEXT:    [[T5:%.*]] = bitcast <2 x i16> %y to <4 x i8>
-; CHECK-NEXT:    [[T6:%.*]] = select <4 x i1> %cmp, <4 x i8> [[T4]], <4 x i8> [[T5]]
+; CHECK-NEXT:    [[T4:%.*]] = bitcast float [[X:%.*]] to <4 x i8>
+; CHECK-NEXT:    [[T5:%.*]] = bitcast <2 x i16> [[Y:%.*]] to <4 x i8>
+; CHECK-NEXT:    [[T6:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i8> [[T4]], <4 x i8> [[T5]]
 ; CHECK-NEXT:    [[T7:%.*]] = bitcast <4 x i8> [[T6]] to float
 ; CHECK-NEXT:    ret float [[T7]]
 ;
@@ -227,9 +227,9 @@ define float @bitcast_vector_select_no_fold1(float %x, <2 x i16> %y, <4 x i1> %c
 
 define <2 x float> @bitcast_vector_select_no_fold2(<2 x float> %x, <4 x i16> %y, <8 x i1> %cmp) {
 ; CHECK-LABEL: @bitcast_vector_select_no_fold2(
-; CHECK-NEXT:    [[T4:%.*]] = bitcast <2 x float> %x to <8 x i8>
-; CHECK-NEXT:    [[T5:%.*]] = bitcast <4 x i16> %y to <8 x i8>
-; CHECK-NEXT:    [[T6:%.*]] = select <8 x i1> %cmp, <8 x i8> [[T4]], <8 x i8> [[T5]]
+; CHECK-NEXT:    [[T4:%.*]] = bitcast <2 x float> [[X:%.*]] to <8 x i8>
+; CHECK-NEXT:    [[T5:%.*]] = bitcast <4 x i16> [[Y:%.*]] to <8 x i8>
+; CHECK-NEXT:    [[T6:%.*]] = select <8 x i1> [[CMP:%.*]], <8 x i8> [[T4]], <8 x i8> [[T5]]
 ; CHECK-NEXT:    [[T7:%.*]] = bitcast <8 x i8> [[T6]] to <2 x float>
 ; CHECK-NEXT:    ret <2 x float> [[T7]]
 ;
@@ -244,8 +244,8 @@ define <2 x float> @bitcast_vector_select_no_fold2(<2 x float> %x, <4 x i16> %y,
 ; rdar://7892780
 define float @test2(<2 x float> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> %A, i32 0
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i32> %B to <2 x float>
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i32> [[B:%.*]] to <2 x float>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[BC]], i32 0
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
 ; CHECK-NEXT:    ret float [[ADD]]
@@ -266,8 +266,8 @@ define float @test2(<2 x float> %A, <2 x i32> %B) {
 ; rdar://7892780
 define float @test3(<2 x float> %A, <2 x i64> %B) {
 ; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> %A, i32 1
-; CHECK-NEXT:    [[BC2:%.*]] = bitcast <2 x i64> %B to <4 x float>
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 1
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x float>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BC2]], i32 2
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
 ; CHECK-NEXT:    ret float [[ADD]]
@@ -290,7 +290,7 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
 
 define float @bitcast_extelt1(<2 x float> %A) {
 ; CHECK-LABEL: @bitcast_extelt1(
-; CHECK-NEXT:    [[BC2:%.*]] = extractelement <2 x float> %A, i32 0
+; CHECK-NEXT:    [[BC2:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
 ; CHECK-NEXT:    ret float [[BC2]]
 ;
   %bc1 = bitcast <2 x float> %A to <2 x i32>
@@ -303,7 +303,7 @@ define float @bitcast_extelt1(<2 x float> %A) {
 
 define i64 @bitcast_extelt2(<4 x float> %A) {
 ; CHECK-LABEL: @bitcast_extelt2(
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x float> %A to <2 x i64>
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[BC2:%.*]] = extractelement <2 x i64> [[BC]], i32 1
 ; CHECK-NEXT:    ret i64 [[BC2]]
 ;
@@ -317,7 +317,7 @@ define i64 @bitcast_extelt2(<4 x float> %A) {
 
 define <2 x i32> @bitcast_extelt3(<2 x i32> %A) {
 ; CHECK-LABEL: @bitcast_extelt3(
-; CHECK-NEXT:    [[BC1:%.*]] = bitcast <2 x i32> %A to <1 x i64>
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <2 x i32> [[A:%.*]] to <1 x i64>
 ; CHECK-NEXT:    [[EXT:%.*]] = extractelement <1 x i64> [[BC1]], i32 0
 ; CHECK-NEXT:    [[BC2:%.*]] = bitcast i64 [[EXT]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[BC2]]
@@ -332,7 +332,7 @@ define <2 x i32> @bitcast_extelt3(<2 x i32> %A) {
 
 define double @bitcast_extelt4(i128 %A) {
 ; CHECK-LABEL: @bitcast_extelt4(
-; CHECK-NEXT:    [[BC:%.*]] = bitcast i128 %A to <2 x double>
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i128 [[A:%.*]] to <2 x double>
 ; CHECK-NEXT:    [[BC2:%.*]] = extractelement <2 x double> [[BC]], i32 0
 ; CHECK-NEXT:    ret double [[BC2]]
 ;
@@ -344,8 +344,8 @@ define double @bitcast_extelt4(i128 %A) {
 
 define <2 x i32> @test4(i32 %A, i32 %B){
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 %A, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %B, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[B:%.*]], i32 1
 ; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %tmp38 = zext i32 %A to i64
@@ -359,8 +359,8 @@ define <2 x i32> @test4(i32 %A, i32 %B){
 ; rdar://8360454
 define <2 x float> @test5(float %A, float %B) {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float %A, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float %B, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B:%.*]], i32 1
 ; CHECK-NEXT:    ret <2 x float> [[TMP2]]
 ;
   %tmp37 = bitcast float %A to i32
@@ -375,7 +375,7 @@ define <2 x float> @test5(float %A, float %B) {
 
 define <2 x float> @test6(float %A){
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> <float 4.200000e+01, float undef>, float %A, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> <float 4.200000e+01, float undef>, float [[A:%.*]], i32 1
 ; CHECK-NEXT:    ret <2 x float> [[TMP1]]
 ;
   %tmp23 = bitcast float %A to i32
@@ -422,7 +422,7 @@ define i32 @All111(i32 %in) {
 
 define <2 x i16> @BitcastInsert(i32 %a) {
 ; CHECK-LABEL: @BitcastInsert(
-; CHECK-NEXT:    [[R:%.*]] = bitcast i32 %a to <2 x i16>
+; CHECK-NEXT:    [[R:%.*]] = bitcast i32 [[A:%.*]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %v = insertelement <1 x i32> undef, i32 %a, i32 0
@@ -433,7 +433,7 @@ define <2 x i16> @BitcastInsert(i32 %a) {
 ; PR17293
 define <2 x i64> @test7(<2 x i8*>* %arg) nounwind {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i8*>* %arg to <2 x i64>*
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i8*>* [[ARG:%.*]] to <2 x i64>*
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[CAST]], align 16
 ; CHECK-NEXT:    ret <2 x i64> [[LOAD]]
 ;
@@ -452,25 +452,24 @@ define i8 @test8() {
 
 @g = internal unnamed_addr global i32 undef
 
-; CHECK-LABEL: @constant_fold_vector_to_double(
-; CHECK: store volatile double 1.000000e+00,
-; CHECK: store volatile double 1.000000e+00,
-; CHECK: store volatile double 1.000000e+00,
-; CHECK: store volatile double 1.000000e+00,
-
-; CHECK: store volatile double 0xFFFFFFFFFFFFFFFF,
-; CHECK: store volatile double 0x162E000004D2,
-
-; CHECK: store volatile double bitcast (<2 x i32> <i32 1234, i32 ptrtoint (i32* @g to i32)> to double),
-; CHECK: store volatile double 0x400000003F800000,
-
-; CHECK: store volatile double 0.000000e+00,
-; CHECK: store volatile double 0.000000e+00,
-; CHECK: store volatile double 0.000000e+00,
-; CHECK: store volatile double 0.000000e+00,
-; CHECK: store volatile double 0.000000e+00,
-; CHECK: store volatile double 0.000000e+00,
 define void @constant_fold_vector_to_double() {
+; CHECK-LABEL: @constant_fold_vector_to_double(
+; CHECK-NEXT:    store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0xFFFFFFFFFFFFFFFF, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0x162E000004D2, double* undef, align 8
+; CHECK-NEXT:    store volatile double bitcast (<2 x i32> <i32 1234, i32 ptrtoint (i32* @g to i32)> to double), double* undef, align 8
+; CHECK-NEXT:    store volatile double 0x400000003F800000, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    ret void
+;
   store volatile double bitcast (<1 x i64> <i64 4607182418800017408> to double), double* undef
   store volatile double bitcast (<2 x i32> <i32 0, i32 1072693248> to double), double* undef
   store volatile double bitcast (<4 x i16> <i16 0, i16 0, i16 0, i16 16368> to double), double* undef
@@ -491,12 +490,14 @@ define void @constant_fold_vector_to_double() {
   ret void
 }
 
-; CHECK-LABEL: @constant_fold_vector_to_float(
-; CHECK: store volatile float 1.000000e+00,
-; CHECK: store volatile float 1.000000e+00,
-; CHECK: store volatile float 1.000000e+00,
-; CHECK: store volatile float 1.000000e+00,
 define void @constant_fold_vector_to_float() {
+; CHECK-LABEL: @constant_fold_vector_to_float(
+; CHECK-NEXT:    store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT:    store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT:    store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT:    store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT:    ret void
+;
   store volatile float bitcast (<1 x i32> <i32 1065353216> to float), float* undef
   store volatile float bitcast (<2 x i16> <i16 0, i16 16256> to float), float* undef
   store volatile float bitcast (<4 x i8> <i8 0, i8 0, i8 128, i8 63> to float), float* undef
@@ -505,10 +506,12 @@ define void @constant_fold_vector_to_float() {
   ret void
 }
 
-; CHECK-LABEL: @constant_fold_vector_to_half(
-; CHECK: store volatile half 0xH4000,
-; CHECK: store volatile half 0xH4000,
 define void @constant_fold_vector_to_half() {
+; CHECK-LABEL: @constant_fold_vector_to_half(
+; CHECK-NEXT:    store volatile half 0xH4000, half* undef, align 2
+; CHECK-NEXT:    store volatile half 0xH4000, half* undef, align 2
+; CHECK-NEXT:    ret void
+;
   store volatile half bitcast (<2 x i8> <i8 0, i8 64> to half), half* undef
   store volatile half bitcast (<4 x i4> <i4 0, i4 0, i4 0, i4 4> to half), half* undef
   ret void
diff --git a/test/Transforms/InstCombine/bitreverse-fold.ll b/test/Transforms/InstCombine/bitreverse-fold.ll
index ecdfbc8cb5f99..b798ad33b3f08 100644
--- a/test/Transforms/InstCombine/bitreverse-fold.ll
+++ b/test/Transforms/InstCombine/bitreverse-fold.ll
@@ -37,6 +37,13 @@ define i32 @reverse_neg1_i32() {
   ret i32 %x
 }
 
+; CHECK-LABEL: @reverse_undef_i32(
+; CHECK-NEXT: ret i32 undef
+define i32 @reverse_undef_i32() {
+  %x = call i32 @llvm.bitreverse.i32(i32 undef)
+  ret i32 %x
+}
+
 ; CHECK-LABEL: @reverse_false_i1(
 ; CHECK-NEXT: ret i1 false
 define i1 @reverse_false_i1() {
@@ -51,6 +58,13 @@ define i1 @reverse_true_i1() {
   ret i1 %x
 }
 
+; CHECK-LABEL: @reverse_undef_i1(
+; CHECK-NEXT: ret i1 undef
+define i1 @reverse_undef_i1() {
+  %x = call i1 @llvm.bitreverse.i1(i1 undef)
+  ret i1 %x
+}
+
 ; CHECK-LABEL: @reverse_false_v2i1(
 ; CHECK-NEXT: ret <2 x i1> zeroinitializer
 define <2 x i1> @reverse_false_v2i1() {
diff --git a/test/Transforms/InstCombine/bitreverse-known-bits.ll b/test/Transforms/InstCombine/bitreverse-known-bits.ll
new file mode 100644
index 0000000000000..cd1523a3b06ba
--- /dev/null
+++ b/test/Transforms/InstCombine/bitreverse-known-bits.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+declare i8 @llvm.bitreverse.i8(i8)
+declare i32 @llvm.bitreverse.i32(i32)
+
+; CHECK-LABEL: @test1
+; CHECK: ret i1 true
+define i1 @test1(i32 %arg) {
+  %a = or i32 %arg, 4294901760
+  %b = call i32 @llvm.bitreverse.i32(i32 %a)
+  %and = and i32 %b, 65535
+  %res = icmp eq i32 %and, 65535
+  ret i1 %res
+}
+
+; CHECK-LABEL: @test2
+; CHECK: ret i1 true
+define i1 @test2(i32 %arg) {
+  %a = or i32 %arg, 1
+  %b = call i32 @llvm.bitreverse.i32(i32 %a)
+  %c = and i32 %b, 2147483648
+  %d = call i32 @llvm.bitreverse.i32(i32 %c)
+  %res = icmp eq i32 %d, 1
+  ret i1 %res
+}
+
+; CHECK-LABEL: @test3
+; CHECK: ret i1 false
+define i1 @test3(i32 %arg) {
+  %a = or i32 %arg, 65536
+  %b = call i32 @llvm.bitreverse.i32(i32 %a)
+  %and = and i32 %b, 32768
+  %res = icmp eq i32 %and, 0
+  ret i1 %res
+}
+
+; CHECK-LABEL: @add_bitreverse
+; Make sure we process range metadata on bitreverse
+define i8 @add_bitreverse(i8 %a) {
+  %b = and i8 %a, 252
+  ; known bits for the bitreverse will say the result is in the range [0, 64)
+  ; but the metadata says [0, 16). So make sure the range metadata wins.
+  ;    add %reverse, 1111 0000
+  ; should become
+  ;    or  %reverse, 1111 0000
+  %reverse = call i8 @llvm.bitreverse.i8(i8 %b), !range !1
+  %c = add i8 %reverse, -16
+; CHECK: or i8 %reverse, -16
+  ret i8 %c
+}
+!1 = !{i8 0, i8 16}
diff --git a/test/Transforms/InstCombine/bswap-fold.ll b/test/Transforms/InstCombine/bswap-fold.ll
index edf9572f1e112..91678a91962a8 100644
--- a/test/Transforms/InstCombine/bswap-fold.ll
+++ b/test/Transforms/InstCombine/bswap-fold.ll
@@ -1,68 +1,75 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-define i1 @test1(i16 %tmp2) {
-; CHECK-LABEL: @test1
-; CHECK-NEXT:  %tmp = icmp eq i16 %tmp2, 256
-; CHECK-NEXT:  ret i1 %tmp
-        %tmp10 = call i16 @llvm.bswap.i16( i16 %tmp2 )
-        %tmp = icmp eq i16 %tmp10, 1
-        ret i1 %tmp
+define i1 @test1(i16 %t) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i16 %t, 256
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %tmp1 = call i16 @llvm.bswap.i16( i16 %t )
+  %tmp2 = icmp eq i16 %tmp1, 1
+  ret i1 %tmp2
 }
 
 define i1 @test2(i32 %tmp) {
-; CHECK-LABEL: @test2
-; CHECK-NEXT:  %tmp.upgrd.1 = icmp eq i32 %tmp, 16777216
-; CHECK-NEXT:  ret i1 %tmp.upgrd.1
-        %tmp34 = tail call i32 @llvm.bswap.i32( i32 %tmp )
-        %tmp.upgrd.1 = icmp eq i32 %tmp34, 1
-        ret i1 %tmp.upgrd.1
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[TMP_UPGRD_1:%.*]] = icmp eq i32 %tmp, 16777216
+; CHECK-NEXT:    ret i1 [[TMP_UPGRD_1]]
+;
+  %tmp34 = tail call i32 @llvm.bswap.i32( i32 %tmp )
+  %tmp.upgrd.1 = icmp eq i32 %tmp34, 1
+  ret i1 %tmp.upgrd.1
 }
 
 define i1 @test3(i64 %tmp) {
-; CHECK-LABEL: @test3
-; CHECK-NEXT:  %tmp.upgrd.2 = icmp eq i64 %tmp, 72057594037927936
-; CHECK-NEXT:  ret i1 %tmp.upgrd.2
-        %tmp34 = tail call i64 @llvm.bswap.i64( i64 %tmp )
-        %tmp.upgrd.2 = icmp eq i64 %tmp34, 1
-        ret i1 %tmp.upgrd.2
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[TMP_UPGRD_2:%.*]] = icmp eq i64 %tmp, 72057594037927936
+; CHECK-NEXT:    ret i1 [[TMP_UPGRD_2]]
+;
+  %tmp34 = tail call i64 @llvm.bswap.i64( i64 %tmp )
+  %tmp.upgrd.2 = icmp eq i64 %tmp34, 1
+  ret i1 %tmp.upgrd.2
 }
 
 ; rdar://5992453
 ; A & 255
 define i32 @test4(i32 %a) nounwind  {
-; CHECK-LABEL: @test4
-; CHECK-NEXT:  %tmp2 = and i32 %a, 255
-; CHECK-NEXT:  ret i32 %tmp2
-	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
-	%tmp4 = lshr i32 %tmp2, 24
-	ret i32 %tmp4
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 %a, 255
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
+  %tmp4 = lshr i32 %tmp2, 24
+  ret i32 %tmp4
 }
 
 ; A
 define i32 @test5(i32 %a) nounwind {
-; CHECK-LABEL: @test5
-; CHECK-NEXT:  ret i32 %a
-	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
-	%tmp4 = tail call i32 @llvm.bswap.i32( i32 %tmp2 )
-	ret i32 %tmp4
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    ret i32 %a
+;
+  %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
+  %tmp4 = tail call i32 @llvm.bswap.i32( i32 %tmp2 )
+  ret i32 %tmp4
 }
 
 ; a >> 24
 define i32 @test6(i32 %a) nounwind {
-; CHECK-LABEL: @test6
-; CHECK-NEXT:  %tmp2 = lshr i32 %a, 24
-; CHECK-NEXT:  ret i32 %tmp2
-	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
-	%tmp4 = and i32 %tmp2, 255
-	ret i32 %tmp4
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 %a, 24
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
+  %tmp4 = and i32 %tmp2, 255
+  ret i32 %tmp4
 }
 
 ; PR5284
 define i16 @test7(i32 %A) {
-; CHECK-LABEL: @test7
-; CHECK-NEXT:  %1 = lshr i32 %A, 16
-; CHECK-NEXT:  %D = trunc i32 %1 to i16
-; CHECK-NEXT:  ret i16 %D
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 %A, 16
+; CHECK-NEXT:    [[D:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT:    ret i16 [[D]]
+;
   %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
   %C = trunc i32 %B to i16
   %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
@@ -70,11 +77,12 @@ define i16 @test7(i32 %A) {
 }
 
 define i16 @test8(i64 %A) {
-; CHECK-LABEL: @test8
-; CHECK-NEXT:  %1 = lshr i64 %A, 48
-; CHECK-NEXT:  %D = trunc i64 %1 to i16
-; CHECK-NEXT:  ret i16 %D
-  %B = tail call i64 @llvm.bswap.i64(i64 %A) nounwind 
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 %A, 48
+; CHECK-NEXT:    [[D:%.*]] = trunc i64 [[TMP1]] to i16
+; CHECK-NEXT:    ret i16 [[D]]
+;
+  %B = tail call i64 @llvm.bswap.i64(i64 %A) nounwind
   %C = trunc i64 %B to i16
   %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
   ret i16 %D
@@ -82,8 +90,9 @@ define i16 @test8(i64 %A) {
 
 ; Misc: Fold bswap(undef) to undef.
 define i64 @foo() {
-; CHECK-LABEL: @foo
-; CHECK-NEXT: ret i64 undef
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    ret i64 undef
+;
   %a = call i64 @llvm.bswap.i64(i64 undef)
   ret i64 %a
 }
@@ -92,20 +101,22 @@ define i64 @foo() {
 ; Fold: OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
 ; Fold: OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
 define i16 @bs_and16i(i16 %a, i16 %b) #0 {
-; CHECK-LABEL: @bs_and16i
-; CHECK-NEXT:  %1 = and i16 %a, 4391
-; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
-; CHECK-NEXT:  ret i16 %2
+; CHECK-LABEL: @bs_and16i(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i16 %a, 4391
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
   %1 = tail call i16 @llvm.bswap.i16(i16 %a)
   %2 = and i16 %1, 10001
   ret i16 %2
 }
 
 define i16 @bs_and16(i16 %a, i16 %b) #0 {
-; CHECK-LABEL: @bs_and16
-; CHECK-NEXT:  %1 = and i16 %a, %b
-; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
-; CHECK-NEXT:  ret i16 %2
+; CHECK-LABEL: @bs_and16(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i16 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
   %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
   %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
   %tmp3 = and i16 %tmp1, %tmp2
@@ -113,10 +124,11 @@ define i16 @bs_and16(i16 %a, i16 %b) #0 {
 }
 
 define i16 @bs_or16(i16 %a, i16 %b) #0 {
-; CHECK-LABEL: @bs_or16
-; CHECK-NEXT:  %1 = or i16 %a, %b
-; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
-; CHECK-NEXT:  ret i16 %2
+; CHECK-LABEL: @bs_or16(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i16 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
   %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
   %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
   %tmp3 = or i16 %tmp1, %tmp2
@@ -124,10 +136,11 @@ define i16 @bs_or16(i16 %a, i16 %b) #0 {
 }
 
 define i16 @bs_xor16(i16 %a, i16 %b) #0 {
-; CHECK-LABEL: @bs_xor16
-; CHECK-NEXT:  %1 = xor i16 %a, %b
-; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
-; CHECK-NEXT:  ret i16 %2
+; CHECK-LABEL: @bs_xor16(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i16 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
   %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
   %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
   %tmp3 = xor i16 %tmp1, %tmp2
@@ -135,20 +148,22 @@ define i16 @bs_xor16(i16 %a, i16 %b) #0 {
 }
 
 define i32 @bs_and32i(i32 %a, i32 %b) #0 {
-; CHECK-LABEL: @bs_and32i
-; CHECK-NEXT:  %1 = and i32 %a, -1585053440
-; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
-; CHECK-NEXT:  ret i32 %2
+; CHECK-LABEL: @bs_and32i(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %a, -1585053440
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
   %tmp2 = and i32 %tmp1, 100001
   ret i32 %tmp2
 }
 
 define i32 @bs_and32(i32 %a, i32 %b) #0 {
-; CHECK-LABEL: @bs_and32
-; CHECK-NEXT:  %1 = and i32 %a, %b
-; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
-; CHECK-NEXT:  ret i32 %2
+; CHECK-LABEL: @bs_and32(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
   %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
   %tmp3 = and i32 %tmp1, %tmp2
@@ -156,10 +171,11 @@ define i32 @bs_and32(i32 %a, i32 %b) #0 {
 }
 
 define i32 @bs_or32(i32 %a, i32 %b) #0 {
-; CHECK-LABEL: @bs_or32
-; CHECK-NEXT:  %1 = or i32 %a, %b
-; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
-; CHECK-NEXT:  ret i32 %2
+; CHECK-LABEL: @bs_or32(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
   %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
   %tmp3 = or i32 %tmp1, %tmp2
@@ -167,10 +183,11 @@ define i32 @bs_or32(i32 %a, i32 %b) #0 {
 }
 
 define i32 @bs_xor32(i32 %a, i32 %b) #0 {
-; CHECK-LABEL: @bs_xor32
-; CHECK-NEXT:  %1 = xor i32 %a, %b
-; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
-; CHECK-NEXT:  ret i32 %2
+; CHECK-LABEL: @bs_xor32(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
   %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
   %tmp3 = xor i32 %tmp1, %tmp2
@@ -178,20 +195,22 @@ define i32 @bs_xor32(i32 %a, i32 %b) #0 {
 }
 
 define i64 @bs_and64i(i64 %a, i64 %b) #0 {
-; CHECK-LABEL: @bs_and64i
-; CHECK-NEXT:  %1 = and i64 %a, 129085117527228416
-; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
-; CHECK-NEXT:  ret i64 %2
+; CHECK-LABEL: @bs_and64i(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 %a, 129085117527228416
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
   %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
   %tmp2 = and i64 %tmp1, 1000000001
   ret i64 %tmp2
 }
 
 define i64 @bs_and64(i64 %a, i64 %b) #0 {
-; CHECK-LABEL: @bs_and64
-; CHECK-NEXT:  %1 = and i64 %a, %b
-; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
-; CHECK-NEXT:  ret i64 %2
+; CHECK-LABEL: @bs_and64(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
   %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
   %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
   %tmp3 = and i64 %tmp1, %tmp2
@@ -199,10 +218,11 @@ define i64 @bs_and64(i64 %a, i64 %b) #0 {
 }
 
 define i64 @bs_or64(i64 %a, i64 %b) #0 {
-; CHECK-LABEL: @bs_or64
-; CHECK-NEXT:  %1 = or i64 %a, %b
-; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
-; CHECK-NEXT:  ret i64 %2
+; CHECK-LABEL: @bs_or64(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
   %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
   %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
   %tmp3 = or i64 %tmp1, %tmp2
@@ -210,10 +230,11 @@ define i64 @bs_or64(i64 %a, i64 %b) #0 {
 }
 
 define i64 @bs_xor64(i64 %a, i64 %b) #0 {
-; CHECK-LABEL: @bs_xor64
-; CHECK-NEXT:  %1 = xor i64 %a, %b
-; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
-; CHECK-NEXT:  ret i64 %2
+; CHECK-LABEL: @bs_xor64(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i64 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
   %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
   %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
   %tmp3 = xor i64 %tmp1, %tmp2
diff --git a/test/Transforms/InstCombine/builtin-object-size-offset.ll b/test/Transforms/InstCombine/builtin-object-size-offset.ll
index 7ab24a9acd949..248cf644df892 100644
--- a/test/Transforms/InstCombine/builtin-object-size-offset.ll
+++ b/test/Transforms/InstCombine/builtin-object-size-offset.ll
@@ -26,25 +26,25 @@ entry:
   %Big = alloca [20 x i8], align 16
   %Small = alloca [10 x i8], align 1
   %0 = getelementptr inbounds [20 x i8], [20 x i8]* %Big, i64 0, i64 0
-  call void @llvm.lifetime.start(i64 20, i8* %0)
+  call void @llvm.lifetime.start.p0i8(i64 20, i8* %0)
   %1 = getelementptr inbounds [10 x i8], [10 x i8]* %Small, i64 0, i64 0
-  call void @llvm.lifetime.start(i64 10, i8* %1)
+  call void @llvm.lifetime.start.p0i8(i64 10, i8* %1)
   %tobool = icmp ne i32 %N, 0
   %add.ptr = getelementptr inbounds [20 x i8], [20 x i8]* %Big, i64 0, i64 10
   %cond = select i1 %tobool, i8* %add.ptr, i8* %1
   %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %cond, i1 false)
   %conv = trunc i64 %2 to i32
-  call void @llvm.lifetime.end(i64 10, i8* %1)
-  call void @llvm.lifetime.end(i64 20, i8* %0)
+  call void @llvm.lifetime.end.p0i8(i64 10, i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 20, i8* %0)
   ret i32 %conv
 ; CHECK: ret i32 10 
 }
 
-declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
 
 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
 
-declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
 
 define void @foo() {
 entry:
diff --git a/test/Transforms/InstCombine/builtin-object-size-ptr.ll b/test/Transforms/InstCombine/builtin-object-size-ptr.ll
index b38513999dc1a..ada3fc1670265 100644
--- a/test/Transforms/InstCombine/builtin-object-size-ptr.ll
+++ b/test/Transforms/InstCombine/builtin-object-size-ptr.ll
@@ -16,19 +16,19 @@ define i32 @foo() #0 {
 entry:
   %var = alloca %struct.V, align 4
   %0 = bitcast %struct.V* %var to i8*
-  call void @llvm.lifetime.start(i64 28, i8* %0) #3
+  call void @llvm.lifetime.start.p0i8(i64 28, i8* %0) #3
   %buf1 = getelementptr inbounds %struct.V, %struct.V* %var, i32 0, i32 0
   %arrayidx = getelementptr inbounds [10 x i8], [10 x i8]* %buf1, i64 0, i64 1
   %1 = call i64 @llvm.objectsize.i64.p0i8(i8* %arrayidx, i1 false)
   %conv = trunc i64 %1 to i32
-  call void @llvm.lifetime.end(i64 28, i8* %0) #3
+  call void @llvm.lifetime.end.p0i8(i64 28, i8* %0) #3
   ret i32 %conv
 ; CHECK: ret i32 27
 ; CHECK-NOT: ret i32 -1
 }
 
-declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
 
 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #2
 
-declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
diff --git a/test/Transforms/InstCombine/call-guard.ll b/test/Transforms/InstCombine/call-guard.ll
new file mode 100644
index 0000000000000..9664467f914b4
--- /dev/null
+++ b/test/Transforms/InstCombine/call-guard.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @test_guard_adjacent_same_cond(i1 %A) {
+; CHECK-LABEL: @test_guard_adjacent_same_cond(
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 %A) [ "deopt"() ]
+; CHECK-NEXT:    ret void
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  ret void
+}
+
+define void @test_guard_adjacent_diff_cond(i1 %A, i1 %B, i1 %C) {
+; CHECK-LABEL: @test_guard_adjacent_diff_cond(
+; CHECK-NEXT:    %1 = and i1 %A, %B
+; CHECK-NEXT:    %2 = and i1 %1, %C
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 %2, i32 123) [ "deopt"() ]
+; CHECK-NEXT:    ret void
+  call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %C, i32 789 )[ "deopt"() ]
+  ret void
+}
diff --git a/test/Transforms/InstCombine/call_nonnull_arg.ll b/test/Transforms/InstCombine/call_nonnull_arg.ll
index c502aa05731e5..8127f4734fcd6 100644
--- a/test/Transforms/InstCombine/call_nonnull_arg.ll
+++ b/test/Transforms/InstCombine/call_nonnull_arg.ll
@@ -31,7 +31,7 @@ dead:
   unreachable
 }
 
-; FIXME: The nonnull attribute in the 'bar' declaration could be 
+; The nonnull attribute in the 'bar' declaration is 
 ; propagated to the parameters of the 'baz' callsite. 
 
 declare void @bar(i8*, i8* nonnull)
@@ -40,7 +40,7 @@ declare void @baz(i8*, i8*)
 define void @deduce_nonnull_from_another_call(i8* %a, i8* %b) {
 ; CHECK-LABEL: @deduce_nonnull_from_another_call(
 ; CHECK-NEXT:    call void @bar(i8* %a, i8* %b)
-; CHECK-NEXT:    call void @baz(i8* %b, i8* %b)
+; CHECK-NEXT:    call void @baz(i8* nonnull %b, i8* nonnull %b)
 ; CHECK-NEXT:    ret void
 ;
   call void @bar(i8* %a, i8* %b)
diff --git a/test/Transforms/InstCombine/cast-call-combine-prof.ll b/test/Transforms/InstCombine/cast-call-combine-prof.ll
new file mode 100644
index 0000000000000..05b71b666e242
--- /dev/null
+++ b/test/Transforms/InstCombine/cast-call-combine-prof.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; Check that instcombine preserves !prof metadata when removing function
+; prototype casts.
+
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_call_unexpected(i8*)
+declare void @foo(i16* %a)
+
+; CHECK-LABEL: @test_call()
+; CHECK: call void @foo(i16* null), !prof ![[PROF:[0-9]+]]
+define void @test_call() {
+  call void bitcast (void (i16*)* @foo to void (i8*)*) (i8* null), !prof !0
+  ret void
+}
+
+; CHECK-LABEL: @test_invoke()
+; CHECK: invoke void @foo(i16* null)
+; CHECK-NEXT: to label %done unwind label %lpad, !prof ![[PROF]]
+define void @test_invoke() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+  invoke void bitcast (void (i16*)* @foo to void (i8*)*) (i8* null)
+          to label %done unwind label %lpad, !prof !0
+
+done:
+  ret void
+
+lpad:
+  %lp = landingpad { i8*, i32 }
+          filter [0 x i8*] zeroinitializer
+  %ehptr = extractvalue { i8*, i32 } %lp, 0
+  tail call void @__cxa_call_unexpected(i8* %ehptr) noreturn nounwind
+  unreachable
+}
+
+; CHECK: ![[PROF]] = !{!"branch_weights", i32 2000}
+!0 = !{!"VP", i32 0, i64 2000, i64 -3913987384944532146, i64 2000}
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 1000, i32 1}
+!13 = !{i32 999000, i64 1000, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
diff --git a/test/Transforms/InstCombine/compare-alloca.ll b/test/Transforms/InstCombine/compare-alloca.ll
index ca24da191779c..414a07825f2f1 100644
--- a/test/Transforms/InstCombine/compare-alloca.ll
+++ b/test/Transforms/InstCombine/compare-alloca.ll
@@ -72,15 +72,15 @@ define i1 @alloca_argument_compare_escaped_through_store(i64* %arg, i64** %ptr)
   ; CHECK: ret i1 %cmp
 }
 
-declare void @llvm.lifetime.start(i64, i8* nocapture)
-declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
 define i1 @alloca_argument_compare_benign_instrs(i8* %arg) {
   %alloc = alloca i8
-  call void @llvm.lifetime.start(i64 1, i8* %alloc)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %alloc)
   %cmp = icmp eq i8* %arg, %alloc
   %x = load i8, i8* %arg
   store i8 %x, i8* %alloc
-  call void @llvm.lifetime.end(i64 1, i8* %alloc)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %alloc)
   ret i1 %cmp
   ; CHECK-LABEL: alloca_argument_compare_benign_instrs
   ; CHECK: ret i1 false
diff --git a/test/Transforms/InstCombine/compare-unescaped.ll b/test/Transforms/InstCombine/compare-unescaped.ll
index 0e512aa28911c..d15fc2fd4495c 100644
--- a/test/Transforms/InstCombine/compare-unescaped.ll
+++ b/test/Transforms/InstCombine/compare-unescaped.ll
@@ -144,7 +144,7 @@ chk2:
   ret i8* %n
 ; CHECK-LABEL: compare_ret_escape
 ; CHECK: %cmp = icmp eq i8* %n, %c
-; CHECK: %cmp2 = icmp eq i32* %bc, %lgp
+; CHECK: %cmp2 = icmp eq i32* %lgp, %bc
 }
 
 ; The malloc call for %m cannot be elided since it is used in the call to function f.
diff --git a/test/Transforms/InstCombine/consecutive-fences.ll b/test/Transforms/InstCombine/consecutive-fences.ll
new file mode 100644
index 0000000000000..6f1c412773861
--- /dev/null
+++ b/test/Transforms/InstCombine/consecutive-fences.ll
@@ -0,0 +1,47 @@
+; RUN: opt -instcombine -S %s | FileCheck %s
+
+; Make sure we collapse the fences in this case
+
+; CHECK-LABEL: define void @tinkywinky
+; CHECK-NEXT:   fence seq_cst
+; CHECK-NEXT:   fence singlethread acquire
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define void @tinkywinky() {
+  fence seq_cst
+  fence seq_cst
+  fence seq_cst
+  fence singlethread acquire
+  fence singlethread acquire
+  fence singlethread acquire
+  ret void
+}
+
+; CHECK-LABEL: define void @dipsy
+; CHECK-NEXT:   fence seq_cst
+; CHECK-NEXT:   fence singlethread seq_cst
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define void @dipsy() {
+  fence seq_cst
+  fence singlethread seq_cst
+  ret void
+}
+
+; CHECK-LABEL: define void @patatino
+; CHECK-NEXT:   fence acquire
+; CHECK-NEXT:   fence seq_cst
+; CHECK-NEXT:   fence acquire
+; CHECK-NEXT:   fence seq_cst
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define void @patatino() {
+  fence acquire
+  fence seq_cst
+  fence acquire
+  fence seq_cst
+  ret void
+}
diff --git a/test/Transforms/InstCombine/constant-fold-math.ll b/test/Transforms/InstCombine/constant-fold-math.ll
index ce8d337c08bfc..50cd6070896e8 100644
--- a/test/Transforms/InstCombine/constant-fold-math.ll
+++ b/test/Transforms/InstCombine/constant-fold-math.ll
@@ -45,12 +45,4 @@ define double @constant_fold_fmuladd_f64() #0 {
   ret double %x
 }
 
-; The sqrt intrinsic is undefined for negative inputs besides -0.0.
-; CHECK-LABEL: @bad_sqrt
-; CHECK-NEXT: ret double undef
-define double @bad_sqrt() {
-  %x = call double @llvm.sqrt.f64(double -2.000000e+00)
-  ret double %x
-}
-
 attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/convergent.ll b/test/Transforms/InstCombine/convergent.ll
index d4484cf4567ea..9b9ae6f5352c5 100644
--- a/test/Transforms/InstCombine/convergent.ll
+++ b/test/Transforms/InstCombine/convergent.ll
@@ -27,7 +27,7 @@ define i32 @no_extern() {
 }
 
 define i32 @indirect_call(i32 ()* %f) {
-  ; CHECK call i32 %f() [[CONVERGENT_ATTR]]
+  ; CHECK: call i32 %f() [[CONVERGENT_ATTR]]
   %a = call i32 %f() convergent
   ret i32 %a
 }
diff --git a/test/Transforms/InstCombine/deadcode.ll b/test/Transforms/InstCombine/deadcode.ll
index 8fe673d8c9c07..c5fa58babdbc0 100644
--- a/test/Transforms/InstCombine/deadcode.ll
+++ b/test/Transforms/InstCombine/deadcode.ll
@@ -22,12 +22,12 @@ define i32* @test2(i32 %width) {
 
 declare i8* @llvm.stacksave()
 
-declare void @llvm.lifetime.start(i64, i8*)
-declare void @llvm.lifetime.end(i64, i8*)
+declare void @llvm.lifetime.start.p0i8(i64, i8*)
+declare void @llvm.lifetime.end.p0i8(i64, i8*)
 
 define void @test3() {
-  call void @llvm.lifetime.start(i64 -1, i8* undef)
-  call void @llvm.lifetime.end(i64 -1, i8* undef)
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* undef)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* undef)
   ret void
 }
 
diff --git a/test/Transforms/InstCombine/debuginfo-dce.ll b/test/Transforms/InstCombine/debuginfo-dce.ll
new file mode 100644
index 0000000000000..e23aef7334d59
--- /dev/null
+++ b/test/Transforms/InstCombine/debuginfo-dce.ll
@@ -0,0 +1,106 @@
+; RUN: opt -instcombine %s -S -o - | FileCheck %s
+; Verify that the eliminated instructions (bitcast, gep, load) are salvaged into
+; a DIExpression.
+;
+; Originally created from the following C source and then heavily isolated/reduced.
+;
+; struct entry {
+;   struct entry *next;
+; };
+; void scan(struct entry *queue, struct entry *end)
+; {
+;   struct entry *entry;
+;   for (entry = (struct entry *)((char *)(queue->next) - 8);
+;        &entry->next == end;
+;        entry = (struct entry *)((char *)(entry->next) - 8)) {
+;   }
+; }
+
+; ModuleID = '<stdin>'
+source_filename = "test.c"
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+%struct.entry = type { %struct.entry* }
+
+; Function Attrs: nounwind ssp uwtable
+define void @salvage_load(%struct.entry** %queue) local_unnamed_addr #0 !dbg !14 {
+entry:
+  %im_not_dead = alloca %struct.entry*
+  %0 = load %struct.entry*, %struct.entry** %queue, align 8, !dbg !19
+  %1 = load %struct.entry*, %struct.entry** %queue, align 8, !dbg !19
+  call void @llvm.dbg.value(metadata %struct.entry* %1, i64 0, metadata !18, metadata !20), !dbg !19
+; CHECK: define void @salvage_load
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry** %queue, i64 0,
+; CHECK-SAME:                           metadata ![[LOAD_EXPR:[0-9]+]])
+  store %struct.entry* %1, %struct.entry** %im_not_dead, align 8
+  ret void, !dbg !21
+}
+
+; Function Attrs: nounwind ssp uwtable
+define void @salvage_bitcast(%struct.entry* %queue) local_unnamed_addr #0 !dbg !14 {
+entry:
+  %im_not_dead = alloca i8*
+  %0 = bitcast %struct.entry* %queue to i8*, !dbg !19
+  %1 = bitcast %struct.entry* %queue to i8*, !dbg !19
+  call void @llvm.dbg.value(metadata i8* %1, i64 0, metadata !18, metadata !20), !dbg !19
+; CHECK: define void @salvage_bitcast
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue, i64 0,
+; CHECK-SAME:                           metadata ![[BITCAST_EXPR:[0-9]+]])
+  store i8* %1, i8** %im_not_dead, align 8
+  ret void, !dbg !21
+}
+
+; Function Attrs: nounwind ssp uwtable
+define void @salvage_gep(%struct.entry* %queue, %struct.entry* %end) local_unnamed_addr #0 !dbg !14 {
+entry:
+  %im_not_dead = alloca %struct.entry**
+  %0 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !19
+  %1 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !19
+  call void @llvm.dbg.value(metadata %struct.entry** %1, i64 0, metadata !18, metadata !20), !dbg !19
+; CHECK: define void @salvage_gep
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue, i64 0,
+; CHECK-SAME:                           metadata ![[GEP_EXPR:[0-9]+]])
+  store %struct.entry** %1, %struct.entry*** %im_not_dead, align 8
+  ret void, !dbg !21
+}
+
+; CHECK: ![[LOAD_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_plus, 0)
+; CHECK: ![[BITCAST_EXPR]] = !DIExpression(DW_OP_plus, 0)
+; CHECK: ![[GEP_EXPR]] = !DIExpression(DW_OP_minus, 8, DW_OP_plus, 0)
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11, !12}
+!llvm.ident = !{!13}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (trunk 297628) (llvm/trunk 297643)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3)
+!1 = !DIFile(filename: "test.c", directory: "/")
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64)
+!5 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "entry", file: !1, line: 1, size: 64, elements: !6)
+!6 = !{!7}
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "next", scope: !5, file: !1, line: 2, baseType: !4, size: 64)
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64)
+!9 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{i32 1, !"PIC Level", i32 2}
+!13 = !{!"clang version 5.0.0 (trunk 297628) (llvm/trunk 297643)"}
+!14 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !17)
+!15 = !DISubroutineType(types: !16)
+!16 = !{null, !4, !4}
+!17 = !{!18}
+!18 = !DILocalVariable(name: "entry", scope: !14, file: !1, line: 6, type: !4)
+!19 = !DILocation(line: 6, column: 17, scope: !14)
+!20 = !DIExpression(DW_OP_plus, 0)
+!21 = !DILocation(line: 11, column: 1, scope: !14)
diff --git a/test/Transforms/InstCombine/double-float-shrink-2.ll b/test/Transforms/InstCombine/double-float-shrink-2.ll
index 7f6df92c96c5a..4813614f26cbd 100644
--- a/test/Transforms/InstCombine/double-float-shrink-2.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-2.ll
@@ -1,28 +1,9 @@
-; RUN: opt < %s -instcombine -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -instcombine -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY %s
-; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY %s
-; RUN: opt < %s -instcombine -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -instcombine -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY %s
-
-; DO-SIMPLIFY: call float @floorf(
-; DO-SIMPLIFY: call float @ceilf(
-; DO-SIMPLIFY: call float @roundf(
-; DO-SIMPLIFY: call float @nearbyintf(
-; DO-SIMPLIFY: call float @truncf(
-; DO-SIMPLIFY: call float @fabsf(
-
-; C89-SIMPLIFY: call float @floorf(
-; C89-SIMPLIFY: call float @ceilf(
-; C89-SIMPLIFY: call double @round(
-; C89-SIMPLIFY: call double @nearbyint(
-
-; DONT-SIMPLIFY: call double @floor(
-; DONT-SIMPLIFY: call double @ceil(
-; DONT-SIMPLIFY: call double @round(
-; DONT-SIMPLIFY: call double @nearbyint(
-; DONT-SIMPLIFY: call double @trunc(
-; DONT-SIMPLIFY: call double @fabs(
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY -check-prefix=ALL %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY -check-prefix=ALL %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY -check-prefix=ALL %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY -check-prefix=ALL %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY -check-prefix=ALL %s
+; RUN: opt < %s -instcombine -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY -check-prefix=ALL %s
 
 declare double @floor(double)
 declare double @ceil(double)
@@ -31,7 +12,18 @@ declare double @nearbyint(double)
 declare double @trunc(double)
 declare double @fabs(double)
 
-define float @test_floor(float %C) {
+declare double @llvm.floor.f64(double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.round.f64(double)
+declare double @llvm.nearbyint.f64(double)
+declare double @llvm.trunc.f64(double)
+declare double @llvm.fabs.f64(double)
+
+; ALL-LABEL: @test_shrink_libcall_floor(
+; DO-SIMPLIFY: call float @llvm.floor.f32(
+; C89-SIMPLIFY: call float @llvm.floor.f32(
+; DONT-SIMPLIFY: call float @llvm.floor.f32(
+define float @test_shrink_libcall_floor(float %C) {
   %D = fpext float %C to double
   ; --> floorf
   %E = call double @floor(double %D)
@@ -39,7 +31,11 @@ define float @test_floor(float %C) {
   ret float %F
 }
 
-define float @test_ceil(float %C) {
+; ALL-LABEL: @test_shrink_libcall_ceil(
+; DO-SIMPLIFY: call float @llvm.ceil.f32(
+; C89-SIMPLIFY: call float @llvm.ceil.f32(
+; DONT-SIMPLIFY: call float @llvm.ceil.f32(
+define float @test_shrink_libcall_ceil(float %C) {
   %D = fpext float %C to double
   ; --> ceilf
   %E = call double @ceil(double %D)
@@ -47,7 +43,11 @@ define float @test_ceil(float %C) {
   ret float %F
 }
 
-define float @test_round(float %C) {
+; ALL-LABEL: @test_shrink_libcall_round(
+; DO-SIMPLIFY: call float @llvm.round.f32(
+; C89-SIMPLIFY: call double @round(
+; DONT-SIMPLIFY: call double @round(
+define float @test_shrink_libcall_round(float %C) {
   %D = fpext float %C to double
   ; --> roundf
   %E = call double @round(double %D)
@@ -55,7 +55,11 @@ define float @test_round(float %C) {
   ret float %F
 }
 
-define float @test_nearbyint(float %C) {
+; ALL-LABEL: @test_shrink_libcall_nearbyint(
+; DO-SIMPLIFY: call float @llvm.nearbyint.f32(
+; C89-SIMPLIFY: call double @nearbyint(
+; DONT-SIMPLIFY: call double @nearbyint(
+define float @test_shrink_libcall_nearbyint(float %C) {
   %D = fpext float %C to double
   ; --> nearbyintf
   %E = call double @nearbyint(double %D)
@@ -63,7 +67,10 @@ define float @test_nearbyint(float %C) {
   ret float %F
 }
 
-define float @test_trunc(float %C) {
+; ALL-LABEL: @test_shrink_libcall_trunc(
+; DO-SIMPLIFY: call float @llvm.trunc.f32(
+; DONT-SIMPLIFY: call double @trunc(
+define float @test_shrink_libcall_trunc(float %C) {
   %D = fpext float %C to double
   ; --> truncf
   %E = call double @trunc(double %D)
@@ -71,10 +78,386 @@ define float @test_trunc(float %C) {
   ret float %F
 }
 
-define float @test_fabs(float %C) {
+; ALL-LABEL: @test_shrink_libcall_fabs(
+; DO-SIMPLIFY: call float @llvm.fabs.f32(
+
+; This is replaced with the intrinsic, which does the right thing on
+; all platforms.
+; DONT-SIMPLIFY: call float @llvm.fabs.f32(
+define float @test_shrink_libcall_fabs(float %C) {
   %D = fpext float %C to double
   ; --> fabsf
   %E = call double @fabs(double %D)
   %F = fptrunc double %E to float
   ret float %F
 }
+
+; Make sure fast math flags are preserved
+; ALL-LABEL: @test_shrink_libcall_fabs_fast(
+; DO-SIMPLIFY: call fast float @llvm.fabs.f32(
+define float @test_shrink_libcall_fabs_fast(float %C) {
+  %D = fpext float %C to double
+  ; --> fabsf
+  %E = call fast double @fabs(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_floor(
+; ALL: call float @llvm.floor.f32(
+define float @test_shrink_intrin_floor(float %C) {
+  %D = fpext float %C to double
+  ; --> floorf
+  %E = call double @llvm.floor.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_ceil(
+; ALL: call float @llvm.ceil.f32(
+define float @test_shrink_intrin_ceil(float %C) {
+  %D = fpext float %C to double
+  ; --> ceilf
+  %E = call double @llvm.ceil.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_round(
+; ALL: call float @llvm.round.f32(
+define float @test_shrink_intrin_round(float %C) {
+  %D = fpext float %C to double
+  ; --> roundf
+  %E = call double @llvm.round.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_nearbyint(
+; ALL: call float @llvm.nearbyint.f32(
+define float @test_shrink_intrin_nearbyint(float %C) {
+  %D = fpext float %C to double
+  ; --> nearbyintf
+  %E = call double @llvm.nearbyint.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_trunc(
+; ALL: call float @llvm.trunc.f32(
+define float @test_shrink_intrin_trunc(float %C) {
+  %D = fpext float %C to double
+  %E = call double @llvm.trunc.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_fabs(
+; ALL: call float @llvm.fabs.f32(
+define float @test_shrink_intrin_fabs(float %C) {
+  %D = fpext float %C to double
+  %E = call double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; Make sure fast math flags are preserved
+; ALL-LABEL: @test_shrink_intrin_fabs_fast(
+; ALL: call fast float @llvm.fabs.f32(
+define float @test_shrink_intrin_fabs_fast(float %C) {
+  %D = fpext float %C to double
+  %E = call fast double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_floor(
+; ALL: call double @llvm.floor.f64(
+define float @test_no_shrink_intrin_floor(double %D) {
+  %E = call double @llvm.floor.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_ceil(
+; ALL: call double @llvm.ceil.f64(
+define float @test_no_shrink_intrin_ceil(double %D) {
+  %E = call double @llvm.ceil.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_round(
+; ALL: call double @llvm.round.f64(
+define float @test_no_shrink_intrin_round(double %D) {
+  %E = call double @llvm.round.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_nearbyint(
+; ALL: call double @llvm.nearbyint.f64(
+define float @test_no_shrink_intrin_nearbyint(double %D) {
+  %E = call double @llvm.nearbyint.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_trunc(
+; ALL: call double @llvm.trunc.f64(
+define float @test_no_shrink_intrin_trunc(double %D) {
+  %E = call double @llvm.trunc.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_fabs_double_src(
+; ALL: call float @llvm.fabs.f32(
+define float @test_shrink_intrin_fabs_double_src(double %D) {
+  %E = call double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; Make sure fast math flags are preserved
+; ALL-LABEL: @test_shrink_intrin_fabs_fast_double_src(
+; ALL: call fast float @llvm.fabs.f32(
+define float @test_shrink_intrin_fabs_fast_double_src(double %D) {
+  %E = call fast double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_floor(
+; ALL: ret float 2.000000e+00
+define float @test_shrink_float_convertible_constant_intrin_floor() {
+  %E = call double @llvm.floor.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_ceil(
+; ALL: ret float 3.000000e+00
+define float @test_shrink_float_convertible_constant_intrin_ceil() {
+  %E = call double @llvm.ceil.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_round(
+; ALL: ret float 2.000000e+00
+define float @test_shrink_float_convertible_constant_intrin_round() {
+  %E = call double @llvm.round.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_nearbyint(
+; ALL: ret float 2.000000e+00
+define float @test_shrink_float_convertible_constant_intrin_nearbyint() {
+  %E = call double @llvm.nearbyint.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_trunc(
+; ALL: ret float 2.000000e+00
+define float @test_shrink_float_convertible_constant_intrin_trunc() {
+  %E = call double @llvm.trunc.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_fabs(
+; ALL: ret float 0x4000CCCCC0000000
+define float @test_shrink_float_convertible_constant_intrin_fabs() {
+  %E = call double @llvm.fabs.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; Make sure fast math flags are preserved
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_fabs_fast(
+; ALL: ret float 0x4000CCCCC0000000
+define float @test_shrink_float_convertible_constant_intrin_fabs_fast() {
+  %E = call fast double @llvm.fabs.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_mismatched_type_intrin_floor(
+; ALL-NEXT: %E = call double @llvm.floor.f64(double %D)
+; ALL-NEXT: %F = fptrunc double %E to half
+; ALL-NEXT: ret half %F
+define half @test_no_shrink_mismatched_type_intrin_floor(double %D) {
+  %E = call double @llvm.floor.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_no_shrink_mismatched_type_intrin_ceil(
+; ALL-NEXT: %E = call double @llvm.ceil.f64(double %D)
+; ALL-NEXT: %F = fptrunc double %E to half
+; ALL-NEXT: ret half %F
+define half @test_no_shrink_mismatched_type_intrin_ceil(double %D) {
+  %E = call double @llvm.ceil.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_no_shrink_mismatched_type_intrin_round(
+; ALL-NEXT: %E = call double @llvm.round.f64(double %D)
+; ALL-NEXT: %F = fptrunc double %E to half
+; ALL-NEXT: ret half %F
+define half @test_no_shrink_mismatched_type_intrin_round(double %D) {
+  %E = call double @llvm.round.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_no_shrink_mismatched_type_intrin_nearbyint(
+; ALL-NEXT: %E = call double @llvm.nearbyint.f64(double %D)
+; ALL-NEXT: %F = fptrunc double %E to half
+; ALL-NEXT: ret half %F
+define half @test_no_shrink_mismatched_type_intrin_nearbyint(double %D) {
+  %E = call double @llvm.nearbyint.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_no_shrink_mismatched_type_intrin_trunc(
+; ALL-NEXT: %E = call double @llvm.trunc.f64(double %D)
+; ALL-NEXT: %F = fptrunc double %E to half
+; ALL-NEXT: ret half %F
+define half @test_no_shrink_mismatched_type_intrin_trunc(double %D) {
+  %E = call double @llvm.trunc.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_shrink_mismatched_type_intrin_fabs_double_src(
+; ALL-NEXT: %1 = fptrunc double %D to half
+; ALL-NEXT: %F = call half @llvm.fabs.f16(half %1)
+; ALL-NEXT: ret half %F
+define half @test_shrink_mismatched_type_intrin_fabs_double_src(double %D) {
+  %E = call double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; Make sure fast math flags are preserved
+; ALL-LABEL: @test_mismatched_type_intrin_fabs_fast_double_src(
+; ALL-NEXT: %1 = fptrunc double %D to half
+; ALL-NEXT: %F = call fast half @llvm.fabs.f16(half %1)
+; ALL-NEXT: ret half %F
+define half @test_mismatched_type_intrin_fabs_fast_double_src(double %D) {
+  %E = call fast double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_floor_fp16_src(
+; ALL-NEXT: %E = call half @llvm.floor.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+define float @test_shrink_intrin_floor_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.floor.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_ceil_fp16_src(
+; ALL-NEXT: %E = call half @llvm.ceil.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_ceil_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.ceil.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_round_fp16_src(
+; ALL-NEXT: %E = call half @llvm.round.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_round_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.round.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_nearbyint_fp16_src(
+; ALL-NEXT: %E = call half @llvm.nearbyint.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_nearbyint_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.nearbyint.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_trunc_fp16_src(
+; ALL-NEXT: %E = call half @llvm.trunc.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_trunc_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.trunc.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_fabs_fp16_src(
+; ALL-NEXT: %E = call half @llvm.fabs.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_fabs_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; Make sure fast math flags are preserved
+; ALL-LABEL: @test_shrink_intrin_fabs_fast_fp16_src(
+; ALL-NEXT: %E = call fast half @llvm.fabs.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_fabs_fast_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call fast double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_floor_multi_use_fpext(
+; ALL: %D = fpext half %C to double
+; ALL: call double @llvm.floor.f64
+define float @test_no_shrink_intrin_floor_multi_use_fpext(half %C) {
+  %D = fpext half %C to double
+  store volatile double %D, double* undef
+  %E = call double @llvm.floor.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_fabs_multi_use_fpext(
+; ALL: %D = fpext half %C to double
+; ALL: call double @llvm.fabs.f64
+define float @test_no_shrink_intrin_fabs_multi_use_fpext(half %C) {
+  %D = fpext half %C to double
+  store volatile double %D, double* undef
+  %E = call double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
diff --git a/test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll b/test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll
new file mode 100644
index 0000000000000..107440f10a5a2
--- /dev/null
+++ b/test/Transforms/InstCombine/element-atomic-memcpy-to-loads.ll
@@ -0,0 +1,92 @@
+; RUN: opt -instcombine -unfold-element-atomic-memcpy-max-elements=8 -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Test basic unfolding
+define void @test1(i8* %Src, i8* %Dst) {
+; CHECK-LABEL: test1
+; CHECK-NOT: llvm.memcpy.element.atomic
+
+; CHECK-DAG: %memcpy_unfold.src_casted = bitcast i8* %Src to i32*
+; CHECK-DAG: %memcpy_unfold.dst_casted = bitcast i8* %Dst to i32*
+
+; CHECK-DAG: [[VAL1:%[^\s]+]] =  load atomic i32, i32* %memcpy_unfold.src_casted unordered, align 4
+; CHECK-DAG: store atomic i32 [[VAL1]], i32* %memcpy_unfold.dst_casted unordered, align 8
+
+; CHECK-DAG: [[VAL2:%[^\s]+]] =  load atomic i32, i32* %{{[^\s]+}} unordered, align 4
+; CHECK-DAG: store atomic i32 [[VAL2]], i32* %{{[^\s]+}} unordered, align 4
+
+; CHECK-DAG: [[VAL3:%[^\s]+]] =  load atomic i32, i32* %{{[^\s]+}} unordered, align 4
+; CHECK-DAG: store atomic i32 [[VAL3]], i32* %{{[^\s]+}} unordered, align 4
+
+; CHECK-DAG: [[VAL4:%[^\s]+]] =  load atomic i32, i32* %{{[^\s]+}} unordered, align 4
+; CHECK-DAG: store atomic i32 [[VAL4]], i32* %{{[^\s]+}} unordered, align 4
+entry:
+  call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %Dst, i8* align 8 %Src, i64 4, i32 4)
+  ret void
+}
+
+; Test that we don't unfold too much
+define void @test2(i8* %Src, i8* %Dst) {
+; CHECK-LABEL: test2
+
+; CHECK-NOT: load
+; CHECK-NOT: store
+; CHECK: llvm.memcpy.element.atomic
+entry:
+  call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %Dst, i8* align 4 %Src, i64 1000, i32 4)
+  ret void
+}
+
+; Test that we will not unfold into non native integers
+define void @test3(i8* %Src, i8* %Dst) {
+; CHECK-LABEL: test3
+
+; CHECK-NOT: load
+; CHECK-NOT: store
+; CHECK: llvm.memcpy.element.atomic
+entry:
+  call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 64 %Dst, i8* align 64 %Src, i64 4, i32 64)
+  ret void
+}
+
+; Test that we will eliminate redundant bitcasts
+define void @test4(i64* %Src, i64* %Dst) {
+; CHECK-LABEL: test4
+; CHECK-NOT: llvm.memcpy.element.atomic
+
+; CHECK-NOT: bitcast
+
+; CHECK-DAG: [[VAL1:%[^\s]+]] =  load atomic i64, i64* %Src unordered, align 16
+; CHECK-DAG: store atomic i64 [[VAL1]], i64* %Dst unordered, align 16
+
+; CHECK-DAG: [[SRC_ADDR2:%[^ ]+]] = getelementptr i64, i64* %Src, i64 1
+; CHECK-DAG: [[DST_ADDR2:%[^ ]+]] = getelementptr i64, i64* %Dst, i64 1
+; CHECK-DAG: [[VAL2:%[^\s]+]] =  load atomic i64, i64* [[SRC_ADDR2]] unordered, align 8
+; CHECK-DAG: store atomic i64 [[VAL2]], i64* [[DST_ADDR2]] unordered, align 8
+
+; CHECK-DAG: [[SRC_ADDR3:%[^ ]+]] = getelementptr i64, i64* %Src, i64 2
+; CHECK-DAG: [[DST_ADDR3:%[^ ]+]] = getelementptr i64, i64* %Dst, i64 2
+; CHECK-DAG: [[VAL3:%[^ ]+]] =  load atomic i64, i64* [[SRC_ADDR3]] unordered, align 8
+; CHECK-DAG: store atomic i64 [[VAL3]], i64* [[DST_ADDR3]] unordered, align 8
+
+; CHECK-DAG: [[SRC_ADDR4:%[^ ]+]] = getelementptr i64, i64* %Src, i64 3
+; CHECK-DAG: [[DST_ADDR4:%[^ ]+]] = getelementptr i64, i64* %Dst, i64 3
+; CHECK-DAG: [[VAL4:%[^ ]+]] =  load atomic i64, i64* [[SRC_ADDR4]] unordered, align 8
+; CHECK-DAG: store atomic i64 [[VAL4]], i64* [[DST_ADDR4]] unordered, align 8
+entry:
+  %Src.casted = bitcast i64* %Src to i8*
+  %Dst.casted = bitcast i64* %Dst to i8*
+  call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 16 %Dst.casted, i8* align 16 %Src.casted, i64 4, i32 8)
+  ret void
+}
+
+define void @test5(i8* %Src, i8* %Dst) {
+; CHECK-LABEL: test5
+
+; CHECK-NOT: llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 64 %Dst, i8* align 64 %Src, i64 0, i32 64)
+entry:
+  call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 64 %Dst, i8* align 64 %Src, i64 0, i32 64)
+  ret void
+}
+
+declare void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* nocapture, i8* nocapture, i64, i32)
diff --git a/test/Transforms/InstCombine/exact.ll b/test/Transforms/InstCombine/exact.ll
index 436d5081c7aa2..96b6fd6899646 100644
--- a/test/Transforms/InstCombine/exact.ll
+++ b/test/Transforms/InstCombine/exact.ll
@@ -99,12 +99,12 @@ define i64 @ashr1(i64 %X) {
   ret i64 %B
 }
 
-; FIXME: The ashr should be exact (like it is in the preceding test).
+; The vector ashr should be exact (like it is in the preceding test).
 
 define <2 x i64> @ashr1_vec(<2 x i64> %X) {
 ; CHECK-LABEL: @ashr1_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i64> %X, <i64 8, i64 8>
-; CHECK-NEXT:    [[B:%.*]] = ashr <2 x i64> [[A]], <i64 2, i64 2>
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[A]], <i64 2, i64 2>
 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 ;
   %A = shl <2 x i64> %X, <i64 8, i64 8>
diff --git a/test/Transforms/InstCombine/fabs-libcall.ll b/test/Transforms/InstCombine/fabs-libcall.ll
new file mode 100644
index 0000000000000..5733badfa8f9d
--- /dev/null
+++ b/test/Transforms/InstCombine/fabs-libcall.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -mtriple=i686-apple-macosx -instcombine %s | FileCheck %s
+
+declare x86_fp80 @fabsl(x86_fp80)
+
+; CHECK-LABEL: @replace_fabs_call_f80(
+; CHECK-NEXT: %fabsl = call x86_fp80 @llvm.fabs.f80(x86_fp80 %x)
+; CHECK-NEXT: ret x86_fp80 %fabsl
+define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) {
+  %fabsl = tail call x86_fp80 @fabsl(x86_fp80 %x)
+  ret x86_fp80 %fabsl
+
+}
+
+; CHECK-LABEL: @fmf_replace_fabs_call_f80(
+; CHECK-NEXT: %fabsl = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 %x)
+; CHECK-NEXT: ret x86_fp80 %fabsl
+define x86_fp80 @fmf_replace_fabs_call_f80(x86_fp80 %x) {
+  %fabsl = tail call nnan x86_fp80 @fabsl(x86_fp80 %x)
+  ret x86_fp80 %fabsl
+}
+
diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll
index aee853ae9eeba..a95f7b306b558 100644
--- a/test/Transforms/InstCombine/fabs.ll
+++ b/test/Transforms/InstCombine/fabs.ll
@@ -1,6 +1,10 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -instcombine -S | FileCheck %s
 
-; Make sure all library calls are eliminated when the input is known positive.
+; Make sure libcalls are replaced with intrinsic calls.
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
 
 declare float @fabsf(float)
 declare double @fabs(double)
@@ -8,46 +12,46 @@ declare fp128 @fabsl(fp128)
 declare float @llvm.fma.f32(float, float, float)
 declare float @llvm.fmuladd.f32(float, float, float)
 
-define float @square_fabs_call_f32(float %x) {
-  %mul = fmul float %x, %x
-  %fabsf = tail call float @fabsf(float %mul)
+define float @replace_fabs_call_f32(float %x) {
+  %fabsf = tail call float @fabsf(float %x)
   ret float %fabsf
 
-; CHECK-LABEL: square_fabs_call_f32(
-; CHECK-NEXT: %mul = fmul float %x, %x
-; CHECK-NEXT: %fabsf = tail call float @fabsf(float %mul)
+; CHECK-LABEL: @replace_fabs_call_f32(
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %x)
 ; CHECK-NEXT: ret float %fabsf
 }
 
-define double @square_fabs_call_f64(double %x) {
-  %mul = fmul double %x, %x
-  %fabs = tail call double @fabs(double %mul)
+define double @replace_fabs_call_f64(double %x) {
+  %fabs = tail call double @fabs(double %x)
   ret double %fabs
 
-; CHECK-LABEL: square_fabs_call_f64(
-; CHECK-NEXT: %mul = fmul double %x, %x
-; CHECK-NEXT: %fabs = tail call double @fabs(double %mul)
+; CHECK-LABEL: @replace_fabs_call_f64(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
 ; CHECK-NEXT: ret double %fabs
 }
 
-define fp128 @square_fabs_call_f128(fp128 %x) {
-  %mul = fmul fp128 %x, %x
-  %fabsl = tail call fp128 @fabsl(fp128 %mul)
+define fp128 @replace_fabs_call_f128(fp128 %x) {
+  %fabsl = tail call fp128 @fabsl(fp128 %x)
   ret fp128 %fabsl
 
-; CHECK-LABEL: square_fabs_call_f128(
-; CHECK-NEXT: %mul = fmul fp128 %x, %x
-; CHECK-NEXT: %fabsl = tail call fp128 @fabsl(fp128 %mul)
+; CHECK-LABEL: @replace_fabs_call_f128(
+; CHECK-NEXT: %fabsl = call fp128 @llvm.fabs.f128(fp128 %x)
 ; CHECK-NEXT: ret fp128 %fabsl
 }
 
+; Make sure fast math flags are preserved when replacing the libcall.
+define float @fmf_replace_fabs_call_f32(float %x) {
+  %fabsf = tail call nnan float @fabsf(float %x)
+  ret float %fabsf
+
+; CHECK-LABEL: @fmf_replace_fabs_call_f32(
+; CHECK-NEXT: %fabsf = call nnan float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: ret float %fabsf
+}
+
 ; Make sure all intrinsic calls are eliminated when the input is known
 ; positive.
 
-declare float @llvm.fabs.f32(float)
-declare double @llvm.fabs.f64(double)
-declare fp128 @llvm.fabs.f128(fp128)
-
 ; The fabs cannot be eliminated because %x may be a NaN
 define float @square_fabs_intrinsic_f32(float %x) {
   %mul = fmul float %x, %x
@@ -102,10 +106,8 @@ define float @square_fabs_shrink_call1(float %x) {
   ret float %trunc
 
 ; CHECK-LABEL: square_fabs_shrink_call1(
-; CHECK-NEXT: %ext = fpext float %x to double
-; CHECK-NEXT: %sq = fmul double %ext, %ext
-; CHECK-NEXT: call double @fabs(double %sq)
-; CHECK-NEXT: %trunc = fptrunc double %fabs to float
+; CHECK-NEXT: fmul float %x, %x
+; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float
 ; CHECK-NEXT: ret float %trunc
 }
 
@@ -118,8 +120,8 @@ define float @square_fabs_shrink_call2(float %x) {
 
 ; CHECK-LABEL: square_fabs_shrink_call2(
 ; CHECK-NEXT: %sq = fmul float %x, %x
-; CHECK-NEXT: %fabsf = call float @fabsf(float %sq)
-; CHECK-NEXT: ret float %fabsf
+; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float %sq)
+; CHECK-NEXT: ret float %trunc
 }
 
 ; CHECK-LABEL: @fabs_select_constant_negative_positive(
@@ -214,3 +216,16 @@ define float @square_nnan_fmuladd_fabs_intrinsic_f32(float %x) {
 ; CHECK-NEXT: %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
 ; CHECK-NEXT: ret float %fmuladd
 }
+
+; Don't introduce a second fpext: %fpext has another use (the volatile store), so the f64 fabs must be kept as-is.
+; CHECK-LABEL: @multi_use_fabs_fpext(
+; CHECK: %fpext = fpext float %x to double
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %fpext)
+; CHECK-NEXT: store volatile double %fpext, double* undef, align 8
+; CHECK-NEXT: ret double %fabs
+define double @multi_use_fabs_fpext(float %x) {
+  %fpext = fpext float %x to double
+  %fabs = call double @llvm.fabs.f64(double %fpext)
+  store volatile double %fpext, double* undef
+  ret double %fabs
+}
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index ad8a9247e4e1d..6ddf3a58529f4 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -831,3 +831,26 @@ define fp128 @min4(fp128 %a, fp128 %b) {
 ; CHECK-NEXT:  select {{.*}} fp128 %a, fp128 %b 
 ; CHECK-NEXT:  ret
 }
+
+define float @test55(i1 %which, float %a) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    [[PHITMP:%.*]] = fadd fast float [[A:%.*]], 1.000000e+00
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi float [ 3.000000e+00, [[ENTRY:%.*]] ], [ [[PHITMP]], [[DELAY]] ]
+; CHECK-NEXT:    ret float [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi float [ 2.0, %entry ], [ %a, %delay ]
+  %value = fadd fast float %A, 1.0
+  ret float %value
+}
diff --git a/test/Transforms/InstCombine/fcmp.ll b/test/Transforms/InstCombine/fcmp.ll
index 7fd46f2281832..40f7bf9b64fa8 100644
--- a/test/Transforms/InstCombine/fcmp.ll
+++ b/test/Transforms/InstCombine/fcmp.ll
@@ -3,238 +3,291 @@
 declare double @llvm.fabs.f64(double) nounwind readnone
 
 define i1 @test1(float %x, float %y) nounwind {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float %x, %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
   %ext1 = fpext float %x to double
   %ext2 = fpext float %y to double
   %cmp = fcmp ogt double %ext1, %ext2
   ret i1 %cmp
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: fcmp ogt float %x, %y
 }
 
 define i1 @test2(float %a) nounwind {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float %a, 1.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
   %ext = fpext float %a to double
   %cmp = fcmp ogt double %ext, 1.000000e+00
   ret i1 %cmp
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: fcmp ogt float %a, 1.0
 }
 
 define i1 @test3(float %a) nounwind {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[EXT:%.*]] = fpext float %a to double
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt double [[EXT]], 0x3FF0000000000001
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
   %ext = fpext float %a to double
   %cmp = fcmp ogt double %ext, 0x3FF0000000000001 ; more precision than float.
   ret i1 %cmp
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: fpext float %a to double
 }
 
 define i1 @test4(float %a) nounwind {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[EXT:%.*]] = fpext float %a to double
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt double [[EXT]], 0x36A0000000000000
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
   %ext = fpext float %a to double
   %cmp = fcmp ogt double %ext, 0x36A0000000000000 ; denormal in float.
   ret i1 %cmp
-; CHECK-LABEL: @test4(
-; CHECK-NEXT: fpext float %a to double
 }
 
 define i1 @test5(float %a) nounwind {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt float %a, -1.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
   %neg = fsub float -0.000000e+00, %a
   %cmp = fcmp ogt float %neg, 1.000000e+00
   ret i1 %cmp
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: fcmp olt float %a, -1.0
 }
 
 define i1 @test6(float %x, float %y) nounwind {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float %x, %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
   %neg1 = fsub float -0.000000e+00, %x
   %neg2 = fsub float -0.000000e+00, %y
   %cmp = fcmp olt float %neg1, %neg2
   ret i1 %cmp
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: fcmp ogt float %x, %y
 }
 
 define i1 @test7(float %x) nounwind readnone ssp noredzone {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float %x, 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
   %ext = fpext float %x to ppc_fp128
   %cmp = fcmp ogt ppc_fp128 %ext, 0xM00000000000000000000000000000000
   ret i1 %cmp
-; CHECK-LABEL: @test7(
-; CHECK-NEXT: fcmp ogt float %x, 0.000000e+00
 }
 
 define float @test8(float %x) nounwind readnone optsize ssp {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt float %x, 0.000000e+00
+; CHECK-NEXT:    [[CONV2:%.*]] = uitofp i1 [[CMP]] to float
+; CHECK-NEXT:    ret float [[CONV2]]
+;
   %conv = fpext float %x to double
   %cmp = fcmp olt double %conv, 0.000000e+00
   %conv1 = zext i1 %cmp to i32
   %conv2 = sitofp i32 %conv1 to float
   ret float %conv2
 ; Float comparison to zero shouldn't cast to double.
-; CHECK-LABEL: @test8(
-; CHECK-NEXT: fcmp olt float %x, 0.000000e+00
 }
 
 declare double @fabs(double) nounwind readnone
 
 define i32 @test9(double %a) nounwind {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    ret i32 0
+;
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp olt double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test9(
-; CHECK-NOT: fabs
-; CHECK: ret i32 0
 }
 
 define i32 @test9_intrinsic(double %a) nounwind {
+; CHECK-LABEL: @test9_intrinsic(
+; CHECK-NEXT:    ret i32 0
+;
   %call = tail call double @llvm.fabs.f64(double %a) nounwind
   %cmp = fcmp olt double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test9_intrinsic(
-; CHECK-NOT: fabs
-; CHECK: ret i32 0
 }
 
 define i32 @test10(double %a) nounwind {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp ole double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test10(
-; CHECK-NOT: fabs
-; CHECK: fcmp oeq double %a, 0.000000e+00
 }
 
 define i32 @test10_intrinsic(double %a) nounwind {
+; CHECK-LABEL: @test10_intrinsic(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @llvm.fabs.f64(double %a) nounwind
   %cmp = fcmp ole double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test10_intrinsic(
-; CHECK-NOT: fabs
-; CHECK: fcmp oeq double %a, 0.000000e+00
 }
 
 define i32 @test11(double %a) nounwind {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp one double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp ogt double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test11(
-; CHECK-NOT: fabs
-; CHECK: fcmp one double %a, 0.000000e+00
 }
 
 define i32 @test11_intrinsic(double %a) nounwind {
+; CHECK-LABEL: @test11_intrinsic(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp one double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @llvm.fabs.f64(double %a) nounwind
   %cmp = fcmp ogt double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test11_intrinsic(
-; CHECK-NOT: fabs
-; CHECK: fcmp one double %a, 0.000000e+00
 }
 
 define i32 @test12(double %a) nounwind {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ord double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp oge double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test12(
-; CHECK-NOT: fabs
-; CHECK: fcmp ord double %a, 0.000000e+00
 }
 
 define i32 @test12_intrinsic(double %a) nounwind {
+; CHECK-LABEL: @test12_intrinsic(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ord double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @llvm.fabs.f64(double %a) nounwind
   %cmp = fcmp oge double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test12_intrinsic(
-; CHECK-NOT: fabs
-; CHECK: fcmp ord double %a, 0.000000e+00
 }
 
 define i32 @test13(double %a) nounwind {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp une double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp une double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test13(
-; CHECK-NOT: fabs
-; CHECK: fcmp une double %a, 0.000000e+00
 }
 
 define i32 @test13_intrinsic(double %a) nounwind {
+; CHECK-LABEL: @test13_intrinsic(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp une double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @llvm.fabs.f64(double %a) nounwind
   %cmp = fcmp une double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test13_intrinsic(
-; CHECK-NOT: fabs
-; CHECK: fcmp une double %a, 0.000000e+00
 }
 
 define i32 @test14(double %a) nounwind {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp oeq double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test14(
-; CHECK-NOT: fabs
-; CHECK: fcmp oeq double %a, 0.000000e+00
 }
 
 define i32 @test14_intrinsic(double %a) nounwind {
+; CHECK-LABEL: @test14_intrinsic(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @llvm.fabs.f64(double %a) nounwind
   %cmp = fcmp oeq double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test14_intrinsic(
-; CHECK-NOT: fabs
-; CHECK: fcmp oeq double %a, 0.000000e+00
 }
 
 define i32 @test15(double %a) nounwind {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp one double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp one double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test15(
-; CHECK-NOT: fabs
-; CHECK: fcmp one double %a, 0.000000e+00
 }
 
 define i32 @test15_intrinsic(double %a) nounwind {
+; CHECK-LABEL: @test15_intrinsic(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp one double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @llvm.fabs.f64(double %a) nounwind
   %cmp = fcmp one double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test15_intrinsic(
-; CHECK-NOT: fabs
-; CHECK: fcmp one double %a, 0.000000e+00
 }
 
 define i32 @test16(double %a) nounwind {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ueq double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp ueq double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test16(
-; CHECK-NOT: fabs
-; CHECK: fcmp ueq double %a, 0.000000e+00
 }
 
 define i32 @test16_intrinsic(double %a) nounwind {
+; CHECK-LABEL: @test16_intrinsic(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ueq double %a, 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double @llvm.fabs.f64(double %a) nounwind
   %cmp = fcmp ueq double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK-LABEL: @test16_intrinsic(
-; CHECK-NOT: fabs
-; CHECK: fcmp ueq double %a, 0.000000e+00
 }
 
 ; Don't crash.
 define i32 @test17(double %a, double (double)* %p) nounwind {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call double %p(double %a) #1
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ueq double [[CALL]], 0.000000e+00
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
   %call = tail call double %p(double %a) nounwind
   %cmp = fcmp ueq double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
@@ -243,16 +296,18 @@ define i32 @test17(double %a, double (double)* %p) nounwind {
 
 ; Can fold fcmp with undef on one side by choosing NaN for the undef
 define i32 @test18_undef_unordered(float %a) nounwind {
-; CHECK-LABEL: @test18_undef_unordered
-; CHECK: ret i32 1
+; CHECK-LABEL: @test18_undef_unordered(
+; CHECK-NEXT:    ret i32 1
+;
   %cmp = fcmp ueq float %a, undef
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 ; Can fold fcmp with undef on one side by choosing NaN for the undef
 define i32 @test18_undef_ordered(float %a) nounwind {
-; CHECK-LABEL: @test18_undef_ordered
-; CHECK: ret i32 0
+; CHECK-LABEL: @test18_undef_ordered(
+; CHECK-NEXT:    ret i32 0
+;
   %cmp = fcmp oeq float %a, undef
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -264,14 +319,18 @@ define i32 @test18_undef_ordered(float %a) nounwind {
 ; because whatever you choose for the first undef
 ; you can choose NaN for the other undef
 define i1 @test19_undef_unordered() nounwind {
-; CHECK-LABEL: @test19_undef
-; CHECK: ret i1 true
+; CHECK-LABEL: @test19_undef_unordered(
+; CHECK-NEXT:    ret i1 true
+;
   %cmp = fcmp ueq float undef, undef
   ret i1 %cmp
 }
+
 define i1 @test19_undef_ordered() nounwind {
-; CHECK-LABEL: @test19_undef
-; CHECK: ret i1 false
+; CHECK-LABEL: @test19_undef_ordered(
+; CHECK-NEXT:    ret i1 false
+;
   %cmp = fcmp oeq float undef, undef
   ret i1 %cmp
 }
+
diff --git a/test/Transforms/InstCombine/float-shrink-compare.ll b/test/Transforms/InstCombine/float-shrink-compare.ll
index a08f9531d2174..e0925952bf44d 100644
--- a/test/Transforms/InstCombine/float-shrink-compare.ll
+++ b/test/Transforms/InstCombine/float-shrink-compare.ll
@@ -3,171 +3,329 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 define i32 @test1(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @ceil(double %1) nounwind readnone
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %ceil = call double @ceil(double %x.ext) nounwind readnone
+  %ext.y = fpext float %y to double
+  %cmp = fcmp oeq double %ceil, %ext.y
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT: %ceilf = call float @ceilf(float %x)
-; CHECK-NEXT: fcmp oeq float %ceilf, %y
+; CHECK-NEXT: %ceil = call float @llvm.ceil.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %ceil, %y
+}
+
+define i32 @test1_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %ceil = call double @llvm.ceil.f64(double %x.ext) nounwind readnone
+  %ext.y = fpext float %y to double
+  %cmp = fcmp oeq double %ceil, %ext.y
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @test1_intrin(
+; CHECK-NEXT: %ceil = call float @llvm.ceil.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %ceil, %y
 }
 
 define i32 @test2(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @fabs(double %1) nounwind readnone
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %fabs = call double @fabs(double %x.ext) nounwind readnone
+  %y.ext = fpext float %y to double
+  %cmp = fcmp oeq double %fabs, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
-; CHECK-NEXT: fcmp oeq float %fabsf, %y
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %fabs, %y
 }
 
-define i32 @test3(float %x, float %y) nounwind uwtable {
+define i32 @test2_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %fabs = call double @llvm.fabs.f64(double %x.ext) nounwind readnone
+  %y.ext = fpext float %y to double
+  %cmp = fcmp oeq double %fabs, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @test2_intrin(
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %fabs, %y
+}
+
+define i32 @fmf_test2(float %x, float %y) nounwind uwtable {
   %1 = fpext float %x to double
-  %2 = call double @floor(double %1) nounwind readnone
+  %2 = call nnan double @fabs(double %1) nounwind readnone
   %3 = fpext float %y to double
   %4 = fcmp oeq double %2, %3
   %5 = zext i1 %4 to i32
   ret i32 %5
+; CHECK-LABEL: @fmf_test2(
+; CHECK-NEXT: [[FABS:%[0-9]+]] = call nnan float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: fcmp oeq float [[FABS]], %y
+}
+
+define i32 @test3(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %floor = call double @floor(double %x.ext) nounwind readnone
+  %y.ext = fpext float %y to double
+  %cmp = fcmp oeq double %floor, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test3(
-; CHECK-NEXT: %floorf = call float @floorf(float %x)
-; CHECK-NEXT: fcmp oeq float %floorf, %y
+; CHECK-NEXT: %floor = call float @llvm.floor.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %floor, %y
+}
+
+
+define i32 @test3_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %floor = call double @llvm.floor.f64(double %x.ext) nounwind readnone
+  %y.ext = fpext float %y to double
+  %cmp = fcmp oeq double %floor, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @test3_intrin(
+; CHECK-NEXT: %floor = call float @llvm.floor.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %floor, %y
 }
 
 define i32 @test4(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @nearbyint(double %1) nounwind
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %nearbyint = call double @nearbyint(double %x.ext) nounwind
+  %y.ext = fpext float %y to double
+  %cmp = fcmp oeq double %nearbyint, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x)
-; CHECK-NEXT: fcmp oeq float %nearbyintf, %y
+; CHECK-NEXT: %nearbyint = call float @llvm.nearbyint.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %nearbyint, %y
+}
+
+define i32 @shrink_nearbyint_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %nearbyint = call double @llvm.nearbyint.f64(double %x.ext) nounwind
+  %y.ext = fpext float %y to double
+  %cmp = fcmp oeq double %nearbyint, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @shrink_nearbyint_intrin(
+; CHECK-NEXT: %nearbyint = call float @llvm.nearbyint.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %nearbyint, %y
 }
 
 define i32 @test5(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @rint(double %1) nounwind
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %rint = call double @rint(double %x.ext) nounwind
+  %y.ext = fpext float %y to double
+  %cmp = fcmp oeq double %rint, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT: %rintf = call float @rintf(float %x)
-; CHECK-NEXT: fcmp oeq float %rintf, %y
+; CHECK-NEXT: %rint = call float @llvm.rint.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %rint, %y
 }
 
 define i32 @test6(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @round(double %1) nounwind readnone
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %round = call double @round(double %x.ext) nounwind readnone
+  %y.ext = fpext float %y to double
+  %cmp = fcmp oeq double %round, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT: %roundf = call float @roundf(float %x)
-; CHECK-NEXT: fcmp oeq float %roundf, %y
+; CHECK-NEXT: %round = call float @llvm.round.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %round, %y
+}
+
+define i32 @test6_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %round = call double @llvm.round.f64(double %x.ext) nounwind readnone
+  %y.ext = fpext float %y to double
+  %cmp = fcmp oeq double %round, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @test6_intrin(
+; CHECK-NEXT: %round = call float @llvm.round.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %round, %y
 }
 
 define i32 @test7(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @trunc(double %1) nounwind
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %trunc = call double @trunc(double %x.ext) nounwind
+  %y.ext = fpext float %y to double
+  %cmp = fcmp oeq double %trunc, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT: %truncf = call float @truncf(float %x)
-; CHECK-NEXT: fcmp oeq float %truncf, %y
+; CHECK-NEXT: %trunc = call float @llvm.trunc.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %trunc, %y
+}
+
+define i32 @test7_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %trunc = call double @llvm.trunc.f64(double %x.ext) nounwind
+  %y.ext = fpext float %y to double
+  %cmp = fcmp oeq double %trunc, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @test7_intrin(
+; CHECK-NEXT: %trunc = call float @llvm.trunc.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %trunc, %y
 }
 
 define i32 @test8(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @ceil(double %2) nounwind readnone
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %ceil = call double @ceil(double %x.ext) nounwind readnone
+  %cmp = fcmp oeq double %y.ext, %ceil
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test8(
-; CHECK-NEXT: %ceilf = call float @ceilf(float %x)
-; CHECK-NEXT: fcmp oeq float %ceilf, %y
+; CHECK-NEXT: %ceil = call float @llvm.ceil.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %ceil, %y
+}
+
+define i32 @test8_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %ceil = call double @llvm.ceil.f64(double %x.ext) nounwind readnone
+  %cmp = fcmp oeq double %y.ext, %ceil
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @test8_intrin(
+; CHECK-NEXT: %ceil = call float @llvm.ceil.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %ceil, %y
 }
 
 define i32 @test9(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @fabs(double %2) nounwind readnone
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %fabs = call double @fabs(double %x.ext) nounwind readnone
+  %cmp = fcmp oeq double %y.ext, %fabs
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test9(
-; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
-; CHECK-NEXT: fcmp oeq float %fabsf, %y
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %fabs, %y
+}
+
+define i32 @test9_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %fabs = call double @llvm.fabs.f64(double %x.ext) nounwind readnone
+  %cmp = fcmp oeq double %y.ext, %fabs
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @test9_intrin(
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %fabs, %y
 }
 
 define i32 @test10(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @floor(double %2) nounwind readnone
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %floor = call double @floor(double %x.ext) nounwind readnone
+  %cmp = fcmp oeq double %floor, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT: %floorf = call float @floorf(float %x)
-; CHECK-NEXT: fcmp oeq float %floorf, %y
+; CHECK-NEXT: %floor = call float @llvm.floor.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %floor, %y
+}
+
+define i32 @test10_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %floor = call double @llvm.floor.f64(double %x.ext) nounwind readnone
+  %cmp = fcmp oeq double %floor, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @test10_intrin(
+; CHECK-NEXT: %floor = call float @llvm.floor.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %floor, %y
 }
 
 define i32 @test11(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @nearbyint(double %2) nounwind
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %nearbyint = call double @nearbyint(double %x.ext) nounwind
+  %cmp = fcmp oeq double %nearbyint, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x)
-; CHECK-NEXT: fcmp oeq float %nearbyintf, %y
+; CHECK-NEXT: %nearbyint = call float @llvm.nearbyint.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %nearbyint, %y
+}
+
+
+define i32 @test11_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %nearbyint = call double @llvm.nearbyint.f64(double %x.ext) nounwind
+  %cmp = fcmp oeq double %nearbyint, %y.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @test11_intrin(
+; CHECK-NEXT: %nearbyint = call float @llvm.nearbyint.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %nearbyint, %y
 }
 
 define i32 @test12(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @rint(double %2) nounwind
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %rint = call double @rint(double %x.ext) nounwind
+  %cmp = fcmp oeq double %y.ext, %rint
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test12(
-; CHECK-NEXT: %rintf = call float @rintf(float %x)
-; CHECK-NEXT: fcmp oeq float %rintf, %y
+; CHECK-NEXT: %rint = call float @llvm.rint.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %rint, %y
 }
 
 define i32 @test13(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @round(double %2) nounwind readnone
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %round = call double @round(double %x.ext) nounwind readnone
+  %cmp = fcmp oeq double %y.ext, %round
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test13(
-; CHECK-NEXT: %roundf = call float @roundf(float %x)
-; CHECK-NEXT: fcmp oeq float %roundf, %y
+; CHECK-NEXT: %round = call float @llvm.round.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %round, %y
+}
+
+define i32 @test13_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %round = call double @llvm.round.f64(double %x.ext) nounwind readnone
+  %cmp = fcmp oeq double %y.ext, %round
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @test13_intrin(
+; CHECK-NEXT: %round = call float @llvm.round.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %round, %y
 }
 
 define i32 @test14(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @trunc(double %2) nounwind
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %trunc = call double @trunc(double %x.ext) nounwind
+  %cmp = fcmp oeq double %y.ext, %trunc
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test14(
-; CHECK-NEXT: %truncf = call float @truncf(float %x)
-; CHECK-NEXT: fcmp oeq float %truncf, %y
+; CHECK-NEXT: %trunc = call float @llvm.trunc.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %trunc, %y
+}
+
+define i32 @test14_intrin(float %x, float %y) nounwind uwtable {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %trunc = call double @llvm.trunc.f64(double %x.ext) nounwind
+  %cmp = fcmp oeq double %y.ext, %trunc
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
+; CHECK-LABEL: @test14_intrin(
+; CHECK-NEXT: %trunc = call float @llvm.trunc.f32(float %x)
+; CHECK-NEXT: fcmp oeq float %trunc, %y
 }
 
 define i32 @test15(float %x, float %y, float %z) nounwind uwtable {
@@ -269,3 +427,10 @@ declare double @round(double) nounwind readnone
 declare double @trunc(double) nounwind readnone
 declare double @fmin(double, double) nounwind readnone
 declare double @fmax(double, double) nounwind readnone
+
+declare double @llvm.fabs.f64(double) nounwind readnone
+declare double @llvm.ceil.f64(double) nounwind readnone
+declare double @llvm.floor.f64(double) nounwind readnone
+declare double @llvm.nearbyint.f64(double) nounwind readnone
+declare double @llvm.round.f64(double) nounwind readnone
+declare double @llvm.trunc.f64(double) nounwind readnone
diff --git a/test/Transforms/InstCombine/fma.ll b/test/Transforms/InstCombine/fma.ll
index e41f1e7edd460..3808e07d89a0e 100644
--- a/test/Transforms/InstCombine/fma.ll
+++ b/test/Transforms/InstCombine/fma.ll
@@ -78,7 +78,8 @@ define float @fmuladd_fneg_x_fneg_y(float %x, float %y, float %z) {
 }
 
 ; CHECK-LABEL: @fmuladd_fneg_x_fneg_y_fast(
-; CHECK: %fmuladd = call fast float @llvm.fmuladd.f32(float %x, float %y, float %z)
+; CHECK-NEXT: %1 = fmul fast float %x, %y
+; CHECK-NEXT: %fmuladd = fadd fast float %1, %z
 define float @fmuladd_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
   %x.fneg = fsub float -0.0, %x
   %y.fneg = fsub float -0.0, %y
@@ -122,7 +123,8 @@ define float @fmuladd_fabs_x_fabs_x(float %x, float %z) {
 }
 
 ; CHECK-LABEL: @fmuladd_fabs_x_fabs_x_fast(
-; CHECK: %fmuladd = call fast float @llvm.fmuladd.f32(float %x, float %x, float %z)
+; CHECK-NEXT: %1 = fmul fast float %x, %x
+; CHECK-NEXT: %fmuladd = fadd fast float %1, %z
 define float @fmuladd_fabs_x_fabs_x_fast(float %x, float %z) {
   %x.fabs = call float @llvm.fabs.f32(float %x)
   %fmuladd = call fast float @llvm.fmuladd.f32(float %x.fabs, float %x.fabs, float %z)
@@ -144,7 +146,8 @@ define float @fma_k_y_z_fast(float %y, float %z) {
 }
 
 ; CHECK-LABEL: @fmuladd_k_y_z_fast(
-; CHECK: %fmuladd = call fast float @llvm.fmuladd.f32(float %y, float 4.000000e+00, float %z)
+; CHECK-NEXT: %1 = fmul fast float %y, 4.000000e+00
+; CHECK-NEXT: %fmuladd = fadd fast float %1, %z
 define float @fmuladd_k_y_z_fast(float %y, float %z) {
   %fmuladd = call fast float @llvm.fmuladd.f32(float 4.0, float %y, float %z)
   ret float %fmuladd
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 7ccbdf11fdeda..de8190da01c22 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -883,6 +883,33 @@ define %struct.C* @test46(%struct.C* %c1, %struct.C* %c2, i64 %N) {
 ; CHECK-NEXT:  ret %struct.C* [[GEP]]
 }
 
+define i32* @test47(i32* %I, i64 %C, i64 %D) {
+  %sub = sub i64 %D, %C
+  %A = getelementptr i32, i32* %I, i64 %C
+  %B = getelementptr i32, i32* %A, i64 %sub
+  ret i32* %B
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: %B = getelementptr i32, i32* %I, i64 %D
+}
+
+define i32* @test48(i32* %I, i64 %C, i64 %D) {
+  %sub = sub i64 %D, %C
+  %A = getelementptr i32, i32* %I, i64 %sub
+  %B = getelementptr i32, i32* %A, i64 %C
+  ret i32* %B
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: %B = getelementptr i32, i32* %I, i64 %D
+}
+
+define i32* @test49(i32* %I, i64 %C) {
+  %notC = xor i64 -1, %C
+  %A = getelementptr i32, i32* %I, i64 %C
+  %B = getelementptr i32, i32* %A, i64 %notC
+  ret i32* %B
+; CHECK-LABEL: @test49(
+; CHECK-NEXT: %B = getelementptr i32, i32* %I, i64 -1
+}
+
 define i32 addrspace(1)* @ascast_0_gep(i32* %p) nounwind {
 ; CHECK-LABEL: @ascast_0_gep(
 ; CHECK-NOT: getelementptr
@@ -904,4 +931,15 @@ define i32 addrspace(1)* @ascast_0_0_gep([128 x i32]* %p) nounwind {
   ret i32 addrspace(1)* %x
 }
 
+define <2 x i32*> @PR32414(i32** %ptr) {
+; CHECK-LABEL: @PR32414(
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32** %ptr to i32*
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT:    ret <2 x i32*> [[TMP1]]
+;
+  %tmp0 = bitcast i32** %ptr to i32*
+  %tmp1 = getelementptr inbounds i32, i32* %tmp0, <2 x i64> <i64 0, i64 1>
+  ret <2 x i32*> %tmp1
+}
+
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/icmp-add.ll b/test/Transforms/InstCombine/icmp-add.ll
new file mode 100644
index 0000000000000..efeb9d5bb45ba
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-add.ll
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; PR1949
+
+define i1 @test1(i32 %a) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 %a, -5
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %b = add i32 %a, 4
+  %c = icmp ult i32 %b, 4
+  ret i1 %c
+}
+
+define <2 x i1> @test1vec(<2 x i32> %a) {
+; CHECK-LABEL: @test1vec(
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> %a, <i32 -5, i32 -5>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %b = add <2 x i32> %a, <i32 4, i32 4>
+  %c = icmp ult <2 x i32> %b, <i32 4, i32 4>
+  ret <2 x i1> %c
+}
+
+define i1 @test2(i32 %a) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 %a, 4
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %b = sub i32 %a, 4
+  %c = icmp ugt i32 %b, -5
+  ret i1 %c
+}
+
+define <2 x i1> @test2vec(<2 x i32> %a) {
+; CHECK-LABEL: @test2vec(
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i32> %a, <i32 4, i32 4>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %b = sub <2 x i32> %a, <i32 4, i32 4>
+  %c = icmp ugt <2 x i32> %b, <i32 -5, i32 -5>
+  ret <2 x i1> %c
+}
+
+define i1 @test3(i32 %a) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 %a, 2147483643
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %b = add i32 %a, 4
+  %c = icmp slt i32 %b, 2147483652
+  ret i1 %c
+}
+
+define <2 x i1> @test3vec(<2 x i32> %a) {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i32> %a, <i32 2147483643, i32 2147483643>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %b = add <2 x i32> %a, <i32 4, i32 4>
+  %c = icmp slt <2 x i32> %b, <i32 2147483652, i32 2147483652>
+  ret <2 x i1> %c
+}
+
+define i1 @test4(i32 %a) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 %a, -4
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %b = add i32 %a, 2147483652
+  %c = icmp sge i32 %b, 4
+  ret i1 %c
+}
+
+define <2 x i1> @test4vec(<2 x i32> %a) {
+; CHECK-LABEL: @test4vec(
+; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i32> %a, <i32 -4, i32 -4>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %b = add <2 x i32> %a, <i32 2147483652, i32 2147483652>
+  %c = icmp sge <2 x i32> %b, <i32 4, i32 4>
+  ret <2 x i1> %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; This becomes equality because it's at the limit.
+
+define i1 @nsw_slt1(i8 %a) {
+; CHECK-LABEL: @nsw_slt1(
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 %a, -128
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %b = add nsw i8 %a, 100
+  %c = icmp slt i8 %b, -27
+  ret i1 %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; This becomes equality because it's at the limit.
+
+define i1 @nsw_slt2(i8 %a) {
+; CHECK-LABEL: @nsw_slt2(
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 %a, 127
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %b = add nsw i8 %a, -100
+  %c = icmp slt i8 %b, 27
+  ret i1 %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; Less than the limit, so the predicate doesn't change.
+
+define i1 @nsw_slt3(i8 %a) {
+; CHECK-LABEL: @nsw_slt3(
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i8 %a, -126
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %b = add nsw i8 %a, 100
+  %c = icmp slt i8 %b, -26
+  ret i1 %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; Less than the limit, so the predicate doesn't change.
+
+define i1 @nsw_slt4(i8 %a) {
+; CHECK-LABEL: @nsw_slt4(
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i8 %a, 126
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %b = add nsw i8 %a, -100
+  %c = icmp slt i8 %b, 26
+  ret i1 %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; Try sgt to make sure that works too.
+
+define i1 @nsw_sgt1(i8 %a) {
+; CHECK-LABEL: @nsw_sgt1(
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 %a, 127
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %b = add nsw i8 %a, -100
+  %c = icmp sgt i8 %b, 26
+  ret i1 %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; Try a vector type to make sure that works too.
+; Here C - C2 = -26 - 100 = -126 does not overflow, so the predicate stays 'sgt'.
+
+define <2 x i1> @nsw_sgt2_splat_vec(<2 x i8> %a) {
+; CHECK-LABEL: @nsw_sgt2_splat_vec(
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i8> %a, <i8 -126, i8 -126>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %b = add nsw <2 x i8> %a, <i8 100, i8 100>
+  %c = icmp sgt <2 x i8> %b, <i8 -26, i8 -26>
+  ret <2 x i1> %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; Comparison with 0 doesn't need special-casing.
+
+define i1 @slt_zero_add_nsw(i32 %a) {
+; CHECK-LABEL: @slt_zero_add_nsw(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 %a, -1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add nsw i32 %a, 1
+  %cmp = icmp slt i32 %add, 0
+  ret i1 %cmp
+}
+
+; The same fold should work with vectors.
+
+define <2 x i1> @slt_zero_add_nsw_splat_vec(<2 x i8> %a) {
+; CHECK-LABEL: @slt_zero_add_nsw_splat_vec(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> %a, <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %add = add nsw <2 x i8> %a, <i8 1, i8 1>
+  %cmp = icmp slt <2 x i8> %add, zeroinitializer
+  ret <2 x i1> %cmp
+}
+
+; Test the edges - instcombine should not interfere with simplification to constants.
+; Constant subtraction does not overflow, but this is false.
+
+define i1 @nsw_slt3_ov_no(i8 %a) {
+; CHECK-LABEL: @nsw_slt3_ov_no(
+; CHECK-NEXT:    ret i1 false
+;
+  %b = add nsw i8 %a, 100
+  %c = icmp slt i8 %b, -28
+  ret i1 %c
+}
+
+; Test the edges - instcombine should not interfere with simplification to constants.
+; Constant subtraction overflows. This is false.
+
+define i1 @nsw_slt4_ov(i8 %a) {
+; CHECK-LABEL: @nsw_slt4_ov(
+; CHECK-NEXT:    ret i1 false
+;
+  %b = add nsw i8 %a, 100
+  %c = icmp slt i8 %b, -29
+  ret i1 %c
+}
+
+; Test the edges - instcombine should not interfere with simplification to constants.
+; Constant subtraction overflows. This is true.
+
+define i1 @nsw_slt5_ov(i8 %a) {
+; CHECK-LABEL: @nsw_slt5_ov(
+; CHECK-NEXT:    ret i1 true
+;
+  %b = add nsw i8 %a, -100
+  %c = icmp slt i8 %b, 28
+  ret i1 %c
+}
+
+; InstCombine should not thwart this opportunity to simplify completely.
+
+define i1 @slt_zero_add_nsw_signbit(i8 %x) {
+; CHECK-LABEL: @slt_zero_add_nsw_signbit(
+; CHECK-NEXT:    ret i1 true
+;
+  %y = add nsw i8 %x, -128
+  %z = icmp slt i8 %y, 0
+  ret i1 %z
+}
+
+; InstCombine should not thwart this opportunity to simplify completely.
+
+define i1 @slt_zero_add_nuw_signbit(i8 %x) {
+; CHECK-LABEL: @slt_zero_add_nuw_signbit(
+; CHECK-NEXT:    ret i1 true
+;
+  %y = add nuw i8 %x, 128
+  %z = icmp slt i8 %y, 0
+  ret i1 %z
+}
+
diff --git a/test/Transforms/InstCombine/icmp-shl-nsw.ll b/test/Transforms/InstCombine/icmp-shl-nsw.ll
index 896a45625b9f2..ba05302897e9e 100644
--- a/test/Transforms/InstCombine/icmp-shl-nsw.ll
+++ b/test/Transforms/InstCombine/icmp-shl-nsw.ll
@@ -73,8 +73,7 @@ define <2 x i1> @icmp_shl_nsw_eq_vec(<2 x i32> %x) {
 
 define i1 @icmp_sgt1(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt1(
-; CHECK-NEXT:    [[SHL_MASK:%.*]] = and i8 %x, 127
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[SHL_MASK]], 64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 %x, -64
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -84,8 +83,7 @@ define i1 @icmp_sgt1(i8 %x) {
 
 define i1 @icmp_sgt2(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt2(
-; CHECK-NEXT:    [[SHL:%.*]] = shl nsw i8 %x, 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SHL]], -127
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, -64
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -95,8 +93,7 @@ define i1 @icmp_sgt2(i8 %x) {
 
 define i1 @icmp_sgt3(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt3(
-; CHECK-NEXT:    [[SHL:%.*]] = shl nsw i8 %x, 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SHL]], -16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, -8
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -106,8 +103,7 @@ define i1 @icmp_sgt3(i8 %x) {
 
 define i1 @icmp_sgt4(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt4(
-; CHECK-NEXT:    [[SHL:%.*]] = shl nsw i8 %x, 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SHL]], -2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, -1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -120,8 +116,7 @@ define i1 @icmp_sgt4(i8 %x) {
 
 define i1 @icmp_sgt5(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt5(
-; CHECK-NEXT:    [[SHL:%.*]] = shl nsw i8 %x, 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SHL]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -131,8 +126,7 @@ define i1 @icmp_sgt5(i8 %x) {
 
 define i1 @icmp_sgt6(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt6(
-; CHECK-NEXT:    [[SHL:%.*]] = shl nsw i8 %x, 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SHL]], 16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, 8
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -142,8 +136,7 @@ define i1 @icmp_sgt6(i8 %x) {
 
 define i1 @icmp_sgt7(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt7(
-; CHECK-NEXT:    [[SHL:%.*]] = shl nsw i8 %x, 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SHL]], 124
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, 62
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -155,8 +148,7 @@ define i1 @icmp_sgt7(i8 %x) {
 
 define i1 @icmp_sgt8(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt8(
-; CHECK-NEXT:    [[SHL_MASK:%.*]] = and i8 %x, 127
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[SHL_MASK]], 63
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 %x, 63
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 1
@@ -170,8 +162,7 @@ define i1 @icmp_sgt8(i8 %x) {
 
 define i1 @icmp_sgt9(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt9(
-; CHECK-NEXT:    [[SHL_MASK:%.*]] = and i8 %x, 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[SHL_MASK]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 %x, -1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 7
@@ -181,8 +172,7 @@ define i1 @icmp_sgt9(i8 %x) {
 
 define i1 @icmp_sgt10(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt10(
-; CHECK-NEXT:    [[SHL:%.*]] = shl nsw i8 %x, 7
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SHL]], -127
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, -1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 7
@@ -192,8 +182,7 @@ define i1 @icmp_sgt10(i8 %x) {
 
 define i1 @icmp_sgt11(i8 %x) {
 ; CHECK-LABEL: @icmp_sgt11(
-; CHECK-NEXT:    [[SHL:%.*]] = shl nsw i8 %x, 7
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SHL]], -2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 %x, -1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %shl = shl nsw i8 %x, 7
@@ -205,8 +194,7 @@ define i1 @icmp_sgt11(i8 %x) {
 
 define <2 x i1> @icmp_sgt11_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @icmp_sgt11_vec(
-; CHECK-NEXT:    [[SHL:%.*]] = shl nsw <2 x i8> %x, <i8 7, i8 7>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[SHL]], <i8 -2, i8 -2>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> %x, <i8 -1, i8 -1>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl nsw <2 x i8> %x, <i8 7, i8 7>
@@ -216,3 +204,153 @@ define <2 x i1> @icmp_sgt11_vec(<2 x i8> %x) {
 
 ; Known bits analysis returns false for compares with >=0.
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Repeat the shl nsw + sgt tests with predicate changed to 'sle'.
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Known bits analysis turns this into an equality predicate.
+
+define i1 @icmp_sle1(i8 %x) {
+; CHECK-LABEL: @icmp_sle1(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 %x, -64
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 1
+  %cmp = icmp sle i8 %shl, -128
+  ret i1 %cmp
+}
+
+define i1 @icmp_sle2(i8 %x) {
+; CHECK-LABEL: @icmp_sle2(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, -63
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 1
+  %cmp = icmp sle i8 %shl, -127
+  ret i1 %cmp
+}
+
+define i1 @icmp_sle3(i8 %x) {
+; CHECK-LABEL: @icmp_sle3(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, -7
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 1
+  %cmp = icmp sle i8 %shl, -16
+  ret i1 %cmp
+}
+
+define i1 @icmp_sle4(i8 %x) {
+; CHECK-LABEL: @icmp_sle4(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 1
+  %cmp = icmp sle i8 %shl, -2
+  ret i1 %cmp
+}
+
+; x <=s -1 is a sign bit test.
+; x <=s 0 is a sign bit test.
+
+define i1 @icmp_sle5(i8 %x) {
+; CHECK-LABEL: @icmp_sle5(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 1
+  %cmp = icmp sle i8 %shl, 1
+  ret i1 %cmp
+}
+
+define i1 @icmp_sle6(i8 %x) {
+; CHECK-LABEL: @icmp_sle6(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, 9
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 1
+  %cmp = icmp sle i8 %shl, 16
+  ret i1 %cmp
+}
+
+define i1 @icmp_sle7(i8 %x) {
+; CHECK-LABEL: @icmp_sle7(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, 63
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 1
+  %cmp = icmp sle i8 %shl, 124
+  ret i1 %cmp
+}
+
+; Known bits analysis turns this into an equality predicate.
+
+define i1 @icmp_sle8(i8 %x) {
+; CHECK-LABEL: @icmp_sle8(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 %x, 63
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 1
+  %cmp = icmp sle i8 %shl, 125
+  ret i1 %cmp
+}
+
+; Compares with 126 and 127 are recognized as always true.
+
+; Known bits analysis turns this into an equality predicate.
+
+define i1 @icmp_sle9(i8 %x) {
+; CHECK-LABEL: @icmp_sle9(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 %x, -1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 7
+  %cmp = icmp sle i8 %shl, -128
+  ret i1 %cmp
+}
+
+define i1 @icmp_sle10(i8 %x) {
+; CHECK-LABEL: @icmp_sle10(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 7
+  %cmp = icmp sle i8 %shl, -127
+  ret i1 %cmp
+}
+
+define i1 @icmp_sle11(i8 %x) {
+; CHECK-LABEL: @icmp_sle11(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 7
+  %cmp = icmp sle i8 %shl, -2
+  ret i1 %cmp
+}
+
+; Some of the earlier sgt/sle tests are transformed to eq/ne, but try a couple
+; of those explicitly, so we know no intermediate transforms are necessary.
+
+define i1 @icmp_eq1(i8 %x) {
+; CHECK-LABEL: @icmp_eq1(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 %x, 6
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 1
+  %cmp = icmp eq i8 %shl, 12
+  ret i1 %cmp
+}
+
+define i1 @icmp_ne1(i8 %x) {
+; CHECK-LABEL: @icmp_ne1(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 %x, -2
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shl = shl nsw i8 %x, 6
+  %cmp = icmp ne i8 %shl, -128
+  ret i1 %cmp
+}
+
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 32fe050bf83f0..edfa9a102917f 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -918,7 +918,7 @@ define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 %i to i16
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 %j to i16
 ; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nuw i16 [[TMP1]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i16 [[TMP2]], [[GEP1_IDX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP2]]
 ; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
@@ -949,7 +949,7 @@ define i1 @test60_addrspacecast_smaller(i8* %foo, i16 %i, i64 %j) {
 ; CHECK-LABEL: @test60_addrspacecast_smaller(
 ; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nuw i16 %i, 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 %j to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i16 [[TMP1]], [[GEP1_IDX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %bit = addrspacecast i8* %foo to i32 addrspace(1)*
@@ -981,7 +981,7 @@ define i1 @test61(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i32, i32* [[BIT]], i64 %i
 ; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, i8* %foo, i64 %j
 ; CHECK-NEXT:    [[CAST1:%.*]] = bitcast i32* [[GEP1]] to i8*
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8* [[CAST1]], [[GEP2]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8* [[GEP2]], [[CAST1]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %bit = bitcast i8* %foo to i32*
@@ -999,7 +999,7 @@ define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i32, i32 addrspace(1)* [[BIT]], i16 %i
 ; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, i8 addrspace(1)* %foo, i16 %j
 ; CHECK-NEXT:    [[CAST1:%.*]] = bitcast i32 addrspace(1)* [[GEP1]] to i8 addrspace(1)*
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 addrspace(1)* [[CAST1]], [[GEP2]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 addrspace(1)* [[GEP2]], [[CAST1]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
@@ -1123,19 +1123,6 @@ define i1 @test68(i32 %x) {
   ret i1 %cmp
 }
 
-; PR14708
-define i1 @test69(i32 %c) {
-; CHECK-LABEL: @test69(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %c, 32
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 97
-; CHECK-NEXT:    ret i1 [[TMP2]]
-;
-  %1 = icmp eq i32 %c, 97
-  %2 = icmp eq i32 %c, 65
-  %3 = or i1 %1, %2
-  ret i1 %3
-}
-
 ; PR15940
 define i1 @test70(i32 %X) {
 ; CHECK-LABEL: @test70(
@@ -1183,12 +1170,11 @@ define i1 @icmp_sext8trunc(i32 %x) {
   ret i1 %cmp
 }
 
-; FIXME: Vectors should fold the same way.
+; Vectors should fold the same way.
 define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_sext8trunc_vec(
-; CHECK-NEXT:    [[SEXT1:%.*]] = shl <2 x i32> %x, <i32 24, i32 24>
-; CHECK-NEXT:    [[SEXT:%.*]] = ashr <2 x i32> [[SEXT:%.*]]1, <i32 24, i32 24>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[SEXT]], <i32 36, i32 36>
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], <i8 36, i8 36>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %trunc = trunc <2 x i32> %x to <2 x i8>
@@ -1877,6 +1863,55 @@ define <2 x i1> @icmp_and_X_-16_ne-16_vec(<2 x i32> %X) {
   ret <2 x i1> %cmp
 }
 
+; PR32524: https://bugs.llvm.org/show_bug.cgi?id=32524
+; X | C == C --> X <=u C (when C+1 is PowerOf2).
+
+define i1 @or1_eq1(i32 %x) {
+; CHECK-LABEL: @or1_eq1(
+; CHECK-NEXT:    [[T1:%.*]] = icmp ult i32 %x, 2
+; CHECK-NEXT:    ret i1 [[T1]]
+;
+  %t0 = or i32 %x, 1
+  %t1 = icmp eq i32 %t0, 1
+  ret i1 %t1
+}
+
+; X | C == C --> X <=u C (when C+1 is PowerOf2).
+
+define <2 x i1> @or3_eq3_vec(<2 x i8> %x) {
+; CHECK-LABEL: @or3_eq3_vec(
+; CHECK-NEXT:    [[T1:%.*]] = icmp ult <2 x i8> %x, <i8 4, i8 4>
+; CHECK-NEXT:    ret <2 x i1> [[T1]]
+;
+  %t0 = or <2 x i8> %x, <i8 3, i8 3>
+  %t1 = icmp eq <2 x i8> %t0, <i8 3, i8 3>
+  ret <2 x i1> %t1
+}
+
+; X | C != C --> X >u C (when C+1 is PowerOf2).
+
+define i1 @or7_ne7(i32 %x) {
+; CHECK-LABEL: @or7_ne7(
+; CHECK-NEXT:    [[T1:%.*]] = icmp ugt i32 %x, 7
+; CHECK-NEXT:    ret i1 [[T1]]
+;
+  %t0 = or i32 %x, 7
+  %t1 = icmp ne i32 %t0, 7
+  ret i1 %t1
+}
+
+; X | C != C --> X >u C (when C+1 is PowerOf2).
+
+define <2 x i1> @or63_ne63_vec(<2 x i8> %x) {
+; CHECK-LABEL: @or63_ne63_vec(
+; CHECK-NEXT:    [[T1:%.*]] = icmp ugt <2 x i8> %x, <i8 63, i8 63>
+; CHECK-NEXT:    ret <2 x i1> [[T1]]
+;
+  %t0 = or <2 x i8> %x, <i8 63, i8 63>
+  %t1 = icmp ne <2 x i8> %t0, <i8 63, i8 63>
+  ret <2 x i1> %t1
+}
+
 define i1 @shrink_constant(i32 %X) {
 ; CHECK-LABEL: @shrink_constant(
 ; CHECK-NEXT:    [[XOR:%.*]] = xor i32 %X, -12
@@ -2232,16 +2267,6 @@ define i1 @icmp_sge_zero_add_nsw(i32 %a) {
   ret i1 %cmp
 }
 
-define i1 @icmp_slt_zero_add_nsw(i32 %a) {
-; CHECK-LABEL: @icmp_slt_zero_add_nsw(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 %a, -1
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %add = add nsw i32 %a, 1
-  %cmp = icmp slt i32 %add, 0
-  ret i1 %cmp
-}
-
 define i1 @icmp_sle_zero_add_nsw(i32 %a) {
 ; CHECK-LABEL: @icmp_sle_zero_add_nsw(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 %a, 0
@@ -2425,6 +2450,10 @@ define i1 @f10(i16 %p) {
   ret i1 %cmp580
 }
 
+; Note: fptosi is used in various tests below to ensure that operand complexity
+; canonicalization does not kick in, which would make some of the tests
+; equivalent to one another.
+
 define i1 @cmp_sgt_rhs_dec(float %x, i32 %i) {
 ; CHECK-LABEL: @cmp_sgt_rhs_dec(
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi float %x to i32
@@ -2711,3 +2740,143 @@ define i1 @or_ptrtoint_mismatch(i8* %p, i32* %q) {
   %b = icmp eq i64 %o, 0
   ret i1 %b
 }
+
+define i1 @icmp_add1_ugt(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_add1_ugt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i32 %x, %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add nuw i32 %x, 1
+  %cmp = icmp ugt i32 %add, %y
+  ret i1 %cmp
+}
+
+define i1 @icmp_add1_ule(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_add1_ule(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 %x, %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add nuw i32 %x, 1
+  %cmp = icmp ule i32 %add, %y
+  ret i1 %cmp
+}
+
+define i1 @cmp_uge_rhs_inc(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_uge_rhs_inc(
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float %x to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[CONV]], %i
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %conv = fptosi float %x to i32
+  %inc = add nuw i32 %i, 1
+  %cmp = icmp uge i32 %conv, %inc
+  ret i1 %cmp
+}
+
+define i1 @cmp_ult_rhs_inc(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_ult_rhs_inc(
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float %x to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ule i32 [[CONV]], %i
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %conv = fptosi float %x to i32
+  %inc = add nuw i32 %i, 1
+  %cmp = icmp ult i32 %conv, %inc
+  ret i1 %cmp
+}
+
+define i1 @cmp_sge_lhs_inc(i32 %x, i32 %y) {
+; CHECK-LABEL: @cmp_sge_lhs_inc(
+; CHECK-NEXT:    [[INC:%.*]] = add
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[INC]], %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %inc = add nsw i32 %x, 1
+  %cmp = icmp sge i32 %inc, %y
+  ret i1 %cmp
+}
+
+define i1 @cmp_uge_lhs_inc(i32 %x, i32 %y) {
+; CHECK-LABEL: @cmp_uge_lhs_inc(
+; CHECK-NEXT:    [[INC:%.*]] = add
+; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i32 [[INC]], %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %inc = add nuw i32 %x, 1
+  %cmp = icmp uge i32 %inc, %y
+  ret i1 %cmp
+}
+
+define i1 @cmp_sgt_lhs_dec(i32 %x, i32 %y) {
+; CHECK-LABEL: @cmp_sgt_lhs_dec(
+; CHECK-NEXT:    [[DEC:%.*]] = {{add|sub}}
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[DEC]], %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %dec = sub nsw i32 %x, 1
+  %cmp = icmp sgt i32 %dec, %y
+  ret i1 %cmp
+}
+
+define i1 @cmp_ugt_lhs_dec(i32 %x, i32 %y) {
+; CHECK-LABEL: @cmp_ugt_lhs_dec(
+; CHECK-NEXT:    [[DEC:%.*]] = {{add|sub}}
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[DEC]], %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %dec = sub nuw i32 %x, 1
+  %cmp = icmp ugt i32 %dec, %y
+  ret i1 %cmp
+}
+
+define i1 @cmp_sle_rhs_inc(float %x, i32 %y) {
+; CHECK-LABEL: @cmp_sle_rhs_inc(
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float %x to i32
+; CHECK-NEXT:    [[INC:%.*]] = add
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[INC]], [[CONV]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %conv = fptosi float %x to i32
+  %inc = add nsw i32 %y, 1
+  %cmp = icmp sle i32 %conv, %inc
+  ret i1 %cmp
+}
+
+define i1 @cmp_ule_rhs_inc(float %x, i32 %y) {
+; CHECK-LABEL: @cmp_ule_rhs_inc(
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float %x to i32
+; CHECK-NEXT:    [[INC:%.*]] = add
+; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i32 [[INC]], [[CONV]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %conv = fptosi float %x to i32
+  %inc = add nuw i32 %y, 1
+  %cmp = icmp ule i32 %conv, %inc
+  ret i1 %cmp
+}
+
+define i1 @cmp_slt_rhs_dec(float %x, i32 %y) {
+; CHECK-LABEL: @cmp_slt_rhs_dec(
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float %x to i32
+; CHECK-NEXT:    [[DEC:%.*]] = {{add|sub}}
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[DEC]], [[CONV]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %conv = fptosi float %x to i32
+  %dec = sub nsw i32 %y, 1
+  %cmp = icmp slt i32 %conv, %dec
+  ret i1 %cmp
+}
+
+define i1 @cmp_ult_rhs_dec(float %x, i32 %y) {
+; CHECK-LABEL: @cmp_ult_rhs_dec(
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float %x to i32
+; CHECK-NEXT:    [[DEC:%.*]] = {{add|sub}}
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[DEC]], [[CONV]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %conv = fptosi float %x to i32
+  %dec = sub nuw i32 %y, 1
+  %cmp = icmp ult i32 %conv, %dec
+  ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll
index 4507deb7f023a..29f774c5f62b5 100644
--- a/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -86,11 +86,8 @@ define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {
 
 define <8 x i16> @pr26015(<4 x i16> %t0) {
 ; CHECK-LABEL: @pr26015(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i16> %t0, i32 2
-; CHECK-NEXT:    [[T2:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 undef>, i16 [[TMP2]], i32 3
-; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x i16> [[T2]], i16 0, i32 6
-; CHECK-NEXT:    [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[T5:%.*]] = shufflevector <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 10, i32 4, i32 5, i32 6, i32 11>
 ; CHECK-NEXT:    ret <8 x i16> [[T5]]
 ;
   %t1 = extractelement <4 x i16> %t0, i32 2
@@ -110,8 +107,7 @@ define <8 x i16> @pr25999(<4 x i16> %t0, i1 %b) {
 ; CHECK-NEXT:    br i1 %b, label %if, label %end
 ; CHECK:       if:
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[T2:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 undef>, i16 [[T1]], i32 3
-; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x i16> [[T2]], i16 0, i32 6
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, i16 [[T1]], i32 3
 ; CHECK-NEXT:    [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
 ; CHECK-NEXT:    ret <8 x i16> [[T5]]
 ; CHECK:       end:
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 858f9c029b30b..e8f5ddd329ff6 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -351,33 +351,12 @@ define void @ctpop_cmp_vec(<2 x i32> %a, <2 x i1>* %b) {
 ; CHECK-NEXT: store volatile <2 x i1> %pop1.cmp, <2 x i1>* %b
 }
 
-define i32 @cttz_simplify1a(i32 %x) nounwind readnone ssp {
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
-  %shr3 = lshr i32 %tmp1, 5
-  ret i32 %shr3
-
-; CHECK-LABEL: @cttz_simplify1a(
-; CHECK: icmp eq i32 %x, 0
-; CHECK-NEXT: zext i1
-; CHECK-NEXT: ret i32
-}
-
-define i32 @cttz_simplify1b(i32 %x) nounwind readnone ssp {
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
-  %shr3 = lshr i32 %tmp1, 5
-  ret i32 %shr3
-
-; CHECK-LABEL: @cttz_simplify1b(
-; CHECK-NEXT: ret i32 0
-}
-
-define i32 @ctlz_undef(i32 %Value) nounwind {
+define i32 @ctlz_undef(i32 %Value) {
 ; CHECK-LABEL: @ctlz_undef(
 ; CHECK-NEXT:    ret i32 undef
 ;
   %ctlz = call i32 @llvm.ctlz.i32(i32 0, i1 true)
   ret i32 %ctlz
-
 }
 
 define i32 @ctlz_make_undef(i32 %a) {
diff --git a/test/Transforms/InstCombine/lifetime-asan.ll b/test/Transforms/InstCombine/lifetime-asan.ll
index f52c0202b7738..7fdc1fcbc3b30 100644
--- a/test/Transforms/InstCombine/lifetime-asan.ll
+++ b/test/Transforms/InstCombine/lifetime-asan.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-declare void @llvm.lifetime.start(i64, i8* nocapture)
-declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
 declare void @foo(i8* nocapture)
 
 define void @asan() sanitize_address {
@@ -9,8 +9,8 @@ entry:
   ; CHECK-LABEL: @asan(
   %text = alloca i8, align 1
 
-  call void @llvm.lifetime.start(i64 1, i8* %text)
-  call void @llvm.lifetime.end(i64 1, i8* %text)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %text)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %text)
   ; CHECK: call void @llvm.lifetime.start
   ; CHECK-NEXT: call void @llvm.lifetime.end
 
@@ -25,8 +25,8 @@ entry:
   ; CHECK-LABEL: @no_asan(
   %text = alloca i8, align 1
 
-  call void @llvm.lifetime.start(i64 1, i8* %text)
-  call void @llvm.lifetime.end(i64 1, i8* %text)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %text)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %text)
   ; CHECK-NO: call void @llvm.lifetime
 
   call void @foo(i8* %text) ; Keep alloca alive
diff --git a/test/Transforms/InstCombine/lifetime.ll b/test/Transforms/InstCombine/lifetime.ll
index c296d29b99b97..71c676233b088 100644
--- a/test/Transforms/InstCombine/lifetime.ll
+++ b/test/Transforms/InstCombine/lifetime.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 declare void @llvm.dbg.declare(metadata, metadata, metadata)
-declare void @llvm.lifetime.start(i64, i8* nocapture)
-declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
 declare void @foo(i8* nocapture, i8* nocapture)
 
 define void @bar(i1 %flag) !dbg !4 {
@@ -17,11 +17,11 @@ entry:
 ; CHECK: bb3:
 ; CHECK-NEXT: call void @llvm.dbg.declare
 ; CHECK-NEXT: br label %fin
-; CHECK: call void @llvm.lifetime.start(i64 1, i8* %[[T]])
-; CHECK-NEXT: call void @llvm.lifetime.start(i64 1, i8* %[[B]])
+; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %[[T]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* %[[B]])
 ; CHECK-NEXT: call void @foo(i8* %[[B]], i8* %[[T]])
-; CHECK-NEXT: call void @llvm.lifetime.end(i64 1, i8* %[[B]])
-; CHECK-NEXT: call void @llvm.lifetime.end(i64 1, i8* %[[T]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* %[[B]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* %[[T]])
   %text = alloca [1 x i8], align 1
   %buff = alloca [1 x i8], align 1
   %0 = getelementptr inbounds [1 x i8], [1 x i8]* %text, i64 0, i64 0
@@ -29,31 +29,31 @@ entry:
   br i1 %flag, label %if, label %else
 
 if:
-  call void @llvm.lifetime.start(i64 1, i8* %0)
-  call void @llvm.lifetime.start(i64 1, i8* %1)
-  call void @llvm.lifetime.end(i64 1, i8* %1)
-  call void @llvm.lifetime.end(i64 1, i8* %0)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
   br label %bb2
 
 bb2:
-  call void @llvm.lifetime.start(i64 1, i8* %0)
-  call void @llvm.lifetime.start(i64 1, i8* %1)
-  call void @llvm.lifetime.end(i64 1, i8* %0)
-  call void @llvm.lifetime.end(i64 1, i8* %1)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %1)
   br label %bb3
 
 bb3:
-  call void @llvm.lifetime.start(i64 1, i8* %0)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
   call void @llvm.dbg.declare(metadata [1 x i8]* %text, metadata !14, metadata !25), !dbg !26
-  call void @llvm.lifetime.end(i64 1, i8* %0)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
   br label %fin
 
 else:
-  call void @llvm.lifetime.start(i64 1, i8* %0)
-  call void @llvm.lifetime.start(i64 1, i8* %1)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
   call void @foo(i8* %1, i8* %0)
-  call void @llvm.lifetime.end(i64 1, i8* %1)
-  call void @llvm.lifetime.end(i64 1, i8* %0)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
   br  label %fin
 
 fin:
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index 75952e01c19c8..5746b7aa28d54 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -instcombine -S -default-data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck %s
+; RUN: opt -instcombine -S -data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck %s
 
 @G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
                                      i16 73, i16 82, i16 69, i16 68, i16 0]
diff --git a/test/Transforms/InstCombine/lshr.ll b/test/Transforms/InstCombine/lshr.ll
new file mode 100644
index 0000000000000..b81371b030429
--- /dev/null
+++ b/test/Transforms/InstCombine/lshr.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) nounwind readnone
+declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone
+declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>) nounwind readnone
+
+define i32 @lshr_ctlz_zero_is_not_undef(i32 %x) {
+; CHECK-LABEL: @lshr_ctlz_zero_is_not_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT:    [[SH:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[SH]]
+;
+  %ct = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+  %sh = lshr i32 %ct, 5
+  ret i32 %sh
+}
+
+define i32 @lshr_cttz_zero_is_not_undef(i32 %x) {
+; CHECK-LABEL: @lshr_cttz_zero_is_not_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT:    [[SH:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[SH]]
+;
+  %ct = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  %sh = lshr i32 %ct, 5
+  ret i32 %sh
+}
+
+define i32 @lshr_ctpop(i32 %x) {
+; CHECK-LABEL: @lshr_ctpop(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 %x, -1
+; CHECK-NEXT:    [[SH:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[SH]]
+;
+  %ct = call i32 @llvm.ctpop.i32(i32 %x)
+  %sh = lshr i32 %ct, 5
+  ret i32 %sh
+}
+
+define <2 x i8> @lshr_ctlz_zero_is_not_undef_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_ctlz_zero_is_not_undef_splat_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer
+; CHECK-NEXT:    [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT:    ret <2 x i8> [[SH]]
+;
+  %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 false)
+  %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
+  ret <2 x i8> %sh
+}
+
+define <2 x i8> @lshr_cttz_zero_is_not_undef_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_cttz_zero_is_not_undef_splat_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer
+; CHECK-NEXT:    [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT:    ret <2 x i8> [[SH]]
+;
+  %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 false)
+  %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
+  ret <2 x i8> %sh
+}
+
+define <2 x i8> @lshr_ctpop_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_ctpop_splat_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> %x, <i8 -1, i8 -1>
+; CHECK-NEXT:    [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT:    ret <2 x i8> [[SH]]
+;
+  %ct = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x)
+  %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
+  ret <2 x i8> %sh
+}
+
+define i8 @lshr_exact(i8 %x) {
+; CHECK-LABEL: @lshr_exact(
+; CHECK-NEXT:    [[SHL:%.*]] = shl i8 %x, 2
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[SHL]], 4
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact i8 [[ADD]], 2
+; CHECK-NEXT:    ret i8 [[LSHR]]
+;
+  %shl = shl i8 %x, 2
+  %add = add i8 %shl, 4
+  %lshr = lshr i8 %add, 2
+  ret i8 %lshr
+}
+
+define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_exact_splat_vec(
+; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> %x, <i8 2, i8 2>
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[SHL]], <i8 4, i8 4>
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact <2 x i8> [[ADD]], <i8 2, i8 2>
+; CHECK-NEXT:    ret <2 x i8> [[LSHR]]
+;
+  %shl = shl <2 x i8> %x, <i8 2, i8 2>
+  %add = add <2 x i8> %shl, <i8 4, i8 4>
+  %lshr = lshr <2 x i8> %add, <i8 2, i8 2>
+  ret <2 x i8> %lshr
+}
+
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index 8fcb8214360d4..7a5c7457e3649 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -24,8 +24,8 @@ define i1 @foo() {
   ret i1 %z
 }
 
-declare void @llvm.lifetime.start(i64, i8*)
-declare void @llvm.lifetime.end(i64, i8*)
+declare void @llvm.lifetime.start.p0i8(i64, i8*)
+declare void @llvm.lifetime.end.p0i8(i64, i8*)
 declare i64 @llvm.objectsize.i64(i8*, i1)
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
@@ -35,8 +35,8 @@ define void @test3(i8* %src) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT: ret void
   %a = call noalias i8* @malloc(i32 10)
-  call void @llvm.lifetime.start(i64 10, i8* %a)
-  call void @llvm.lifetime.end(i64 10, i8* %a)
+  call void @llvm.lifetime.start.p0i8(i64 10, i8* %a)
+  call void @llvm.lifetime.end.p0i8(i64 10, i8* %a)
   %size = call i64 @llvm.objectsize.i64(i8* %a, i1 true)
   store i8 42, i8* %a
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i32 1, i1 false)
diff --git a/test/Transforms/InstCombine/max-of-nots.ll b/test/Transforms/InstCombine/max-of-nots.ll
index 96fac52289707..519f1c6a90b04 100644
--- a/test/Transforms/InstCombine/max-of-nots.ll
+++ b/test/Transforms/InstCombine/max-of-nots.ll
@@ -90,6 +90,28 @@ define i32 @max_of_nots(i32 %x, i32 %y) {
   ret i32 %smax96
 }
 
+; Negative test case (i.e. cannot be simplified): ABS(MIN(NOT x, y))
+define i32 @abs_of_min_of_not(i32 %x, i32 %y) {
+; CHECK-LABEL: @abs_of_min_of_not(
+; CHECK-NEXT: xor
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp sge
+; CHECK-NEXT: select
+; CHECK-NEXT: icmp sgt
+; CHECK-NEXT: sub
+; CHECK-NEXT: select
+; CHECK-NEXT: ret
+
+  %xord = xor i32 %x, -1
+  %yadd = add i32 %y, 2
+  %cond.i = icmp sge i32 %yadd, %xord
+  %min = select i1 %cond.i, i32 %xord, i32 %yadd
+  %cmp2 = icmp sgt i32 %min, -1
+  %sub = sub i32 0, %min
+  %abs = select i1 %cmp2, i32 %min, i32 %sub
+  ret i32  %abs
+}
+
 define <2 x i32> @max_of_nots_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @max_of_nots_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <2 x i32> %y, zeroinitializer
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
index f9ff479e3addf..96516f44e0815 100644
--- a/test/Transforms/InstCombine/memcmp-1.ll
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -14,67 +14,76 @@ declare i32 @memcmp(i8*, i8*, i32)
 
 define i32 @test_simplify1(i8* %mem, i32 %size) {
 ; CHECK-LABEL: @test_simplify1(
+; CHECK-NEXT:    ret i32 0
+;
   %ret = call i32 @memcmp(i8* %mem, i8* %mem, i32 %size)
   ret i32 %ret
-; CHECK: ret i32 0
 }
 
 ; Check memcmp(mem1, mem2, 0) -> 0.
 
 define i32 @test_simplify2(i8* %mem1, i8* %mem2) {
 ; CHECK-LABEL: @test_simplify2(
+; CHECK-NEXT:    ret i32 0
+;
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 0)
   ret i32 %ret
-; CHECK: ret i32 0
 }
 
 ;; Check memcmp(mem1, mem2, 1) -> *(unsigned char*)mem1 - *(unsigned char*)mem2.
 
 define i32 @test_simplify3(i8* %mem1, i8* %mem2) {
 ; CHECK-LABEL: @test_simplify3(
+; CHECK-NEXT:    [[LHSC:%.*]] = load i8, i8* %mem1, align 1
+; CHECK-NEXT:    [[LHSV:%.*]] = zext i8 [[LHSC]] to i32
+; CHECK-NEXT:    [[RHSC:%.*]] = load i8, i8* %mem2, align 1
+; CHECK-NEXT:    [[RHSV:%.*]] = zext i8 [[RHSC]] to i32
+; CHECK-NEXT:    [[CHARDIFF:%.*]] = sub nsw i32 [[LHSV]], [[RHSV]]
+; CHECK-NEXT:    ret i32 [[CHARDIFF]]
+;
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 1)
-; CHECK: [[LOAD1:%[a-z]+]] = load i8, i8* %mem1, align 1
-; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
-; CHECK: [[LOAD2:%[a-z]+]] = load i8, i8* %mem2, align 1
-; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
-; CHECK: [[RET:%[a-z]+]] = sub nsw i32 [[ZEXT1]], [[ZEXT2]]
   ret i32 %ret
-; CHECK: ret i32 [[RET]]
 }
 
 ; Check memcmp(mem1, mem2, size) -> cnst, where all arguments are constants.
 
 define i32 @test_simplify4() {
 ; CHECK-LABEL: @test_simplify4(
+; CHECK-NEXT:    ret i32 0
+;
   %mem1 = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0
   %mem2 = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
   ret i32 %ret
-; CHECK: ret i32 0
 }
 
 define i32 @test_simplify5() {
 ; CHECK-LABEL: @test_simplify5(
+; CHECK-NEXT:    ret i32 1
+;
   %mem1 = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0
   %mem2 = getelementptr [4 x i8], [4 x i8]* @foo, i32 0, i32 0
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
   ret i32 %ret
-; CHECK: ret i32 1
 }
 
 define i32 @test_simplify6() {
 ; CHECK-LABEL: @test_simplify6(
+; CHECK-NEXT:    ret i32 -1
+;
   %mem1 = getelementptr [4 x i8], [4 x i8]* @foo, i32 0, i32 0
   %mem2 = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
   ret i32 %ret
-; CHECK: ret i32 -1
 }
 
 ; Check memcmp(mem1, mem2, 8)==0 -> *(int64_t*)mem1 == *(int64_t*)mem2
 
 define i1 @test_simplify7(i64 %x, i64 %y) {
 ; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 %x, %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
   %x.addr = alloca i64, align 8
   %y.addr = alloca i64, align 8
   store i64 %x, i64* %x.addr, align 8
@@ -84,14 +93,15 @@ define i1 @test_simplify7(i64 %x, i64 %y) {
   %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 8)
   %cmp = icmp eq i32 %call, 0
   ret i1 %cmp
-; CHECK: %cmp = icmp eq i64 %x, %y
-; CHECK: ret i1 %cmp
 }
 
 ; Check memcmp(mem1, mem2, 4)==0 -> *(int32_t*)mem1 == *(int32_t*)mem2
 
 define i1 @test_simplify8(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test_simplify8(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 %x, %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
   %x.addr = alloca i32, align 4
   %y.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
@@ -101,14 +111,15 @@ define i1 @test_simplify8(i32 %x, i32 %y) {
   %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 4)
   %cmp = icmp eq i32 %call, 0
   ret i1 %cmp
-; CHECK: %cmp = icmp eq i32 %x, %y
-; CHECK: ret i1 %cmp
 }
 
 ; Check memcmp(mem1, mem2, 2)==0 -> *(int16_t*)mem1 == *(int16_t*)mem2
 
 define i1 @test_simplify9(i16 %x, i16 %y) {
 ; CHECK-LABEL: @test_simplify9(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 %x, %y
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
   %x.addr = alloca i16, align 2
   %y.addr = alloca i16, align 2
   store i16 %x, i16* %x.addr, align 2
@@ -118,6 +129,4 @@ define i1 @test_simplify9(i16 %x, i16 %y) {
   %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 2)
   %cmp = icmp eq i32 %call, 0
   ret i1 %cmp
-; CHECK: %cmp = icmp eq i16 %x, %y
-; CHECK: ret i1 %cmp
 }
diff --git a/test/Transforms/InstCombine/memcpy-addrspace.ll b/test/Transforms/InstCombine/memcpy-addrspace.ll
new file mode 100644
index 0000000000000..17bc1d08f9867
--- /dev/null
+++ b/test/Transforms/InstCombine/memcpy-addrspace.ll
@@ -0,0 +1,85 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@test.data = private unnamed_addr addrspace(2) constant [8 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7], align 4
+
+; CHECK-LABEL: test_load
+; CHECK: %[[GEP:.*]] = getelementptr [8 x i32], [8 x i32] addrspace(2)* @test.data, i64 0, i64 %x
+; CHECK: %{{.*}} = load i32, i32 addrspace(2)* %[[GEP]]
+; CHECK-NOT: alloca
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p2i8.i64
+; CHECK-NOT: addrspacecast
+; CHECK-NOT: load i32, i32*
+define void @test_load(i32 addrspace(1)* %out, i64 %x) {
+entry:
+  %data = alloca [8 x i32], align 4
+  %0 = bitcast [8 x i32]* %data to i8*
+  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false)
+  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x
+  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
+  store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_load_bitcast_chain
+; CHECK: %[[GEP:.*]] = getelementptr [8 x i32], [8 x i32] addrspace(2)* @test.data, i64 0, i64 %x
+; CHECK: %{{.*}} = load i32, i32 addrspace(2)* %[[GEP]]
+; CHECK-NOT: alloca
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p2i8.i64
+; CHECK-NOT: addrspacecast
+; CHECK-NOT: load i32, i32*
+define void @test_load_bitcast_chain(i32 addrspace(1)* %out, i64 %x) {
+entry:
+  %data = alloca [8 x i32], align 4
+  %0 = bitcast [8 x i32]* %data to i8*
+  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false)
+  %1 = bitcast i8* %0 to i32*
+  %arrayidx = getelementptr inbounds i32, i32* %1, i64 %x
+  %2 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
+  store i32 %2, i32 addrspace(1)* %arrayidx1, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_call
+; CHECK: alloca
+; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64
+; CHECK-NOT: addrspacecast
+; CHECK: call i32 @foo(i32* %{{.*}})
+define void @test_call(i32 addrspace(1)* %out, i64 %x) {
+entry:
+  %data = alloca [8 x i32], align 4
+  %0 = bitcast [8 x i32]* %data to i8*
+  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false)
+  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x
+  %1 = call i32 @foo(i32* %arrayidx)
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
+  store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_load_and_call
+; CHECK: alloca
+; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64
+; CHECK: load i32, i32* %{{.*}}
+; CHECK: call i32 @foo(i32* %{{.*}})
+; CHECK-NOT: addrspacecast
+; CHECK-NOT: load i32, i32 addrspace(2)*
+define void @test_load_and_call(i32 addrspace(1)* %out, i64 %x, i64 %y) {
+entry:
+  %data = alloca [8 x i32], align 4
+  %0 = bitcast [8 x i32]* %data to i8*
+  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false)
+  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x
+  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
+  store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
+  %2 = call i32 @foo(i32* %arrayidx)
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %y
+  store i32 %2, i32 addrspace(1)* %arrayidx2, align 4
+  ret void
+}
+
+
+declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture writeonly, i8 addrspace(2)* nocapture readonly, i64, i32, i1)
+declare i32 @foo(i32* %x)
diff --git a/test/Transforms/InstCombine/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll
index da38087d7397e..7c9384d89ba34 100644
--- a/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -126,11 +126,11 @@ define void @test4() {
   ret void
 }
 
-declare void @llvm.lifetime.start(i64, i8*)
+declare void @llvm.lifetime.start.p0i8(i64, i8*)
 define void @test5() {
   %A = alloca %T
   %a = bitcast %T* %A to i8*
-  call void @llvm.lifetime.start(i64 -1, i8* %a)
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %a)
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
   call void @baz(i8* byval %a)
 ; CHECK-LABEL: @test5(
diff --git a/test/Transforms/InstCombine/memcpy-to-load.ll b/test/Transforms/InstCombine/memcpy-to-load.ll
index bcc9e188b965f..fe5f0ac657f15 100644
--- a/test/Transforms/InstCombine/memcpy-to-load.ll
+++ b/test/Transforms/InstCombine/memcpy-to-load.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep "load double"
+; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
 
@@ -10,4 +10,8 @@ entry:
   ret void
 }
 
+; Make sure that the memcpy has been replaced with a load/store of i64
+; CHECK: [[TMP:%[0-9]+]] = load i64
+; CHECK: store i64 [[TMP]]
+
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/memset_chk-1.ll b/test/Transforms/InstCombine/memset_chk-1.ll
index 9d08e96cb49be..79028502b641b 100644
--- a/test/Transforms/InstCombine/memset_chk-1.ll
+++ b/test/Transforms/InstCombine/memset_chk-1.ll
@@ -69,7 +69,7 @@ define i32 @test_rauw(i8* %a, i8* %b, i8** %c) {
 entry:
   %call49 = call i64 @strlen(i8* %a)
   %add180 = add i64 %call49, 1
-  %yo107 = call i64 @llvm.objectsize.i64.p0i8(i8* %b, i1 false)
+  %yo107 = call i64 @llvm.objectsize.i64.p0i8(i8* %b, i1 false, i1 false)
   %call50 = call i8* @__memmove_chk(i8* %b, i8* %a, i64 %add180, i64 %yo107)
 ; CHECK: %strlen = call i64 @strlen(i8* %b)
 ; CHECK-NEXT: %strchr2 = getelementptr i8, i8* %b, i64 %strlen
@@ -87,7 +87,7 @@ entry:
 declare i8* @__memmove_chk(i8*, i8*, i64, i64)
 declare i8* @strrchr(i8*, i32)
 declare i64 @strlen(i8* nocapture)
-declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1)
 
 declare i8* @__memset_chk(i8*, i32, i64, i64)
 
@@ -100,7 +100,7 @@ entry:
   br i1 %cmp, label %cleanup, label %if.end
 if.end:
   %bc = bitcast i8* %call to float*
-  %call2 = tail call i64 @llvm.objectsize.i64.p0i8(i8* nonnull %call, i1 false)
+  %call2 = tail call i64 @llvm.objectsize.i64.p0i8(i8* nonnull %call, i1 false, i1 false)
   %call3 = tail call i8* @__memset_chk(i8* nonnull %call, i32 0, i64 %size, i64 %call2) #1
   br label %cleanup
 cleanup:
@@ -114,7 +114,7 @@ cleanup:
 ; CHECK-NEXT:    br i1 %cmp, label %cleanup, label %if.end
 ; CHECK:       if.end:
 ; CHECK-NEXT:    %bc = bitcast i8* %call to float*
-; CHECK-NEXT:    %call2 = tail call i64 @llvm.objectsize.i64.p0i8(i8* nonnull %call, i1 false)
+; CHECK-NEXT:    %call2 = tail call i64 @llvm.objectsize.i64.p0i8(i8* nonnull %call, i1 false, i1 false)
 ; CHECK-NEXT:    %call3 = tail call i8* @__memset_chk(i8* nonnull %call, i32 0, i64 %size, i64 %call2)
 ; CHECK-NEXT:    br label %cleanup
 ; CHECK:       cleanup:
diff --git a/test/Transforms/InstCombine/minmax-fold.ll b/test/Transforms/InstCombine/minmax-fold.ll
index bf46cefd8ab39..19a7341fdc28f 100644
--- a/test/Transforms/InstCombine/minmax-fold.ll
+++ b/test/Transforms/InstCombine/minmax-fold.ll
@@ -339,14 +339,85 @@ define i32 @test75(i32 %x) {
   ret i32 %retval
 }
 
+; The next 4 tests are value clamping with constants:
+; https://llvm.org/bugs/show_bug.cgi?id=31693
+
+; (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
+
+define i32 @clamp_signed1(i32 %x) {
+; CHECK-LABEL: @clamp_signed1(
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 %x, 255
+; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], i32 %x, i32 255
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[MIN]], 15
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %cmp2 = icmp slt i32 %x, 255
+  %min = select i1 %cmp2, i32 %x, i32 255
+  %cmp1 = icmp slt i32 %x, 15
+  %r = select i1 %cmp1, i32 15, i32 %min
+  ret i32 %r
+}
+
+; (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
+
+define i32 @clamp_signed2(i32 %x) {
+; CHECK-LABEL: @clamp_signed2(
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 %x, 15
+; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2]], i32 %x, i32 15
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[MAX]], 255
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %cmp2 = icmp sgt i32 %x, 15
+  %max = select i1 %cmp2, i32 %x, i32 15
+  %cmp1 = icmp sgt i32 %x, 255
+  %r = select i1 %cmp1, i32 255, i32 %max
+  ret i32 %r
+}
+
+; (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
+
+define i32 @clamp_unsigned1(i32 %x) {
+; CHECK-LABEL: @clamp_unsigned1(
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 %x, 255
+; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], i32 %x, i32 255
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[MIN]], 15
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %cmp2 = icmp ult i32 %x, 255
+  %min = select i1 %cmp2, i32 %x, i32 255
+  %cmp1 = icmp ult i32 %x, 15
+  %r = select i1 %cmp1, i32 15, i32 %min
+  ret i32 %r
+}
+
+; (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
+
+define i32 @clamp_unsigned2(i32 %x) {
+; CHECK-LABEL: @clamp_unsigned2(
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 %x, 15
+; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2]], i32 %x, i32 15
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[MAX]], 255
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %cmp2 = icmp ugt i32 %x, 15
+  %max = select i1 %cmp2, i32 %x, i32 15
+  %cmp1 = icmp ugt i32 %x, 255
+  %r = select i1 %cmp1, i32 255, i32 %max
+  ret i32 %r
+}
+
 ; The next 3 min tests should canonicalize to the same form...and not infinite loop.
 
 define double @PR31751_umin1(i32 %x) {
 ; CHECK-LABEL: @PR31751_umin1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 %x, 2147483647
-; CHECK-NEXT:    [[CONV1:%.*]] = select i1 [[TMP1]], i32 %x, i32 2147483647
-; CHECK-NEXT:    [[TMP2:%.*]] = sitofp i32 [[CONV1]] to double
-; CHECK-NEXT:    ret double [[TMP2]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[TMP1]], i32 %x, i32 2147483647
+; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[SEL]] to double
+; CHECK-NEXT:    ret double [[CONV]]
 ;
   %cmp = icmp slt i32 %x, 0
   %sel = select i1 %cmp, i32 2147483647, i32 %x
@@ -385,9 +456,9 @@ define double @PR31751_umin3(i32 %x) {
 define double @PR31751_umax1(i32 %x) {
 ; CHECK-LABEL: @PR31751_umax1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i32 %x, -2147483648
-; CHECK-NEXT:    [[CONV1:%.*]] = select i1 [[TMP1]], i32 %x, i32 -2147483648
-; CHECK-NEXT:    [[TMP2:%.*]] = sitofp i32 [[CONV1]] to double
-; CHECK-NEXT:    ret double [[TMP2]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[TMP1]], i32 %x, i32 -2147483648
+; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[SEL]] to double
+; CHECK-NEXT:    ret double [[CONV]]
 ;
   %cmp = icmp sgt i32 %x, -1
   %sel = select i1 %cmp, i32 2147483648, i32 %x
@@ -420,3 +491,77 @@ define double @PR31751_umax3(i32 %x) {
   %conv = sitofp i32 %sel to double
   ret double %conv
 }
+
+; The icmp/select form a canonical smax, so don't hide that by folding the final bitcast into the select.
+
+define float @bitcast_scalar_smax(float %x, float %y) {
+; CHECK-LABEL: @bitcast_scalar_smax(
+; CHECK-NEXT:    [[BCX:%.*]] = bitcast float %x to i32
+; CHECK-NEXT:    [[BCY:%.*]] = bitcast float %y to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[BCX]], [[BCY]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 [[BCX]], i32 [[BCY]]
+; CHECK-NEXT:    [[BCS:%.*]] = bitcast i32 [[SEL]] to float
+; CHECK-NEXT:    ret float [[BCS]]
+;
+  %bcx = bitcast float %x to i32
+  %bcy = bitcast float %y to i32
+  %cmp = icmp sgt i32 %bcx, %bcy
+  %sel = select i1 %cmp, i32 %bcx, i32 %bcy
+  %bcs = bitcast i32 %sel to float
+  ret float %bcs
+}
+
+; FIXME: Create a canonical umax by bitcasting the select.
+
+define float @bitcast_scalar_umax(float %x, float %y) {
+; CHECK-LABEL: @bitcast_scalar_umax(
+; CHECK-NEXT:    [[BCX:%.*]] = bitcast float %x to i32
+; CHECK-NEXT:    [[BCY:%.*]] = bitcast float %y to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[BCX]], [[BCY]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], float %x, float %y
+; CHECK-NEXT:    ret float [[SEL]]
+;
+  %bcx = bitcast float %x to i32
+  %bcy = bitcast float %y to i32
+  %cmp = icmp ugt i32 %bcx, %bcy
+  %sel = select i1 %cmp, float %x, float %y
+  ret float %sel
+}
+
+; PR32306 - https://bugs.llvm.org/show_bug.cgi?id=32306
+; The icmp/select form a canonical smin, so don't hide that by folding the final bitcast into the select.
+
+define <8 x float> @bitcast_vector_smin(<8 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: @bitcast_vector_smin(
+; CHECK-NEXT:    [[BCX:%.*]] = bitcast <8 x float> %x to <8 x i32>
+; CHECK-NEXT:    [[BCY:%.*]] = bitcast <8 x float> %y to <8 x i32>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <8 x i32> [[BCX]], [[BCY]]
+; CHECK-NEXT:    [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[BCX]], <8 x i32> [[BCY]]
+; CHECK-NEXT:    [[BCS:%.*]] = bitcast <8 x i32> [[SEL]] to <8 x float>
+; CHECK-NEXT:    ret <8 x float> [[BCS]]
+;
+  %bcx = bitcast <8 x float> %x to <8 x i32>
+  %bcy = bitcast <8 x float> %y to <8 x i32>
+  %cmp = icmp slt <8 x i32> %bcx, %bcy
+  %sel = select <8 x i1> %cmp, <8 x i32> %bcx, <8 x i32> %bcy
+  %bcs = bitcast <8 x i32> %sel to <8 x float>
+  ret <8 x float> %bcs
+}
+
+; FIXME: Create a canonical umin by bitcasting the select.
+
+define <8 x float> @bitcast_vector_umin(<8 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: @bitcast_vector_umin(
+; CHECK-NEXT:    [[BCX:%.*]] = bitcast <8 x float> %x to <8 x i32>
+; CHECK-NEXT:    [[BCY:%.*]] = bitcast <8 x float> %y to <8 x i32>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <8 x i32> [[BCX]], [[BCY]]
+; CHECK-NEXT:    [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x float> %x, <8 x float> %y
+; CHECK-NEXT:    ret <8 x float> [[SEL]]
+;
+  %bcx = bitcast <8 x float> %x to <8 x i32>
+  %bcy = bitcast <8 x float> %y to <8 x i32>
+  %cmp = icmp slt <8 x i32> %bcx, %bcy
+  %sel = select <8 x i1> %cmp, <8 x float> %x, <8 x float> %y
+  ret <8 x float> %sel
+}
+
diff --git a/test/Transforms/InstCombine/narrow-switch.ll b/test/Transforms/InstCombine/narrow-switch.ll
index c391fd2cd3321..474bd820c8f8e 100644
--- a/test/Transforms/InstCombine/narrow-switch.ll
+++ b/test/Transforms/InstCombine/narrow-switch.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; Vary legal integer types in data layout.
-; RUN: opt < %s -instcombine -S -default-data-layout=n32    | FileCheck %s --check-prefix=ALL --check-prefix=CHECK32
-; RUN: opt < %s -instcombine -S -default-data-layout=n32:64 | FileCheck %s --check-prefix=ALL --check-prefix=CHECK64
+; RUN: opt < %s -instcombine -S -data-layout=n32    | FileCheck %s --check-prefix=ALL --check-prefix=CHECK32
+; RUN: opt < %s -instcombine -S -data-layout=n32:64 | FileCheck %s --check-prefix=ALL --check-prefix=CHECK64
 
 ; In all cases, the data-layout is irrelevant. We should shrink as much as possible in InstCombine
 ; and allow the backend to expand as much as needed to ensure optimal codegen for any target.
@@ -164,3 +164,45 @@ case124:
   ret i8 5
 }
 
+; Make sure the arithmetic on the switch condition
+; is evaluated in the original type.
+define i32 @trunc32to16(i32 %a0) #0 {
+; ALL-LABEL: @trunc32to16(
+; ALL:         switch i16
+; ALL-NEXT:    i16 63, label %sw.bb
+; ALL-NEXT:    i16 1, label %sw.bb1
+; ALL-NEXT:    i16 100, label %sw.bb2
+; ALL-NEXT:    ]
+;
+entry:
+  %retval = alloca i32, align 4
+  %xor = xor i32 %a0, 1034460917
+  %shr = lshr i32 %xor, 16
+  %add = add i32 %shr, -917677090
+  switch i32 %add, label %sw.epilog [
+    i32 -917677027, label %sw.bb
+    i32 -917677089, label %sw.bb1
+    i32 -917676990, label %sw.bb2
+  ]
+
+sw.bb:                                            ; preds = %entry
+  store i32 90, i32* %retval, align 4
+  br label %return
+
+sw.bb1:                                           ; preds = %entry
+  store i32 91, i32* %retval, align 4
+  br label %return
+
+sw.bb2:                                           ; preds = %entry
+  store i32 92, i32* %retval, align 4
+  br label %return
+
+sw.epilog:                                        ; preds = %entry
+  store i32 113, i32* %retval, align 4
+  br label %return
+
+return:                                           ; preds = %sw.epilog, %sw.bb2, %sw.bb1, %sw.bb
+  %rval = load i32, i32* %retval, align 4
+  ret i32 %rval
+}
+
diff --git a/test/Transforms/InstCombine/narrow.ll b/test/Transforms/InstCombine/narrow.ll
index 0e000e8bdbeb4..1df400aac9738 100644
--- a/test/Transforms/InstCombine/narrow.ll
+++ b/test/Transforms/InstCombine/narrow.ll
@@ -97,3 +97,143 @@ define <2 x i32> @shrink_and_vec(<2 x i33> %a) {
   ret <2 x i32> %trunc
 }
 
+; FIXME:
+; This is based on an 'any_of' loop construct.
+; By narrowing the phi and logic op, we simplify away the zext and the final icmp.
+
+define i1 @searchArray1(i32 %needle, i32* %haystack) {
+; CHECK-LABEL: @searchArray1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[FOUND:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[OR:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDVAR]] to i64
+; CHECK-NEXT:    [[IDX:%.*]] = getelementptr i32, i32* [[HAYSTACK:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[IDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[LD]], [[NEEDLE:%.*]]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP1]] to i8
+; CHECK-NEXT:    [[OR]] = or i8 [[FOUND]], [[ZEXT]]
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[OR]], 0
+; CHECK-NEXT:    ret i1 [[TOBOOL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %loop ]
+  %found = phi i8 [ 0, %entry ], [ %or, %loop ]
+  %idx = getelementptr i32, i32* %haystack, i32 %indvar
+  %ld = load i32, i32* %idx
+  %cmp1 = icmp eq i32 %ld, %needle
+  %zext = zext i1 %cmp1 to i8
+  %or = or i8 %found, %zext
+  %indvar.next = add i32 %indvar, 1
+  %exitcond = icmp eq i32 %indvar.next, 1000
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  %tobool = icmp ne i8 %or, 0
+  ret i1 %tobool
+}
+
+; FIXME:
+; This is based on an 'all_of' loop construct.
+; By narrowing the phi and logic op, we simplify away the zext and the final icmp.
+
+define i1 @searchArray2(i32 %hay, i32* %haystack) {
+; CHECK-LABEL: @searchArray2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[FOUND:%.*]] = phi i8 [ 1, [[ENTRY]] ], [ [[AND:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = getelementptr i32, i32* [[HAYSTACK:%.*]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[IDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[LD]], [[HAY:%.*]]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[CMP1]] to i8
+; CHECK-NEXT:    [[AND]] = and i8 [[FOUND]], [[ZEXT]]
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[TOBOOL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+  %found = phi i8 [ 1, %entry ], [ %and, %loop ]
+  %idx = getelementptr i32, i32* %haystack, i64 %indvar
+  %ld = load i32, i32* %idx
+  %cmp1 = icmp eq i32 %ld, %hay
+  %zext = zext i1 %cmp1 to i8
+  %and = and i8 %found, %zext
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, 1000
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  %tobool = icmp ne i8 %and, 0
+  ret i1 %tobool
+}
+
+; FIXME:
+; Narrowing should work with an 'xor' and is not limited to bool types.
+
+define i32 @shrinkLogicAndPhi1(i8 %x, i1 %cond) {
+; CHECK-LABEL: @shrinkLogicAndPhi1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ENDIF:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[ENDIF]]
+; CHECK:       endif:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 21, [[ENTRY:%.*]] ], [ 33, [[IF]] ]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
+; CHECK-NEXT:    [[LOGIC:%.*]] = xor i32 [[PHI]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[LOGIC]]
+;
+entry:
+  br i1 %cond, label %if, label %endif
+if:
+  br label %endif
+endif:
+  %phi = phi i32 [ 21, %entry], [ 33, %if ]
+  %zext = zext i8 %x to i32
+  %logic = xor i32 %phi, %zext
+  ret i32 %logic
+}
+
+; FIXME:
+; Narrowing should work with an 'xor' and is not limited to bool types.
+; Test that commuting the xor operands does not inhibit optimization.
+
+define i32 @shrinkLogicAndPhi2(i8 %x, i1 %cond) {
+; CHECK-LABEL: @shrinkLogicAndPhi2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ENDIF:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[ENDIF]]
+; CHECK:       endif:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 21, [[ENTRY:%.*]] ], [ 33, [[IF]] ]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
+; CHECK-NEXT:    [[LOGIC:%.*]] = xor i32 [[PHI]], [[ZEXT]]
+; CHECK-NEXT:    ret i32 [[LOGIC]]
+;
+entry:
+  br i1 %cond, label %if, label %endif
+if:
+  br label %endif
+endif:
+  %phi = phi i32 [ 21, %entry], [ 33, %if ]
+  %zext = zext i8 %x to i32
+  %logic = xor i32 %zext, %phi
+  ret i32 %logic
+}
+
diff --git a/test/Transforms/InstCombine/not-fcmp.ll b/test/Transforms/InstCombine/not-fcmp.ll
deleted file mode 100644
index 9718e0b905fca..0000000000000
--- a/test/Transforms/InstCombine/not-fcmp.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; PR1570
-
-define i1 @f(float %X, float %Y) {
-entry:
-        %tmp3 = fcmp olt float %X, %Y           ; <i1> [#uses=1]
-        %toBoolnot5 = xor i1 %tmp3, true                ; <i1> [#uses=1]
-        ret i1 %toBoolnot5
-; CHECK-LABEL: @f(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: %toBoolnot5 = fcmp uge float %X, %Y
-; CHECK-NEXT: ret i1 %toBoolnot5
-}
diff --git a/test/Transforms/InstCombine/not.ll b/test/Transforms/InstCombine/not.ll
index edb402a125ac1..d0c242f65558c 100644
--- a/test/Transforms/InstCombine/not.ll
+++ b/test/Transforms/InstCombine/not.ll
@@ -1,61 +1,95 @@
-; This test makes sure that these instructions are properly eliminated.
-;
-
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK-NOT: xor
 
 define i32 @test1(i32 %A) {
-        %B = xor i32 %A, -1
-        %C = xor i32 %B, -1
-        ret i32 %C
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret i32 %A
+;
+  %B = xor i32 %A, -1
+  %C = xor i32 %B, -1
+  ret i32 %C
 }
 
-define i1 @test2(i32 %A, i32 %B) {
-        ; Can change into setge
-        %cond = icmp sle i32 %A, %B
-        %Ret = xor i1 %cond, true
-        ret i1 %Ret
+define i1 @invert_icmp(i32 %A, i32 %B) {
+; CHECK-LABEL: @invert_icmp(
+; CHECK-NEXT:    [[NOT:%.*]] = icmp sgt i32 %A, %B
+; CHECK-NEXT:    ret i1 [[NOT]]
+;
+  %cmp = icmp sle i32 %A, %B
+  %not = xor i1 %cmp, true
+  ret i1 %not
+}
+
+; PR1570
+
+define i1 @invert_fcmp(float %X, float %Y) {
+; CHECK-LABEL: @invert_fcmp(
+; CHECK-NEXT:    [[NOT:%.*]] = fcmp uge float %X, %Y
+; CHECK-NEXT:    ret i1 [[NOT]]
+;
+  %cmp = fcmp olt float %X, %Y
+  %not = xor i1 %cmp, true
+  ret i1 %not
 }
 
 ; Test that De Morgan's law can be instcombined.
 define i32 @test3(i32 %A, i32 %B) {
-        %a = xor i32 %A, -1
-        %b = xor i32 %B, -1
-        %c = and i32 %a, %b
-        %d = xor i32 %c, -1
-        ret i32 %d
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = or i32 %A, %B
+; CHECK-NEXT:    ret i32 [[C_DEMORGAN]]
+;
+  %a = xor i32 %A, -1
+  %b = xor i32 %B, -1
+  %c = and i32 %a, %b
+  %d = xor i32 %c, -1
+  ret i32 %d
 }
 
 ; Test that De Morgan's law can work with constants.
 define i32 @test4(i32 %A, i32 %B) {
-        %a = xor i32 %A, -1
-        %c = and i32 %a, 5
-        %d = xor i32 %c, -1
-        ret i32 %d
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[D1:%.*]] = or i32 %A, -6
+; CHECK-NEXT:    ret i32 [[D1]]
+;
+  %a = xor i32 %A, -1
+  %c = and i32 %a, 5
+  %d = xor i32 %c, -1
+  ret i32 %d
 }
 
 ; Test the mirror of De Morgan's law.
 define i32 @test5(i32 %A, i32 %B) {
-        %a = xor i32 %A, -1
-        %b = xor i32 %B, -1
-        %c = or i32 %a, %b
-        %d = xor i32 %c, -1
-        ret i32 %d
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = and i32 %A, %B
+; CHECK-NEXT:    ret i32 [[C_DEMORGAN]]
+;
+  %a = xor i32 %A, -1
+  %b = xor i32 %B, -1
+  %c = or i32 %a, %b
+  %d = xor i32 %c, -1
+  ret i32 %d
 }
 
 ; PR2298
 define zeroext i8 @test6(i32 %a, i32 %b) {
-entry:
-	%tmp1not = xor i32 %a, -1
-	%tmp2not = xor i32 %b, -1
-	%tmp3 = icmp slt i32 %tmp1not, %tmp2not
-	%retval67 = zext i1 %tmp3 to i8
-	ret i8 %retval67
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 %b, %a
+; CHECK-NEXT:    [[RETVAL67:%.*]] = zext i1 [[TMP3]] to i8
+; CHECK-NEXT:    ret i8 [[RETVAL67]]
+;
+  %tmp1not = xor i32 %a, -1
+  %tmp2not = xor i32 %b, -1
+  %tmp3 = icmp slt i32 %tmp1not, %tmp2not
+  %retval67 = zext i1 %tmp3 to i8
+  ret i8 %retval67
 }
 
 define <2 x i1> @test7(<2 x i32> %A, <2 x i32> %B) {
-        %cond = icmp sle <2 x i32> %A, %B
-        %Ret = xor <2 x i1> %cond, <i1 true, i1 true>
-        ret <2 x i1> %Ret
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt <2 x i32> %A, %B
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %cond = icmp sle <2 x i32> %A, %B
+  %Ret = xor <2 x i1> %cond, <i1 true, i1 true>
+  ret <2 x i1> %Ret
 }
 
diff --git a/test/Transforms/InstCombine/nvvm-intrins.ll b/test/Transforms/InstCombine/nvvm-intrins.ll
new file mode 100644
index 0000000000000..cb65b8fdc5477
--- /dev/null
+++ b/test/Transforms/InstCombine/nvvm-intrins.ll
@@ -0,0 +1,471 @@
+; Check that nvvm intrinsics get simplified to target-generic intrinsics where
+; possible.
+;
+; We run this test twice; once with ftz on, and again with ftz off.  Behold the
+; hackery:
+
+; RUN: cat %s > %t.ftz
+; RUN: echo 'attributes #0 = { "nvptx-f32ftz" = "true" }' >> %t.ftz
+; RUN: opt < %t.ftz -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ
+
+; RUN: cat %s > %t.noftz
+; RUN: echo 'attributes #0 = { "nvptx-f32ftz" = "false" }' >> %t.noftz
+; RUN: opt < %t.noftz -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ
+
+; We handle nvvm intrinsics with ftz variants as follows:
+;  - If the module is in ftz mode, the ftz variant is transformed into the
+;    regular llvm intrinsic, and the non-ftz variant is left alone.
+;  - If the module is not in ftz mode, it's the reverse: Only the non-ftz
+;    variant is transformed, and the ftz variant is left alone.
+
+; Check NVVM intrinsics that map directly to LLVM target-generic intrinsics.
+
+; CHECK-LABEL: @ceil_double
+define double @ceil_double(double %a) #0 {
+; CHECK: call double @llvm.ceil.f64
+  %ret = call double @llvm.nvvm.ceil.d(double %a)
+  ret double %ret
+}
+; CHECK-LABEL: @ceil_float
+define float @ceil_float(float %a) #0 {
+; NOFTZ: call float @llvm.ceil.f32
+; FTZ: call float @llvm.nvvm.ceil.f
+  %ret = call float @llvm.nvvm.ceil.f(float %a)
+  ret float %ret
+}
+; CHECK-LABEL: @ceil_float_ftz
+define float @ceil_float_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.ceil.ftz.f
+; FTZ: call float @llvm.ceil.f32
+  %ret = call float @llvm.nvvm.ceil.ftz.f(float %a)
+  ret float %ret
+}
+
+; CHECK-LABEL: @fabs_double
+define double @fabs_double(double %a) #0 {
+; CHECK: call double @llvm.fabs.f64
+  %ret = call double @llvm.nvvm.fabs.d(double %a)
+  ret double %ret
+}
+; CHECK-LABEL: @fabs_float
+define float @fabs_float(float %a) #0 {
+; NOFTZ: call float @llvm.fabs.f32
+; FTZ: call float @llvm.nvvm.fabs.f
+  %ret = call float @llvm.nvvm.fabs.f(float %a)
+  ret float %ret
+}
+; CHECK-LABEL: @fabs_float_ftz
+define float @fabs_float_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.fabs.ftz.f
+; FTZ: call float @llvm.fabs.f32
+  %ret = call float @llvm.nvvm.fabs.ftz.f(float %a)
+  ret float %ret
+}
+
+; CHECK-LABEL: @floor_double
+define double @floor_double(double %a) #0 {
+; CHECK: call double @llvm.floor.f64
+  %ret = call double @llvm.nvvm.floor.d(double %a)
+  ret double %ret
+}
+; CHECK-LABEL: @floor_float
+define float @floor_float(float %a) #0 {
+; NOFTZ: call float @llvm.floor.f32
+; FTZ: call float @llvm.nvvm.floor.f
+  %ret = call float @llvm.nvvm.floor.f(float %a)
+  ret float %ret
+}
+; CHECK-LABEL: @floor_float_ftz
+define float @floor_float_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.floor.ftz.f
+; FTZ: call float @llvm.floor.f32
+  %ret = call float @llvm.nvvm.floor.ftz.f(float %a)
+  ret float %ret
+}
+
+; CHECK-LABEL: @fma_double
+define double @fma_double(double %a, double %b, double %c) #0 {
+; CHECK: call double @llvm.fma.f64
+  %ret = call double @llvm.nvvm.fma.rn.d(double %a, double %b, double %c)
+  ret double %ret
+}
+; CHECK-LABEL: @fma_float
+define float @fma_float(float %a, float %b, float %c) #0 {
+; NOFTZ: call float @llvm.fma.f32
+; FTZ: call float @llvm.nvvm.fma.rn.f
+  %ret = call float @llvm.nvvm.fma.rn.f(float %a, float %b, float %c)
+  ret float %ret
+}
+; CHECK-LABEL: @fma_float_ftz
+define float @fma_float_ftz(float %a, float %b, float %c) #0 {
+; NOFTZ: call float @llvm.nvvm.fma.rn.ftz.f
+; FTZ: call float @llvm.fma.f32
+  %ret = call float @llvm.nvvm.fma.rn.ftz.f(float %a, float %b, float %c)
+  ret float %ret
+}
+
+; CHECK-LABEL: @fmax_double
+define double @fmax_double(double %a, double %b) #0 {
+; CHECK: call double @llvm.maxnum.f64
+  %ret = call double @llvm.nvvm.fmax.d(double %a, double %b)
+  ret double %ret
+}
+; CHECK-LABEL: @fmax_float
+define float @fmax_float(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.maxnum.f32
+; FTZ: call float @llvm.nvvm.fmax.f
+  %ret = call float @llvm.nvvm.fmax.f(float %a, float %b)
+  ret float %ret
+}
+; CHECK-LABEL: @fmax_float_ftz
+define float @fmax_float_ftz(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.nvvm.fmax.ftz.f
+; FTZ: call float @llvm.maxnum.f32
+  %ret = call float @llvm.nvvm.fmax.ftz.f(float %a, float %b)
+  ret float %ret
+}
+
+; CHECK-LABEL: @fmin_double
+define double @fmin_double(double %a, double %b) #0 {
+; CHECK: call double @llvm.minnum.f64
+  %ret = call double @llvm.nvvm.fmin.d(double %a, double %b)
+  ret double %ret
+}
+; CHECK-LABEL: @fmin_float
+define float @fmin_float(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.minnum.f32
+; FTZ: call float @llvm.nvvm.fmin.f
+  %ret = call float @llvm.nvvm.fmin.f(float %a, float %b)
+  ret float %ret
+}
+; CHECK-LABEL: @fmin_float_ftz
+define float @fmin_float_ftz(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.nvvm.fmin.ftz.f
+; FTZ: call float @llvm.minnum.f32
+  %ret = call float @llvm.nvvm.fmin.ftz.f(float %a, float %b)
+  ret float %ret
+}
+
+; CHECK-LABEL: @round_double
+define double @round_double(double %a) #0 {
+; CHECK: call double @llvm.round.f64
+  %ret = call double @llvm.nvvm.round.d(double %a)
+  ret double %ret
+}
+; CHECK-LABEL: @round_float
+define float @round_float(float %a) #0 {
+; NOFTZ: call float @llvm.round.f32
+; FTZ: call float @llvm.nvvm.round.f
+  %ret = call float @llvm.nvvm.round.f(float %a)
+  ret float %ret
+}
+; CHECK-LABEL: @round_float_ftz
+define float @round_float_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.round.ftz.f
+; FTZ: call float @llvm.round.f32
+  %ret = call float @llvm.nvvm.round.ftz.f(float %a)
+  ret float %ret
+}
+
+; CHECK-LABEL: @trunc_double
+define double @trunc_double(double %a) #0 {
+; CHECK: call double @llvm.trunc.f64
+  %ret = call double @llvm.nvvm.trunc.d(double %a)
+  ret double %ret
+}
+; CHECK-LABEL: @trunc_float
+define float @trunc_float(float %a) #0 {
+; NOFTZ: call float @llvm.trunc.f32
+; FTZ: call float @llvm.nvvm.trunc.f
+  %ret = call float @llvm.nvvm.trunc.f(float %a)
+  ret float %ret
+}
+; CHECK-LABEL: @trunc_float_ftz
+define float @trunc_float_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.trunc.ftz.f
+; FTZ: call float @llvm.trunc.f32
+  %ret = call float @llvm.nvvm.trunc.ftz.f(float %a)
+  ret float %ret
+}
+
+; Check NVVM intrinsics that correspond to LLVM cast operations.
+
+; CHECK-LABEL: @test_d2i
+define i32 @test_d2i(double %a) #0 {
+; CHECK: fptosi double %a to i32
+  %ret = call i32 @llvm.nvvm.d2i.rz(double %a)
+  ret i32 %ret
+}
+; CHECK-LABEL: @test_f2i
+define i32 @test_f2i(float %a) #0 {
+; CHECK: fptosi float %a to i32
+  %ret = call i32 @llvm.nvvm.f2i.rz(float %a)
+  ret i32 %ret
+}
+; CHECK-LABEL: @test_d2ll
+define i64 @test_d2ll(double %a) #0 {
+; CHECK: fptosi double %a to i64
+  %ret = call i64 @llvm.nvvm.d2ll.rz(double %a)
+  ret i64 %ret
+}
+; CHECK-LABEL: @test_f2ll
+define i64 @test_f2ll(float %a) #0 {
+; CHECK: fptosi float %a to i64
+  %ret = call i64 @llvm.nvvm.f2ll.rz(float %a)
+  ret i64 %ret
+}
+; CHECK-LABEL: @test_d2ui
+define i32 @test_d2ui(double %a) #0 {
+; CHECK: fptoui double %a to i32
+  %ret = call i32 @llvm.nvvm.d2ui.rz(double %a)
+  ret i32 %ret
+}
+; CHECK-LABEL: @test_f2ui
+define i32 @test_f2ui(float %a) #0 {
+; CHECK: fptoui float %a to i32
+  %ret = call i32 @llvm.nvvm.f2ui.rz(float %a)
+  ret i32 %ret
+}
+; CHECK-LABEL: @test_d2ull
+define i64 @test_d2ull(double %a) #0 {
+; CHECK: fptoui double %a to i64
+  %ret = call i64 @llvm.nvvm.d2ull.rz(double %a)
+  ret i64 %ret
+}
+; CHECK-LABEL: @test_f2ull
+define i64 @test_f2ull(float %a) #0 {
+; CHECK: fptoui float %a to i64
+  %ret = call i64 @llvm.nvvm.f2ull.rz(float %a)
+  ret i64 %ret
+}
+
+; CHECK-LABEL: @test_i2d
+define double @test_i2d(i32 %a) #0 {
+; CHECK: sitofp i32 %a to double
+  %ret = call double @llvm.nvvm.i2d.rz(i32 %a)
+  ret double %ret
+}
+; CHECK-LABEL: @test_i2f
+define float @test_i2f(i32 %a) #0 {
+; CHECK: sitofp i32 %a to float
+  %ret = call float @llvm.nvvm.i2f.rz(i32 %a)
+  ret float %ret
+}
+; CHECK-LABEL: @test_ll2d
+define double @test_ll2d(i64 %a) #0 {
+; CHECK: sitofp i64 %a to double
+  %ret = call double @llvm.nvvm.ll2d.rz(i64 %a)
+  ret double %ret
+}
+; CHECK-LABEL: @test_ll2f
+define float @test_ll2f(i64 %a) #0 {
+; CHECK: sitofp i64 %a to float
+  %ret = call float @llvm.nvvm.ll2f.rz(i64 %a)
+  ret float %ret
+}
+; CHECK-LABEL: @test_ui2d
+define double @test_ui2d(i32 %a) #0 {
+; CHECK: uitofp i32 %a to double
+  %ret = call double @llvm.nvvm.ui2d.rz(i32 %a)
+  ret double %ret
+}
+; CHECK-LABEL: @test_ui2f
+define float @test_ui2f(i32 %a) #0 {
+; CHECK: uitofp i32 %a to float
+  %ret = call float @llvm.nvvm.ui2f.rz(i32 %a)
+  ret float %ret
+}
+; CHECK-LABEL: @test_ull2d
+define double @test_ull2d(i64 %a) #0 {
+; CHECK: uitofp i64 %a to double
+  %ret = call double @llvm.nvvm.ull2d.rz(i64 %a)
+  ret double %ret
+}
+; CHECK-LABEL: @test_ull2f
+define float @test_ull2f(i64 %a) #0 {
+; CHECK: uitofp i64 %a to float
+  %ret = call float @llvm.nvvm.ull2f.rz(i64 %a)
+  ret float %ret
+}
+
+; Check NVVM intrinsics that map to LLVM binary operations.
+
+; CHECK-LABEL: @test_add_rn_d
+define double @test_add_rn_d(double %a, double %b) #0 {
+; CHECK: fadd
+  %ret = call double @llvm.nvvm.add.rn.d(double %a, double %b)
+  ret double %ret
+}
+; CHECK-LABEL: @test_add_rn_f
+define float @test_add_rn_f(float %a, float %b) #0 {
+; NOFTZ: fadd
+; FTZ: call float @llvm.nvvm.add.rn.f
+  %ret = call float @llvm.nvvm.add.rn.f(float %a, float %b)
+  ret float %ret
+}
+; CHECK-LABEL: @test_add_rn_f_ftz
+define float @test_add_rn_f_ftz(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.nvvm.add.rn.ftz.f
+; FTZ: fadd
+  %ret = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b) ; ftz variant: folded to fadd only when the module is in ftz mode
+  ret float %ret
+}
+
+; CHECK-LABEL: @test_mul_rn_d
+define double @test_mul_rn_d(double %a, double %b) #0 {
+; CHECK: fmul
+  %ret = call double @llvm.nvvm.mul.rn.d(double %a, double %b)
+  ret double %ret
+}
+; CHECK-LABEL: @test_mul_rn_f
+define float @test_mul_rn_f(float %a, float %b) #0 {
+; NOFTZ: fmul
+; FTZ: call float @llvm.nvvm.mul.rn.f
+  %ret = call float @llvm.nvvm.mul.rn.f(float %a, float %b)
+  ret float %ret
+}
+; CHECK-LABEL: @test_mul_rn_f_ftz
+define float @test_mul_rn_f_ftz(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.nvvm.mul.rn.ftz.f
+; FTZ: fmul
+  %ret = call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b) ; ftz variant: folded to fmul only when the module is in ftz mode
+  ret float %ret
+}
+
+; CHECK-LABEL: @test_div_rn_d
+define double @test_div_rn_d(double %a, double %b) #0 {
+; CHECK: fdiv
+  %ret = call double @llvm.nvvm.div.rn.d(double %a, double %b)
+  ret double %ret
+}
+; CHECK-LABEL: @test_div_rn_f
+define float @test_div_rn_f(float %a, float %b) #0 {
+; NOFTZ: fdiv
+; FTZ: call float @llvm.nvvm.div.rn.f
+  %ret = call float @llvm.nvvm.div.rn.f(float %a, float %b)
+  ret float %ret
+}
+; CHECK-LABEL: @test_div_rn_f_ftz
+define float @test_div_rn_f_ftz(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.nvvm.div.rn.ftz.f
+; FTZ: fdiv
+  %ret = call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b) ; ftz variant: folded to fdiv only when the module is in ftz mode
+  ret float %ret
+}
+
+; Check NVVM intrinsics that require us to emit custom IR.
+
+; CHECK-LABEL: @test_rcp_rn_f
+define float @test_rcp_rn_f(float %a) #0 {
+; NOFTZ: fdiv float 1.0{{.*}} %a
+; FTZ: call float @llvm.nvvm.rcp.rn.f
+  %ret = call float @llvm.nvvm.rcp.rn.f(float %a)
+  ret float %ret
+}
+; CHECK-LABEL: @test_rcp_rn_f_ftz
+define float @test_rcp_rn_f_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.rcp.rn.ftz.f
+; FTZ: fdiv float 1.0{{.*}} %a
+  %ret = call float @llvm.nvvm.rcp.rn.ftz.f(float %a) ; ftz variant: becomes 1.0 / %a only when the module is in ftz mode
+  ret float %ret
+}
+
+; CHECK-LABEL: @test_sqrt_rn_d
+define double @test_sqrt_rn_d(double %a) #0 {
+; CHECK: call double @llvm.sqrt.f64(double %a)
+  %ret = call double @llvm.nvvm.sqrt.rn.d(double %a)
+  ret double %ret
+}
+; nvvm.sqrt.f is a special case: it becomes llvm.sqrt.f32 in both ftz and non-ftz modes.
+; CHECK-LABEL: @test_sqrt_f
+define float @test_sqrt_f(float %a) #0 {
+; CHECK: call float @llvm.sqrt.f32(float %a)
+  %ret = call float @llvm.nvvm.sqrt.f(float %a)
+  ret float %ret
+}
+; CHECK-LABEL: @test_sqrt_rn_f
+define float @test_sqrt_rn_f(float %a) #0 {
+; NOFTZ: call float @llvm.sqrt.f32(float %a)
+; FTZ: call float @llvm.nvvm.sqrt.rn.f
+  %ret = call float @llvm.nvvm.sqrt.rn.f(float %a)
+  ret float %ret
+}
+; CHECK-LABEL: @test_sqrt_rn_f_ftz
+define float @test_sqrt_rn_f_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.sqrt.rn.ftz.f
+; FTZ: call float @llvm.sqrt.f32(float %a)
+  %ret = call float @llvm.nvvm.sqrt.rn.ftz.f(float %a) ; ftz variant: becomes llvm.sqrt.f32 only when the module is in ftz mode
+  ret float %ret
+}
+
+declare double @llvm.nvvm.add.rn.d(double, double)
+declare float @llvm.nvvm.add.rn.f(float, float)
+declare float @llvm.nvvm.add.rn.ftz.f(float, float)
+declare double @llvm.nvvm.ceil.d(double)
+declare float @llvm.nvvm.ceil.f(float)
+declare float @llvm.nvvm.ceil.ftz.f(float)
+declare float @llvm.nvvm.d2f.rm(double)
+declare float @llvm.nvvm.d2f.rm.ftz(double)
+declare float @llvm.nvvm.d2f.rp(double)
+declare float @llvm.nvvm.d2f.rp.ftz(double)
+declare float @llvm.nvvm.d2f.rz(double)
+declare float @llvm.nvvm.d2f.rz.ftz(double)
+declare i32 @llvm.nvvm.d2i.rz(double)
+declare i64 @llvm.nvvm.d2ll.rz(double)
+declare i32 @llvm.nvvm.d2ui.rz(double)
+declare i64 @llvm.nvvm.d2ull.rz(double)
+declare double @llvm.nvvm.div.rn.d(double, double)
+declare float @llvm.nvvm.div.rn.f(float, float)
+declare float @llvm.nvvm.div.rn.ftz.f(float, float)
+declare i16 @llvm.nvvm.f2h.rz(float)
+declare i16 @llvm.nvvm.f2h.rz.ftz(float)
+declare i32 @llvm.nvvm.f2i.rz(float)
+declare i32 @llvm.nvvm.f2i.rz.ftz(float)
+declare i64 @llvm.nvvm.f2ll.rz(float)
+declare i64 @llvm.nvvm.f2ll.rz.ftz(float)
+declare i32 @llvm.nvvm.f2ui.rz(float)
+declare i32 @llvm.nvvm.f2ui.rz.ftz(float)
+declare i64 @llvm.nvvm.f2ull.rz(float)
+declare i64 @llvm.nvvm.f2ull.rz.ftz(float)
+declare double @llvm.nvvm.fabs.d(double)
+declare float @llvm.nvvm.fabs.f(float)
+declare float @llvm.nvvm.fabs.ftz.f(float)
+declare double @llvm.nvvm.floor.d(double)
+declare float @llvm.nvvm.floor.f(float)
+declare float @llvm.nvvm.floor.ftz.f(float)
+declare double @llvm.nvvm.fma.rn.d(double, double, double)
+declare float @llvm.nvvm.fma.rn.f(float, float, float)
+declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float)
+declare double @llvm.nvvm.fmax.d(double, double)
+declare float @llvm.nvvm.fmax.f(float, float)
+declare float @llvm.nvvm.fmax.ftz.f(float, float)
+declare double @llvm.nvvm.fmin.d(double, double)
+declare float @llvm.nvvm.fmin.f(float, float)
+declare float @llvm.nvvm.fmin.ftz.f(float, float)
+declare double @llvm.nvvm.i2d.rz(i32)
+declare float @llvm.nvvm.i2f.rz(i32)
+declare double @llvm.nvvm.ll2d.rz(i64)
+declare float @llvm.nvvm.ll2f.rz(i64)
+declare double @llvm.nvvm.lohi.i2d(i32, i32)
+declare double @llvm.nvvm.mul.rn.d(double, double)
+declare float @llvm.nvvm.mul.rn.f(float, float)
+declare float @llvm.nvvm.mul.rn.ftz.f(float, float)
+declare double @llvm.nvvm.rcp.rm.d(double)
+declare double @llvm.nvvm.rcp.rn.d(double)
+declare float @llvm.nvvm.rcp.rn.f(float)
+declare float @llvm.nvvm.rcp.rn.ftz.f(float)
+declare double @llvm.nvvm.round.d(double)
+declare float @llvm.nvvm.round.f(float)
+declare float @llvm.nvvm.round.ftz.f(float)
+declare float @llvm.nvvm.sqrt.f(float)
+declare double @llvm.nvvm.sqrt.rn.d(double)
+declare float @llvm.nvvm.sqrt.rn.f(float)
+declare float @llvm.nvvm.sqrt.rn.ftz.f(float)
+declare double @llvm.nvvm.trunc.d(double)
+declare float @llvm.nvvm.trunc.f(float)
+declare float @llvm.nvvm.trunc.ftz.f(float)
+declare double @llvm.nvvm.ui2d.rz(i32)
+declare float @llvm.nvvm.ui2f.rn(i32)
+declare float @llvm.nvvm.ui2f.rz(i32)
+declare double @llvm.nvvm.ull2d.rz(i64)
+declare float @llvm.nvvm.ull2f.rz(i64)
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 2af391f907cc3..5c0a36f5feaa0 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -8,7 +8,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 define i32 @foo() nounwind {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT: ret i32 60
-  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false)
   ret i32 %1
 }
 
@@ -16,7 +16,7 @@ define i8* @bar() nounwind {
 ; CHECK-LABEL: @bar(
 entry:
   %retval = alloca i8*
-  %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false)
+  %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false)
   %cmp = icmp ne i32 %0, -1
 ; CHECK: br i1 true
   br i1 %cmp, label %cond.true, label %cond.false
@@ -33,7 +33,7 @@ cond.false:
 define i32 @f() nounwind {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT: ret i32 0
-  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr ([60 x i8], [60 x i8]* @a, i32 1, i32 0), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr ([60 x i8], [60 x i8]* @a, i32 1, i32 0), i1 false, i1 false)
   ret i32 %1
 }
 
@@ -42,7 +42,7 @@ define i32 @f() nounwind {
 define i1 @baz() nounwind {
 ; CHECK-LABEL: @baz(
 ; CHECK-NEXT: objectsize
-  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8], [0 x i8]* @window, i32 0, i32 0), i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8], [0 x i8]* @window, i32 0, i32 0), i1 false, i1 false)
   %2 = icmp eq i32 %1, -1
   ret i1 %2
 }
@@ -51,7 +51,7 @@ define void @test1(i8* %q, i32 %x) nounwind noinline {
 ; CHECK-LABEL: @test1(
 ; CHECK: objectsize.i32.p0i8
 entry:
-  %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8], [0 x i8]* @window, i32 0, i32 10), i1 false) ; <i64> [#uses=1]
+  %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8], [0 x i8]* @window, i32 0, i32 10), i1 false, i1 false) ; <i64> [#uses=1]
   %1 = icmp eq i32 %0, -1                         ; <i1> [#uses=1]
   br i1 %1, label %"47", label %"46"
 
@@ -67,7 +67,7 @@ entry:
 define i32 @test2() nounwind {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT: ret i32 34
-  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr (i8, i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr (i8, i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false, i1 false)
   ret i32 %1
 }
 
@@ -76,7 +76,9 @@ define i32 @test2() nounwind {
 
 declare i8* @__memcpy_chk(i8*, i8*, i32, i32) nounwind
 
-declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) nounwind readonly
+
+declare i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)*, i1, i1) nounwind readonly
 
 declare i8* @__inline_memcpy_chk(i8*, i8*, i32) nounwind inlinehint
 
@@ -88,7 +90,7 @@ entry:
 bb11:
   %0 = getelementptr inbounds float, float* getelementptr inbounds ([480 x float], [480 x float]* @array, i32 0, i32 128), i32 -127 ; <float*> [#uses=1]
   %1 = bitcast float* %0 to i8*                   ; <i8*> [#uses=1]
-  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false) ; <i32> [#uses=1]
+  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false, i1 false) ; <i32> [#uses=1]
   %3 = call i8* @__memcpy_chk(i8* undef, i8* undef, i32 512, i32 %2) nounwind ; <i8*> [#uses=0]
 ; CHECK: unreachable
   unreachable
@@ -110,7 +112,7 @@ define i32 @test4(i8** %esc) nounwind ssp {
 entry:
   %0 = alloca %struct.data, align 8
   %1 = bitcast %struct.data* %0 to i8*
-  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false) nounwind
+  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false, i1 false) nounwind
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @llvm.memset.p0i8.i32(i8* %1, i8 0, i32 1824, i32 8, i1 false)
   %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind
@@ -125,7 +127,7 @@ define i8* @test5(i32 %n) nounwind ssp {
 ; CHECK-LABEL: @test5(
 entry:
   %0 = tail call noalias i8* @malloc(i32 20) nounwind
-  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false, i1 false)
   %2 = load i8*, i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false)
@@ -137,7 +139,7 @@ define void @test6(i32 %n) nounwind ssp {
 ; CHECK-LABEL: @test6(
 entry:
   %0 = tail call noalias i8* @malloc(i32 20) nounwind
-  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false, i1 false)
   %2 = load i8*, i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @__memcpy_chk(i8* %0, i8* %1, i32 30, i32 20)
@@ -154,7 +156,7 @@ define i32 @test7(i8** %esc) {
   %alloc = call noalias i8* @malloc(i32 48) nounwind
   store i8* %alloc, i8** %esc
   %gep = getelementptr inbounds i8, i8* %alloc, i32 16
-  %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false) nounwind readonly
+  %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false, i1 false) nounwind readonly
 ; CHECK: ret i32 32
   ret i32 %objsize
 }
@@ -166,7 +168,7 @@ define i32 @test8(i8** %esc) {
   %alloc = call noalias i8* @calloc(i32 5, i32 7) nounwind
   store i8* %alloc, i8** %esc
   %gep = getelementptr inbounds i8, i8* %alloc, i32 5
-  %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false) nounwind readonly
+  %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false, i1 false) nounwind readonly
 ; CHECK: ret i32 30
   ret i32 %objsize
 }
@@ -178,7 +180,7 @@ declare noalias i8* @strndup(i8* nocapture, i32) nounwind
 define i32 @test9(i8** %esc) {
   %call = tail call i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0)) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -187,7 +189,7 @@ define i32 @test9(i8** %esc) {
 define i32 @test10(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 3) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false)
 ; CHECK: ret i32 4
   ret i32 %1
 }
@@ -196,7 +198,7 @@ define i32 @test10(i8** %esc) {
 define i32 @test11(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 7) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -205,7 +207,7 @@ define i32 @test11(i8** %esc) {
 define i32 @test12(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 8) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -214,7 +216,7 @@ define i32 @test12(i8** %esc) {
 define i32 @test13(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 57) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false)
 ; CHECK: ret i32 8
   ret i32 %1
 }
@@ -225,7 +227,7 @@ define i32 @test13(i8** %esc) {
 ; CHECK-NEXT: ret i32 60
 define i32 @test18() {
   %bc = bitcast [60 x i8]* @globalalias to i8*
-  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false, i1 false)
   ret i32 %1
 }
 
@@ -235,7 +237,67 @@ define i32 @test18() {
 ; CHECK: llvm.objectsize
 define i32 @test19() {
   %bc = bitcast [60 x i8]* @globalalias2 to i8*
-  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false, i1 false)
+  ret i32 %1
+}
+
+; CHECK-LABEL: @test20(
+; CHECK: ret i32 0
+define i32 @test20() {
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false, i1 false)
+  ret i32 %1
+}
+
+; CHECK-LABEL: @test21(
+; CHECK: ret i32 0
+define i32 @test21() {
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 true, i1 false)
+  ret i32 %1
+}
+
+; CHECK-LABEL: @test22(
+; CHECK: llvm.objectsize
+define i32 @test22() {
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false, i1 true)
+  ret i32 %1
+}
+
+; CHECK-LABEL: @test23(
+; CHECK: llvm.objectsize
+define i32 @test23() {
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 true, i1 true)
   ret i32 %1
 }
 
+; 1 is an arbitrary non-zero address space.
+; CHECK-LABEL: @test24(
+; CHECK: ret i32 0
+define i32 @test24() {
+  %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 false,
+                                          i1 false)
+  ret i32 %1
+}
+
+; CHECK-LABEL: @test25(
+; CHECK: ret i32 0
+define i32 @test25() {
+  %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 true,
+                                          i1 false)
+  ret i32 %1
+}
+
+; CHECK-LABEL: @test26(
+; CHECK: ret i32 0
+define i32 @test26() {
+  %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 false,
+                                          i1 true)
+  ret i32 %1
+}
+
+; CHECK-LABEL: @test27(
+; CHECK: ret i32 0
+define i32 @test27() {
+  %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 true,
+                                          i1 true)
+  ret i32 %1
+}
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index 2c9088428bdec..41e6d2d1f8277 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -207,19 +207,6 @@ define <2 x i1> @test18vec(<2 x i32> %A) {
   ret <2 x i1> %D
 }
 
-define i1 @test19(i32 %A) {
-; CHECK-LABEL: @test19(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %A, 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 51
-; CHECK-NEXT:    ret i1 [[TMP2]]
-;
-  %B = icmp eq i32 %A, 50
-  %C = icmp eq i32 %A, 51
-  ;; (A&-2) == 50
-  %D = or i1 %B, %C
-  ret i1 %D
-}
-
 define i32 @test20(i32 %x) {
 ; CHECK-LABEL: @test20(
 ; CHECK-NEXT:    ret i32 %x
@@ -490,7 +477,7 @@ define i32 @orsext_to_sel_multi_use(i32 %x, i1 %y) {
 ; CHECK-LABEL: @orsext_to_sel_multi_use(
 ; CHECK-NEXT:    [[SEXT:%.*]] = sext i1 %y to i32
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SEXT]], %x
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SEXT]], [[OR]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[OR]], [[SEXT]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %sext = sext i1 %y to i32
@@ -521,7 +508,7 @@ define <2 x i132> @orsext_to_sel_vec_swap(<2 x i132> %x, <2 x i1> %y) {
 
 define i32 @test39(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test39(
-; CHECK-NEXT:    [[OR:%.*]] = or i32 %a, %b
+; CHECK-NEXT:    [[OR:%.*]] = or i32 %b, %a
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
   %xor = xor i32 %a, -1
@@ -542,6 +529,42 @@ define i32 @test40(i32 %a, i32 %b) {
   ret i32 %or
 }
 
+define i32 @test40b(i32 %a, i32 %b) {
+; CHECK-LABEL: @test40b(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 %a, -1
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[XOR]], %b
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %and = and i32 %b, %a
+  %xor = xor i32 %a, -1
+  %or = or i32 %and, %xor
+  ret i32 %or
+}
+
+define i32 @test40c(i32 %a, i32 %b) {
+; CHECK-LABEL: @test40c(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 %a, -1
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[XOR]], %b
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %and = and i32 %b, %a
+  %xor = xor i32 %a, -1
+  %or = or i32 %xor, %and
+  ret i32 %or
+}
+
+define i32 @test40d(i32 %a, i32 %b) {
+; CHECK-LABEL: @test40d(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 %a, -1
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[XOR]], %b
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %and = and i32 %a, %b
+  %xor = xor i32 %a, -1
+  %or = or i32 %xor, %and
+  ret i32 %or
+}
+
 define i32 @test41(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test41(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, -1
@@ -701,3 +724,138 @@ define i1 @test48(i64 %x, i1 %b) {
   %3 = or i1 %1, %.b
   ret i1 %3
 }
+
+define i32 @test49(i1 %C) {
+; CHECK-LABEL: @test49(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], i32 1019, i32 123
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %A = select i1 %C, i32 1000, i32 10
+  %V = or i32 %A, 123
+  ret i32 %V
+}
+
+define <2 x i32> @test49vec(i1 %C) {
+; CHECK-LABEL: @test49vec(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1019, i32 1019>, <2 x i32> <i32 123, i32 123>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+  %V = or <2 x i32> %A, <i32 123, i32 123>
+  ret <2 x i32> %V
+}
+
+define <2 x i32> @test49vec2(i1 %C) {
+; CHECK-LABEL: @test49vec2(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1019, i32 2509>, <2 x i32> <i32 123, i32 351>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+  %V = or <2 x i32> %A, <i32 123, i32 333>
+  ret <2 x i32> %V
+}
+
+define i32 @test50(i1 %which) {
+; CHECK-LABEL: @test50(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 1019, [[ENTRY:%.*]] ], [ 123, [[DELAY]] ]
+; CHECK-NEXT:    ret i32 [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+  %value = or i32 %A, 123
+  ret i32 %value
+}
+
+define <2 x i32> @test50vec(i1 %which) {
+; CHECK-LABEL: @test50vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1019, i32 1019>, [[ENTRY:%.*]] ], [ <i32 123, i32 123>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+  %value = or <2 x i32> %A, <i32 123, i32 123>
+  ret <2 x i32> %value
+}
+
+define <2 x i32> @test50vec2(i1 %which) {
+; CHECK-LABEL: @test50vec2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1019, i32 2509>, [[ENTRY:%.*]] ], [ <i32 123, i32 351>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+  %value = or <2 x i32> %A, <i32 123, i32 333>
+  ret <2 x i32> %value
+}
+
+define i8 @test51(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT:    [[W:%.*]] = mul i8 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = or i8 [[W]], [[A:%.*]]
+; CHECK-NEXT:    ret i8 [[X]]
+;
+  %w = mul i8 %b, %c
+  %z = xor i8 %a, -1
+  %y = and i8 %w, %z
+  %x = or i8 %y, %a
+  ret i8 %x
+}
+
+define i8 @test52(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: @test52(
+; CHECK-NEXT:    [[W:%.*]] = mul i8 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = or i8 [[W]], [[A:%.*]]
+; CHECK-NEXT:    ret i8 [[X]]
+;
+  %w = mul i8 %b, %c
+  %z = xor i8 %w, -1
+  %y = and i8 %z, %a
+  %x = or i8 %w, %y
+  ret i8 %x
+}
+
+define i8 @test53(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: @test53(
+; CHECK-NEXT:    [[W:%.*]] = mul i8 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = or i8 [[W]], [[A:%.*]]
+; CHECK-NEXT:    ret i8 [[X]]
+;
+  %w = mul i8 %b, %c
+  %z = xor i8 %w, -1
+  %y = and i8 %z, %a
+  %x = or i8 %y, %w ; (~W & A) | W: 'or' operands commuted relative to @test52
+  ret i8 %x
+}
diff --git a/test/Transforms/InstCombine/phi-select-constant.ll b/test/Transforms/InstCombine/phi-select-constant.ll
new file mode 100644
index 0000000000000..272594d7f4f9c
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-select-constant.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+@A = extern_weak global i32, align 4
+@B = extern_weak global i32, align 4
+
+define i32 @foo(i1 %which) {
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+; CHECK-LABEL: @foo
+; CHECK-LABEL: final:
+; CHECK: phi i32 [ 1, %entry ], [ select (i1 icmp eq (i32* @A, i32* @B), i32 2, i32 1), %delay ]
+final:
+  %use2 = phi i1 [ false, %entry ], [ icmp eq (i32* @A, i32* @B), %delay ]
+  %value = select i1 %use2, i32 2, i32 1
+  ret i32 %value
+}
+
+
+; test folding of select into phi for vectors.
+define <4 x i64> @vec1(i1 %which) {
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+; CHECK-LABEL: @vec1
+; CHECK-LABEL: final:
+; CHECK: %phinode = phi <4 x i64> [ zeroinitializer, %entry ], [ <i64 0, i64 0, i64 126, i64 127>, %delay ]
+; CHECK-NOT: select
+; CHECK: ret <4 x i64> %phinode
+ %phinode =  phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, %entry ], [ <i1 true, i1 true, i1 false, i1 false>, %delay ]
+ %sel = select <4 x i1> %phinode, <4 x i64> zeroinitializer, <4 x i64> <i64 124, i64 125, i64 126, i64 127>
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @vec2(i1 %which) {
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+; CHECK-LABEL: @vec2
+; CHECK-LABEL: final:
+; CHECK: %phinode = phi <4 x i64> [ <i64 124, i64 125, i64 126, i64 127>, %entry ], [ <i64 0, i64 125, i64 0, i64 127>, %delay ]
+; CHECK-NOT: select
+; CHECK: ret <4 x i64> %phinode
+ %phinode =  phi <4 x i1> [ <i1 false, i1 false, i1 false, i1 false>, %entry ], [ <i1 true, i1 false, i1 true, i1 false>, %delay ]
+ %sel = select <4 x i1> %phinode, <4 x i64> zeroinitializer, <4 x i64> <i64 124, i64 125, i64 126, i64 127>
+ ret <4 x i64> %sel
+}
diff --git a/test/Transforms/InstCombine/phi-select-constexpr.ll b/test/Transforms/InstCombine/phi-select-constexpr.ll
deleted file mode 100644
index 054e0691d47a0..0000000000000
--- a/test/Transforms/InstCombine/phi-select-constexpr.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -S -instcombine | FileCheck %s
-@A = extern_weak global i32, align 4
-@B = extern_weak global i32, align 4
-
-define i32 @foo(i1 %which) {
-entry:
-  br i1 %which, label %final, label %delay
-
-delay:
-  br label %final
-
-; CHECK-LABEL: final:
-; CHECK: phi i32 [ 1, %entry ], [ select (i1 icmp eq (i32* @A, i32* @B), i32 2, i32 1), %delay ]
-final:
-  %use2 = phi i1 [ false, %entry ], [ icmp eq (i32* @A, i32* @B), %delay ]
-  %value = select i1 %use2, i32 2, i32 1
-  ret i32 %value
-}
-
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
index c9f71fd457216..602c20a1314b5 100644
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -72,7 +72,7 @@ define float @test_simplify7(float %x) {
 ; CHECK-LABEL: @test_simplify7(
   %retval = call float @powf(float %x, float 0.5)
 ; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) [[NUW_RO:#[0-9]+]]
-; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) [[NUW_RO]]
+; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @llvm.fabs.f32(float [[SQRTF]])
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq float %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], float 0x7FF0000000000000, float [[FABSF]]
   ret float %retval
@@ -83,7 +83,7 @@ define double @test_simplify8(double %x) {
 ; CHECK-LABEL: @test_simplify8(
   %retval = call double @pow(double %x, double 0.5)
 ; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
-; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]])
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
   ret double %retval
@@ -163,7 +163,7 @@ define double @test_simplify17(double %x) {
 ; CHECK-LABEL: @test_simplify17(
   %retval = call double @llvm.pow.f64(double %x, double 0.5)
 ; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x)
-; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]])
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]])
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
   ret double %retval
diff --git a/test/Transforms/InstCombine/pr17827.ll b/test/Transforms/InstCombine/pr17827.ll
index a3ed5e1697ec7..ada6edab69c62 100644
--- a/test/Transforms/InstCombine/pr17827.ll
+++ b/test/Transforms/InstCombine/pr17827.ll
@@ -48,14 +48,14 @@ define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) {
 }
 
 ; FIXME: Vectors should fold the same way.
+
 define <2 x i1> @test_shift_and_cmp_changed1_vec(<2 x i8> %p, <2 x i8> %q) {
 ; CHECK-LABEL: @test_shift_and_cmp_changed1_vec(
 ; CHECK-NEXT:    [[ANDP:%.*]] = and <2 x i8> %p, <i8 6, i8 6>
 ; CHECK-NEXT:    [[ANDQ:%.*]] = and <2 x i8> %q, <i8 8, i8 8>
 ; CHECK-NEXT:    [[OR:%.*]] = or <2 x i8> [[ANDQ]], [[ANDP]]
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> [[OR]], <i8 5, i8 5>
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i8> [[SHL]], <i8 5, i8 5>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[ASHR]], <i8 1, i8 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[SHL]], <i8 32, i8 32>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %andp = and <2 x i8> %p, <i8 6, i8 6>
diff --git a/test/Transforms/InstCombine/pr19420.ll b/test/Transforms/InstCombine/pr19420.ll
index 23fa0a4097458..015f35eaaa53d 100644
--- a/test/Transforms/InstCombine/pr19420.ll
+++ b/test/Transforms/InstCombine/pr19420.ll
@@ -1,36 +1,44 @@
 ; RUN: opt -S -instcombine < %s | FileCheck %s
 
-; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL
-; CHECK: mul <4 x i32> %in, <i32 0, i32 -32, i32 0, i32 -32>
-; CHECK-NEXT: ret
 define <4 x i32> @test_FoldShiftByConstant_CreateSHL(<4 x i32> %in) {
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL(
+; CHECK-NEXT:    [[VSHL_N:%.*]] = mul <4 x i32> %in, <i32 0, i32 -32, i32 0, i32 -32>
+; CHECK-NEXT:    ret <4 x i32> [[VSHL_N]]
+;
   %mul.i = mul <4 x i32> %in, <i32 0, i32 -1, i32 0, i32 -1>
   %vshl_n = shl <4 x i32> %mul.i, <i32 5, i32 5, i32 5, i32 5>
   ret <4 x i32> %vshl_n
 }
 
-; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL2
-; CHECK: mul <8 x i16> %in, <i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32>
-; CHECK-NEXT: ret
 define <8 x i16> @test_FoldShiftByConstant_CreateSHL2(<8 x i16> %in) {
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL2(
+; CHECK-NEXT:    [[VSHL_N:%.*]] = mul <8 x i16> %in, <i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32>
+; CHECK-NEXT:    ret <8 x i16> [[VSHL_N]]
+;
   %mul.i = mul <8 x i16> %in, <i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1>
   %vshl_n = shl <8 x i16> %mul.i, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
   ret <8 x i16> %vshl_n
 }
 
-; CHECK-LABEL: @test_FoldShiftByConstant_CreateAnd
-; CHECK: mul <16 x i8> %in0, <i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33>
-; CHECK-NEXT: and <16 x i8> %vsra_n2, <i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32>
-; CHECK-NEXT: ret
 define <16 x i8> @test_FoldShiftByConstant_CreateAnd(<16 x i8> %in0) {
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateAnd(
+; CHECK-NEXT:    [[VSRA_N2:%.*]] = mul <16 x i8> %in0, <i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33>
+; CHECK-NEXT:    [[VSHL_N:%.*]] = and <16 x i8> [[VSRA_N2]], <i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32>
+; CHECK-NEXT:    ret <16 x i8> [[VSHL_N]]
+;
   %vsra_n = ashr <16 x i8> %in0, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
   %tmp = add <16 x i8> %in0, %vsra_n
   %vshl_n = shl <16 x i8> %tmp, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
   ret <16 x i8> %vshl_n
 }
 
-
 define i32 @bar(i32 %x, i32 %y) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:    [[B1:%.*]] = shl i32 %y, 4
+; CHECK-NEXT:    [[A2:%.*]] = add i32 [[B1]], %x
+; CHECK-NEXT:    [[C:%.*]] = and i32 [[A2]], -16
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = lshr i32 %x, 4
   %b = add i32 %a, %y
   %c = shl i32 %b, 4
@@ -38,16 +46,25 @@ define i32 @bar(i32 %x, i32 %y) {
 }
 
 define <2 x i32> @bar_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @bar_v2i32(
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i32> %y, <i32 5, i32 5>
+; CHECK-NEXT:    [[A2:%.*]] = add <2 x i32> [[B1]], %x
+; CHECK-NEXT:    [[C:%.*]] = and <2 x i32> [[A2]], <i32 -32, i32 -32>
+; CHECK-NEXT:    ret <2 x i32> [[C]]
+;
   %a = lshr <2 x i32> %x, <i32 5, i32 5>
   %b = add <2 x i32> %a, %y
   %c = shl <2 x i32> %b, <i32 5, i32 5>
   ret <2 x i32> %c
 }
 
-
-
-
 define i32 @foo(i32 %x, i32 %y) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    [[C1:%.*]] = shl i32 %y, 4
+; CHECK-NEXT:    [[X_MASK:%.*]] = and i32 %x, 128
+; CHECK-NEXT:    [[D:%.*]] = add i32 [[X_MASK]], [[C1]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
   %a = lshr i32 %x, 4
   %b = and i32 %a, 8
   %c = add i32 %b, %y
@@ -56,6 +73,13 @@ define i32 @foo(i32 %x, i32 %y) {
 }
 
 define <2 x i32> @foo_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @foo_v2i32(
+; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i32> %x, <i32 4, i32 4>
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A]], <i32 8, i32 8>
+; CHECK-NEXT:    [[C:%.*]] = add <2 x i32> [[B]], %y
+; CHECK-NEXT:    [[D:%.*]] = shl <2 x i32> [[C]], <i32 4, i32 4>
+; CHECK-NEXT:    ret <2 x i32> [[D]]
+;
   %a = lshr <2 x i32> %x, <i32 4, i32 4>
   %b = and <2 x i32> %a, <i32 8, i32 8>
   %c = add <2 x i32> %b, %y
@@ -63,5 +87,3 @@ define <2 x i32> @foo_v2i32(<2 x i32> %x, <2 x i32> %y) {
   ret <2 x i32> %d
 }
 
-
-
diff --git a/test/Transforms/InstCombine/pr31990_wrong_memcpy.ll b/test/Transforms/InstCombine/pr31990_wrong_memcpy.ll
new file mode 100644
index 0000000000000..62ecd0311ffd1
--- /dev/null
+++ b/test/Transforms/InstCombine/pr31990_wrong_memcpy.ll
@@ -0,0 +1,26 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+
+; Regression test of PR31990. A memcpy of one byte, copying 0xff, was
+; replaced with a single store of an i4 0xf.
+
+@g = constant i8 -1
+
+define void @foo() {
+entry:
+  %0 = alloca i8
+  %1 = bitcast i8* %0 to i4*
+  call void @bar(i4* %1)
+  %2 = bitcast i4* %1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* @g, i32 1, i32 1, i1 false)
+  call void @gaz(i8* %2)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly,
+                                        i8* nocapture readonly, i32, i32, i1)
+declare void @bar(i4*)
+declare void @gaz(i8*)
+
+; The memcpy should be simplified to a single store of an i8, not i4
+; CHECK: store i8 -1
+; CHECK-NOT: store i4 -1
diff --git a/test/Transforms/InstCombine/prefetch-load.ll b/test/Transforms/InstCombine/prefetch-load.ll
new file mode 100644
index 0000000000000..f98b7ae00bf1d
--- /dev/null
+++ b/test/Transforms/InstCombine/prefetch-load.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+%struct.C = type { %struct.C*, i32 }
+
+; Check that we instcombine the load across the prefetch.
+
+; CHECK-LABEL: define signext i32 @foo
+define signext i32 @foo(%struct.C* %c) local_unnamed_addr #0 {
+; CHECK: store i32 %dec, i32* %length_
+; CHECK-NOT: load
+; CHECK: llvm.prefetch
+; CHECK-NEXT: ret
+entry:
+  %next_ = getelementptr inbounds %struct.C, %struct.C* %c, i32 0, i32 0
+  %0 = load %struct.C*, %struct.C** %next_, align 8
+  %next_1 = getelementptr inbounds %struct.C, %struct.C* %0, i32 0, i32 0
+  %1 = load %struct.C*, %struct.C** %next_1, align 8
+  store %struct.C* %1, %struct.C** %next_, align 8
+  %length_ = getelementptr inbounds %struct.C, %struct.C* %c, i32 0, i32 1
+  %2 = load i32, i32* %length_, align 8
+  %dec = add nsw i32 %2, -1
+  store i32 %dec, i32* %length_, align 8
+  %3 = bitcast %struct.C* %1 to i8*
+  call void @llvm.prefetch(i8* %3, i32 0, i32 0, i32 1)
+  %4 = load i32, i32* %length_, align 8
+  ret i32 %4
+}
+
+; Function Attrs: inaccessiblemem_or_argmemonly nounwind
+declare void @llvm.prefetch(i8* nocapture readonly, i32, i32, i32) 
+
+attributes #0 = { noinline nounwind }
+; We've explicitly removed the function attrs from llvm.prefetch so we get the defaults.
+; attributes #1 = { inaccessiblemem_or_argmemonly nounwind }
diff --git a/test/Transforms/InstCombine/preserved-analyses.ll b/test/Transforms/InstCombine/preserved-analyses.ll
new file mode 100644
index 0000000000000..767304aecf353
--- /dev/null
+++ b/test/Transforms/InstCombine/preserved-analyses.ll
@@ -0,0 +1,33 @@
+; This is really testing that instcombine preserves analyses correctly, so we
+; don't care much about the code other than it is something instcombine can
+; transform.
+;
+; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 -aa-pipeline=basic-aa,globals-aa \
+; RUN:    -passes='require<globals-aa>,function(require<aa>,instcombine),function(require<aa>)' \
+; RUN:    | FileCheck %s --check-prefix=AA
+; AA: Running analysis: GlobalsAA
+; AA: Running analysis: AAManager
+; AA: Running analysis: BasicAA
+; AA: Running pass: InstCombinePass on test
+; AA-NOT: Invalidating analysis: GlobalsAA
+; AA-NOT: Invalidating analysis: AAManager
+; AA-NOT: Invalidating analysis: BasicAA
+; AA: Running pass: RequireAnalysisPass<{{.*}}AAManager
+; AA-NOT: Running analysis: GlobalsAA
+; AA-NOT: Running analysis: AAManager
+; AA-NOT: Running analysis: BasicAA
+;
+; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 \
+; RUN:    -passes='require<domtree>,instcombine,require<domtree>' \
+; RUN:    | FileCheck %s --check-prefix=DT
+; DT: Running analysis: DominatorTreeAnalysis
+; DT: Running pass: InstCombinePass on test
+; DT-NOT: Invalidating analysis: DominatorTreeAnalysis
+; DT: Running pass: RequireAnalysisPass<{{.*}}DominatorTreeAnalysis
+; DT-NOT: Running analysis: DominatorTreeAnalysis
+
+define i32 @test(i32 %A) {
+  %B = add i32 %A, 5
+  %C = add i32 %B, -5
+  ret i32 %C
+}
diff --git a/test/Transforms/InstCombine/readnone-maythrow.ll b/test/Transforms/InstCombine/readnone-maythrow.ll
new file mode 100644
index 0000000000000..f01e90263a303
--- /dev/null
+++ b/test/Transforms/InstCombine/readnone-maythrow.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare void @readnone_but_may_throw() readnone
+
+define void @f_0(i32* %ptr) {
+; CHECK-LABEL: @f_0(
+entry:
+; CHECK:  store i32 10, i32* %ptr
+; CHECK-NEXT:  call void @readnone_but_may_throw()
+; CHECK-NEXT:  store i32 20, i32* %ptr, align 4
+; CHECK:  ret void
+
+  store i32 10, i32* %ptr
+  call void @readnone_but_may_throw()
+  store i32 20, i32* %ptr
+  ret void
+}
+
+define void @f_1(i1 %cond, i32* %ptr) {
+; CHECK-LABEL: @f_1(
+; CHECK:  store i32 10, i32* %ptr
+; CHECK-NEXT:  call void @readnone_but_may_throw()
+
+  store i32 10, i32* %ptr
+  call void @readnone_but_may_throw()
+  br i1 %cond, label %left, label %merge
+
+left:
+  store i32 20, i32* %ptr
+  br label %merge
+
+merge:
+  ret void
+}
diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll
index 89a741c907074..7a7a134db9c5d 100644
--- a/test/Transforms/InstCombine/rem.ll
+++ b/test/Transforms/InstCombine/rem.ll
@@ -1,28 +1,169 @@
-; This test makes sure that rem instructions are properly eliminated.
-;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; END.
+
+define i64 @rem_signed(i64 %x1, i64 %y2) {
+; CHECK-LABEL: @rem_signed(
+; CHECK-NEXT:    [[R:%.*]] = srem i64 %x1, %y2
+; CHECK-NEXT:    ret i64 [[R]]
+;
+  %r = sdiv i64 %x1, %y2
+  %r7 = mul i64 %r, %y2
+  %r8 = sub i64 %x1, %r7
+  ret i64 %r8
+}
+
+define <4 x i32> @rem_signed_vec(<4 x i32> %t, <4 x i32> %u) {
+; CHECK-LABEL: @rem_signed_vec(
+; CHECK-NEXT:    [[K:%.*]] = srem <4 x i32> %t, %u
+; CHECK-NEXT:    ret <4 x i32> [[K]]
+;
+  %k = sdiv <4 x i32> %t, %u
+  %l = mul <4 x i32> %k, %u
+  %m = sub <4 x i32> %t, %l
+  ret <4 x i32> %m
+}
+
+define i64 @rem_unsigned(i64 %x1, i64 %y2) {
+; CHECK-LABEL: @rem_unsigned(
+; CHECK-NEXT:    [[R:%.*]] = urem i64 %x1, %y2
+; CHECK-NEXT:    ret i64 [[R]]
+;
+  %r = udiv i64 %x1, %y2
+  %r7 = mul i64 %r, %y2
+  %r8 = sub i64 %x1, %r7
+  ret i64 %r8
+}
+
+; PR28672 - https://llvm.org/bugs/show_bug.cgi?id=28672
+
+define i8 @big_divisor(i8 %x) {
+; CHECK-LABEL: @big_divisor(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 %x, -127
+; CHECK-NEXT:    [[TMP2:%.*]] = add i8 %x, 127
+; CHECK-NEXT:    [[REM:%.*]] = select i1 [[TMP1]], i8 %x, i8 [[TMP2]]
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %rem = urem i8 %x, 129
+  ret i8 %rem
+}
+
+define i5 @biggest_divisor(i5 %x) {
+; CHECK-LABEL: @biggest_divisor(
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i5 %x, -1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[NOT_]] to i5
+; CHECK-NEXT:    [[REM:%.*]] = add i5 [[TMP1]], %x
+; CHECK-NEXT:    ret i5 [[REM]]
+;
+  %rem = urem i5 %x, -1
+  ret i5 %rem
+}
+
+define <2 x i4> @big_divisor_vec(<2 x i4> %x) {
+; CHECK-LABEL: @big_divisor_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i4> [[X:%.*]], <i4 -3, i4 -3>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i4> [[X]], <i4 3, i4 3>
+; CHECK-NEXT:    [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i4> [[X]], <2 x i4> [[TMP2]]
+; CHECK-NEXT:    ret <2 x i4> [[REM]]
+;
+  %rem = urem <2 x i4> %x, <i4 13, i4 13>
+  ret <2 x i4> %rem
+}
+
+define i8 @urem1(i8 %x, i8 %y) {
+; CHECK-LABEL: @urem1(
+; CHECK-NEXT:    [[A:%.*]] = urem i8 %x, %y
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %A = udiv i8 %x, %y
+  %B = mul i8 %A, %y
+  %C = sub i8 %x, %B
+  ret i8 %C
+}
+
+define i8 @srem1(i8 %x, i8 %y) {
+; CHECK-LABEL: @srem1(
+; CHECK-NEXT:    [[A:%.*]] = srem i8 %x, %y
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %A = sdiv i8 %x, %y
+  %B = mul i8 %A, %y
+  %C = sub i8 %x, %B
+  ret i8 %C
+}
+
+define i8 @urem2(i8 %x, i8 %y) {
+; CHECK-LABEL: @urem2(
+; CHECK-NEXT:    [[A:%.*]] = urem i8 %x, %y
+; CHECK-NEXT:    [[C:%.*]] = sub i8 0, [[A]]
+; CHECK-NEXT:    ret i8 [[C]]
+;
+  %A = udiv i8 %x, %y
+  %B = mul i8 %A, %y
+  %C = sub i8 %B, %x
+  ret i8 %C
+}
+
+define i8 @urem3(i8 %x) {
+; CHECK-LABEL: @urem3(
+; CHECK-NEXT:    [[A:%.*]] = urem i8 %x, 3
+; CHECK-NEXT:    [[B1:%.*]] = sub i8 %x, [[A]]
+; CHECK-NEXT:    [[C:%.*]] = add i8 [[B1]], %x
+; CHECK-NEXT:    ret i8 [[C]]
+;
+  %A = udiv i8 %x, 3
+  %B = mul i8 %A, -3
+  %C = sub i8 %x, %B
+  ret i8 %C
+}
+
+; (((X / Y) * Y) / Y) -> X / Y
+
+define i32 @sdiv_mul_sdiv(i32 %x, i32 %y) {
+; CHECK-LABEL: @sdiv_mul_sdiv(
+; CHECK-NEXT:    [[R:%.*]] = sdiv i32 %x, %y
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %div = sdiv i32 %x, %y
+  %mul = mul i32 %div, %y
+  %r = sdiv i32 %mul, %y
+  ret i32 %r
+}
+
+; (((X / Y) * Y) / Y) -> X / Y
+
+define i32 @udiv_mul_udiv(i32 %x, i32 %y) {
+; CHECK-LABEL: @udiv_mul_udiv(
+; CHECK-NEXT:    [[R:%.*]] = udiv i32 %x, %y
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %div = udiv i32 %x, %y
+  %mul = mul i32 %div, %y
+  %r = udiv i32 %mul, %y
+  ret i32 %r
+}
 
 define i32 @test1(i32 %A) {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT: ret i32 0
-	%B = srem i32 %A, 1	; ISA constant 0
-	ret i32 %B
+; CHECK-NEXT:    ret i32 0
+;
+  %B = srem i32 %A, 1	; ISA constant 0
+  ret i32 %B
 }
 
 define i32 @test2(i32 %A) {	; 0 % X = 0, we don't need to preserve traps
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT: ret i32 0
-	%B = srem i32 0, %A
-	ret i32 %B
+; CHECK-NEXT:    ret i32 0
+;
+  %B = srem i32 0, %A
+  ret i32 %B
 }
 
 define i32 @test3(i32 %A) {
 ; CHECK-LABEL: @test3(
-; CHECK-NEXT: [[AND:%.*]] = and i32 %A, 7
-; CHECK-NEXT: ret i32 [[AND]]
-	%B = urem i32 %A, 8
-	ret i32 %B
+; CHECK-NEXT:    [[B:%.*]] = and i32 %A, 7
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %B = urem i32 %A, 8
+  ret i32 %B
 }
 
 define <2 x i32> @vec_power_of_2_constant_splat_divisor(<2 x i32> %A) {
@@ -45,12 +186,13 @@ define <2 x i19> @weird_vec_power_of_2_constant_splat_divisor(<2 x i19> %A) {
 
 define i1 @test3a(i32 %A) {
 ; CHECK-LABEL: @test3a(
-; CHECK-NEXT: [[AND:%.*]] = and i32 %A, 7
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-	%B = srem i32 %A, -8
-	%C = icmp ne i32 %B, 0
-	ret i1 %C
+; CHECK-NEXT:    [[B1:%.*]] = and i32 %A, 7
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[B1]], 0
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = srem i32 %A, -8
+  %C = icmp ne i32 %B, 0
+  ret i1 %C
 }
 
 define <2 x i1> @test3a_vec(<2 x i32> %A) {
@@ -66,201 +208,221 @@ define <2 x i1> @test3a_vec(<2 x i32> %A) {
 
 define i32 @test4(i32 %X, i1 %C) {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT: [[SEL:%.*]] = select i1 %C, i32 0, i32 7
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SEL]], %X
-	%V = select i1 %C, i32 1, i32 8
-	%R = urem i32 %X, %V
-	ret i32 %R
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 %C, i32 0, i32 7
+; CHECK-NEXT:    [[R:%.*]] = and i32 [[TMP1]], %X
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %V = select i1 %C, i32 1, i32 8
+  %R = urem i32 %X, %V
+  ret i32 %R
 }
 
 define i32 @test5(i32 %X, i8 %B) {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 %B to i32
-; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 32, [[ZEXT]]
-; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SHL]], -1
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD]], %X
-; CHECK-NEXT: ret i32 [[AND]]
-	%shift.upgrd.1 = zext i8 %B to i32
-	%Amt = shl i32 32, %shift.upgrd.1
-	%V = urem i32 %X, %Amt
-	ret i32 %V
+; CHECK-NEXT:    [[SHIFT_UPGRD_1:%.*]] = zext i8 %B to i32
+; CHECK-NEXT:    [[AMT:%.*]] = shl nuw i32 32, [[SHIFT_UPGRD_1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[AMT]], -1
+; CHECK-NEXT:    [[V:%.*]] = and i32 [[TMP1]], %X
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %shift.upgrd.1 = zext i8 %B to i32
+  %Amt = shl i32 32, %shift.upgrd.1
+  %V = urem i32 %X, %Amt
+  ret i32 %V
 }
 
 define i32 @test6(i32 %A) {
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT: ret i32 undef
-	%B = srem i32 %A, 0	;; undef
-	ret i32 %B
+; CHECK-NEXT:    ret i32 undef
+;
+  %B = srem i32 %A, 0	;; undef
+  ret i32 %B
 }
 
 define i32 @test7(i32 %A) {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT: ret i32 0
-	%B = mul i32 %A, 8
-	%C = srem i32 %B, 4
-	ret i32 %C
+; CHECK-NEXT:    ret i32 0
+;
+  %B = mul i32 %A, 8
+  %C = srem i32 %B, 4
+  ret i32 %C
 }
 
 define i32 @test8(i32 %A) {
 ; CHECK-LABEL: @test8(
-; CHECK-NEXT: ret i32 0
-	%B = shl i32 %A, 4
-	%C = srem i32 %B, 8
-	ret i32 %C
+; CHECK-NEXT:    ret i32 0
+;
+  %B = shl i32 %A, 4
+  %C = srem i32 %B, 8
+  ret i32 %C
 }
 
 define i32 @test9(i32 %A) {
 ; CHECK-LABEL: @test9(
-; CHECK-NEXT: ret i32 0
-	%B = mul i32 %A, 64
-	%C = urem i32 %B, 32
-	ret i32 %C
+; CHECK-NEXT:    ret i32 0
+;
+  %B = mul i32 %A, 64
+  %C = urem i32 %B, 32
+  ret i32 %C
 }
 
 define i32 @test10(i8 %c) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT: ret i32 0
-	%tmp.1 = zext i8 %c to i32
-	%tmp.2 = mul i32 %tmp.1, 4
-	%tmp.3 = sext i32 %tmp.2 to i64
-	%tmp.5 = urem i64 %tmp.3, 4
-	%tmp.6 = trunc i64 %tmp.5 to i32
-	ret i32 %tmp.6
+; CHECK-NEXT:    ret i32 0
+;
+  %tmp.1 = zext i8 %c to i32
+  %tmp.2 = mul i32 %tmp.1, 4
+  %tmp.3 = sext i32 %tmp.2 to i64
+  %tmp.5 = urem i64 %tmp.3, 4
+  %tmp.6 = trunc i64 %tmp.5 to i32
+  ret i32 %tmp.6
 }
 
 define i32 @test11(i32 %i) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT: ret i32 0
-	%tmp.1 = and i32 %i, -2
-	%tmp.3 = mul i32 %tmp.1, 2
-	%tmp.5 = urem i32 %tmp.3, 4
-	ret i32 %tmp.5
+; CHECK-NEXT:    ret i32 0
+;
+  %tmp.1 = and i32 %i, -2
+  %tmp.3 = mul i32 %tmp.1, 2
+  %tmp.5 = urem i32 %tmp.3, 4
+  ret i32 %tmp.5
 }
 
 define i32 @test12(i32 %i) {
 ; CHECK-LABEL: @test12(
-; CHECK-NEXT: ret i32 0
-	%tmp.1 = and i32 %i, -4
-	%tmp.5 = srem i32 %tmp.1, 2
-	ret i32 %tmp.5
+; CHECK-NEXT:    ret i32 0
+;
+  %tmp.1 = and i32 %i, -4
+  %tmp.5 = srem i32 %tmp.1, 2
+  ret i32 %tmp.5
 }
 
 define i32 @test13(i32 %i) {
 ; CHECK-LABEL: @test13(
-; CHECK-NEXT: ret i32 0
-	%x = srem i32 %i, %i
-	ret i32 %x
+; CHECK-NEXT:    ret i32 0
+;
+  %x = srem i32 %i, %i
+  ret i32 %x
 }
 
 define i64 @test14(i64 %x, i32 %y) {
 ; CHECK-LABEL: @test14(
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, %y
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SHL]] to i64
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[ZEXT]], -1
-; CHECK-NEXT: [[AND:%.*]] = and i64 [[ADD]], %x
-; CHECK-NEXT: ret i64 [[AND]]
-	%shl = shl i32 1, %y
-	%zext = zext i32 %shl to i64
-	%urem = urem i64 %x, %zext
-	ret i64 %urem
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 1, %y
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[SHL]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[ZEXT]], -1
+; CHECK-NEXT:    [[UREM:%.*]] = and i64 [[TMP1]], %x
+; CHECK-NEXT:    ret i64 [[UREM]]
+;
+  %shl = shl i32 1, %y
+  %zext = zext i32 %shl to i64
+  %urem = urem i64 %x, %zext
+  ret i64 %urem
 }
 
 define i64 @test15(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test15(
-; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, %y
-; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SHL]], -1
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD]], %x
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[AND]] to i64
-; CHECK-NEXT: ret i64 [[ZEXT]]
-	%shl = shl i32 1, %y
-	%zext0 = zext i32 %shl to i64
-	%zext1 = zext i32 %x to i64
-	%urem = urem i64 %zext1, %zext0
-	ret i64 %urem
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, %y
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[SHL]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %x
+; CHECK-NEXT:    [[UREM:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    ret i64 [[UREM]]
+;
+  %shl = shl i32 1, %y
+  %zext0 = zext i32 %shl to i64
+  %zext1 = zext i32 %x to i64
+  %urem = urem i64 %zext1, %zext0
+  ret i64 %urem
 }
 
 define i32 @test16(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test16(
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 %y, 11
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHR]], 4
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND]], 3
-; CHECK-NEXT: [[REM:%.*]] = and i32 [[OR]], %x
-; CHECK-NEXT: ret i32 [[REM]]
-	%shr = lshr i32 %y, 11
-	%and = and i32 %shr, 4
-	%add = add i32 %and, 4
-	%rem = urem i32 %x, %add
-	ret i32 %rem
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 %y, 11
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHR]], 4
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[AND]], 3
+; CHECK-NEXT:    [[REM:%.*]] = and i32 [[TMP1]], %x
+; CHECK-NEXT:    ret i32 [[REM]]
+;
+  %shr = lshr i32 %y, 11
+  %and = and i32 %shr, 4
+  %add = add i32 %and, 4
+  %rem = urem i32 %x, %add
+  ret i32 %rem
 }
 
 define i32 @test17(i32 %X) {
 ; CHECK-LABEL: @test17(
-; CHECK-NEXT: icmp ne i32 %X, 1
-; CHECK-NEXT: zext i1
-; CHECK-NEXT: ret
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 %X, 1
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %A = urem i32 1, %X
   ret i32 %A
 }
 
 define i32 @test18(i16 %x, i32 %y) {
-; CHECK: @test18
-; CHECK-NEXT: [[SHL:%.*]] = shl i16 %x, 3
-; CHECK-NEXT: [[AND:%.*]] = and i16 [[SHL]], 32
-; CHECK-NEXT: [[XOR:%.*]] = xor i16 [[AND]], 63
-; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[XOR]] to i32
-; CHECK-NEXT: [[REM:%.*]] = and i32 [[EXT]], %y
-; CHECK-NEXT: ret i32 [[REM]]
-	%1 = and i16 %x, 4
-	%2 = icmp ne i16 %1, 0
-	%3 = select i1 %2, i32 32, i32 64
-	%4 = urem i32 %y, %3
-	ret i32 %4
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i16 %x, 3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 32
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i16 [[TMP2]], 63
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], %y
+; CHECK-NEXT:    ret i32 [[TMP5]]
+;
+  %1 = and i16 %x, 4
+  %2 = icmp ne i16 %1, 0
+  %3 = select i1 %2, i32 32, i32 64
+  %4 = urem i32 %y, %3
+  ret i32 %4
 }
 
 define i32 @test19(i32 %x, i32 %y) {
-; CHECK: @test19
-; CHECK-NEXT: [[SHL1:%.*]] = shl i32 1, %x
-; CHECK-NEXT: [[SHL2:%.*]] = shl i32 1, %y
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL1]], [[SHL2]]
-; CHECK-NEXT: [[ADD:%.*]] = add i32 [[AND]], [[SHL1]]
-; CHECK-NEXT: [[SUB:%.*]] = add i32 [[ADD]], -1
-; CHECK-NEXT: [[REM:%.*]] = and i32 [[SUB]], %y
-; CHECK-NEXT: ret i32 [[REM]]
-	%A = shl i32 1, %x
-	%B = shl i32 1, %y
-	%C = and i32 %A, %B
-	%D = add i32 %C, %A
-	%E = urem i32 %y, %D
-	ret i32 %E
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:    [[A:%.*]] = shl i32 1, %x
+; CHECK-NEXT:    [[B:%.*]] = shl i32 1, %y
+; CHECK-NEXT:    [[C:%.*]] = and i32 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = add i32 [[C]], [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[D]], -1
+; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], %y
+; CHECK-NEXT:    ret i32 [[E]]
+;
+  %A = shl i32 1, %x
+  %B = shl i32 1, %y
+  %C = and i32 %A, %B
+  %D = add i32 %C, %A
+  %E = urem i32 %y, %D
+  ret i32 %E
 }
 
 define <2 x i64> @test20(<2 x i64> %X, <2 x i1> %C) {
 ; CHECK-LABEL: @test20(
-; CHECK-NEXT: select <2 x i1> %C, <2 x i64> <i64 1, i64 2>, <2 x i64> zeroinitializer
-; CHECK-NEXT: ret <2 x i64>
-	%V = select <2 x i1> %C, <2 x i64> <i64 1, i64 2>, <2 x i64> <i64 8, i64 9>
-	%R = urem <2 x i64> %V, <i64 2, i64 3>
-	ret <2 x i64> %R
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> %C, <2 x i64> <i64 1, i64 2>, <2 x i64> zeroinitializer
+; CHECK-NEXT:    ret <2 x i64> [[R]]
+;
+  %V = select <2 x i1> %C, <2 x i64> <i64 1, i64 2>, <2 x i64> <i64 8, i64 9>
+  %R = urem <2 x i64> %V, <i64 2, i64 3>
+  ret <2 x i64> %R
 }
 
-define i32 @test21(i1 %c0, i32* %val) {
+define i32 @test21(i1 %c0, i32* %p) {
 ; CHECK-LABEL: @test21(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %c0, label %if.then, label %if.end
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V:%.*]] = load volatile i32, i32* %p, align 4
+; CHECK-NEXT:    [[PHITMP:%.*]] = srem i32 [[V]], 5
+; CHECK-NEXT:    br label %if.end
+; CHECK:       if.end:
+; CHECK-NEXT:    [[LHS:%.*]] = phi i32 [ [[PHITMP]], %if.then ], [ 0, %entry ]
+; CHECK-NEXT:    ret i32 [[LHS]]
+;
 entry:
   br i1 %c0, label %if.then, label %if.end
 
 if.then:
-; CHECK: if.then:
-; CHECK-NEXT:  %v = load volatile i32, i32* %val, align 4
-; CHECK-NEXT:  %phitmp = srem i32 %v, 5
-
-  %v = load volatile i32, i32* %val
+  %v = load volatile i32, i32* %p
   br label %if.end
 
 if.end:
-; CHECK: if.end:
-; CHECK-NEXT:  %lhs = phi i32 [ %phitmp, %if.then ], [ 0, %entry ]
-; CHECK-NEXT:  ret i32 %lhs
-
   %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
   %rem = srem i32 %lhs, 5
   ret i32 %rem
@@ -269,28 +431,34 @@ if.end:
 @a = common global [5 x i16] zeroinitializer, align 2
 @b = common global i16 0, align 2
 
-define i32 @pr27968_0(i1 %c0, i32* %val) {
+define i32 @pr27968_0(i1 %c0, i32* %p) {
 ; CHECK-LABEL: @pr27968_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %c0, label %if.then, label %if.end
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V:%.*]] = load volatile i32, i32* %p, align 4
+; CHECK-NEXT:    br label %if.end
+; CHECK:       if.end:
+; CHECK-NEXT:    [[LHS:%.*]] = phi i32 [ [[V]], %if.then ], [ 5, %entry ]
+; CHECK-NEXT:    br i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b), label %rem.is.safe, label %rem.is.unsafe
+; CHECK:       rem.is.safe:
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[LHS]], zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
+; CHECK-NEXT:    ret i32 [[REM]]
+; CHECK:       rem.is.unsafe:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   br i1 %c0, label %if.then, label %if.end
 
 if.then:
-  %v = load volatile i32, i32* %val
+  %v = load volatile i32, i32* %p
   br label %if.end
 
-; CHECK: if.then:
-; CHECK-NOT: srem
-; CHECK:  br label %if.end
-
 if.end:
   %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
   br i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b), label %rem.is.safe, label %rem.is.unsafe
 
 rem.is.safe:
-; CHECK: rem.is.safe:
-; CHECK-NEXT:  %rem = srem i32 %lhs, zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
-; CHECK-NEXT:  ret i32 %rem
-
   %rem = srem i32 %lhs, zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
   ret i32 %rem
 
@@ -298,19 +466,29 @@ rem.is.unsafe:
   ret i32 0
 }
 
-define i32 @pr27968_1(i1 %c0, i1 %always_false, i32* %val) {
+define i32 @pr27968_1(i1 %c0, i1 %always_false, i32* %p) {
 ; CHECK-LABEL: @pr27968_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %c0, label %if.then, label %if.end
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V:%.*]] = load volatile i32, i32* %p, align 4
+; CHECK-NEXT:    br label %if.end
+; CHECK:       if.end:
+; CHECK-NEXT:    [[LHS:%.*]] = phi i32 [ [[V]], %if.then ], [ 5, %entry ]
+; CHECK-NEXT:    br i1 %always_false, label %rem.is.safe, label %rem.is.unsafe
+; CHECK:       rem.is.safe:
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[LHS]], -2147483648
+; CHECK-NEXT:    ret i32 [[REM]]
+; CHECK:       rem.is.unsafe:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   br i1 %c0, label %if.then, label %if.end
 
 if.then:
-  %v = load volatile i32, i32* %val
+  %v = load volatile i32, i32* %p
   br label %if.end
 
-; CHECK: if.then:
-; CHECK-NOT: srem
-; CHECK:  br label %if.end
-
 if.end:
   %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
   br i1 %always_false, label %rem.is.safe, label %rem.is.unsafe
@@ -319,36 +497,38 @@ rem.is.safe:
   %rem = srem i32 %lhs, -2147483648
   ret i32 %rem
 
-; CHECK: rem.is.safe:
-; CHECK-NEXT:  %rem = srem i32 %lhs, -2147483648
-; CHECK-NEXT:  ret i32 %rem
-
 rem.is.unsafe:
   ret i32 0
 }
 
-define i32 @pr27968_2(i1 %c0, i32* %val) {
+define i32 @pr27968_2(i1 %c0, i32* %p) {
 ; CHECK-LABEL: @pr27968_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %c0, label %if.then, label %if.end
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V:%.*]] = load volatile i32, i32* %p, align 4
+; CHECK-NEXT:    br label %if.end
+; CHECK:       if.end:
+; CHECK-NEXT:    [[LHS:%.*]] = phi i32 [ [[V]], %if.then ], [ 5, %entry ]
+; CHECK-NEXT:    br i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b), label %rem.is.safe, label %rem.is.unsafe
+; CHECK:       rem.is.safe:
+; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[LHS]], zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
+; CHECK-NEXT:    ret i32 [[REM]]
+; CHECK:       rem.is.unsafe:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   br i1 %c0, label %if.then, label %if.end
 
 if.then:
-  %v = load volatile i32, i32* %val
+  %v = load volatile i32, i32* %p
   br label %if.end
 
-; CHECK: if.then:
-; CHECK-NOT: urem
-; CHECK:  br label %if.end
-
 if.end:
   %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
   br i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b), label %rem.is.safe, label %rem.is.unsafe
 
 rem.is.safe:
-; CHECK: rem.is.safe:
-; CHECK-NEXT:  %rem = urem i32 %lhs, zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
-; CHECK-NEXT:  ret i32 %rem
-
   %rem = urem i32 %lhs, zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
   ret i32 %rem
 
@@ -356,20 +536,29 @@ rem.is.unsafe:
   ret i32 0
 }
 
-define i32 @pr27968_3(i1 %c0, i1 %always_false, i32* %val) {
+define i32 @pr27968_3(i1 %c0, i1 %always_false, i32* %p) {
 ; CHECK-LABEL: @pr27968_3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %c0, label %if.then, label %if.end
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V:%.*]] = load volatile i32, i32* %p, align 4
+; CHECK-NEXT:    [[PHITMP:%.*]] = and i32 [[V]], 2147483647
+; CHECK-NEXT:    br label %if.end
+; CHECK:       if.end:
+; CHECK-NEXT:    [[LHS:%.*]] = phi i32 [ [[PHITMP]], %if.then ], [ 5, %entry ]
+; CHECK-NEXT:    br i1 %always_false, label %rem.is.safe, label %rem.is.unsafe
+; CHECK:       rem.is.safe:
+; CHECK-NEXT:    ret i32 [[LHS]]
+; CHECK:       rem.is.unsafe:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   br i1 %c0, label %if.then, label %if.end
 
 if.then:
-  %v = load volatile i32, i32* %val
+  %v = load volatile i32, i32* %p
   br label %if.end
 
-; CHECK: if.then:
-; CHECK-NEXT:  %v = load volatile i32, i32* %val, align 4
-; CHECK-NEXT:  %phitmp = and i32 %v, 2147483647
-; CHECK-NEXT:  br label %if.end
-
 if.end:
   %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
   br i1 %always_false, label %rem.is.safe, label %rem.is.unsafe
@@ -381,3 +570,4 @@ rem.is.safe:
 rem.is.unsafe:
   ret i32 0
 }
+
diff --git a/test/Transforms/InstCombine/select-bitext.ll b/test/Transforms/InstCombine/select-bitext.ll
index 6e374f5221d16..b66a9eef4ab63 100644
--- a/test/Transforms/InstCombine/select-bitext.ll
+++ b/test/Transforms/InstCombine/select-bitext.ll
@@ -100,7 +100,7 @@ define <2 x i64> @trunc_sel_larger_sext_vec(<2 x i32> %a, <2 x i1> %cmp) {
 ; CHECK-LABEL: @trunc_sel_larger_sext_vec(
 ; CHECK-NEXT:    [[TRUNC:%.*]] = zext <2 x i32> %a to <2 x i64>
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i64> [[TRUNC]], <i64 48, i64 48>
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[SEXT]], <i64 48, i64 48>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i64> [[SEXT]], <i64 48, i64 48>
 ; CHECK-NEXT:    [[EXT:%.*]] = select <2 x i1> %cmp, <2 x i64> [[TMP1]], <2 x i64> <i64 42, i64 43>
 ; CHECK-NEXT:    ret <2 x i64> [[EXT]]
 ;
@@ -127,7 +127,7 @@ define <2 x i32> @trunc_sel_smaller_sext_vec(<2 x i64> %a, <2 x i1> %cmp) {
 ; CHECK-LABEL: @trunc_sel_smaller_sext_vec(
 ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <2 x i64> %a to <2 x i32>
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i32> [[TRUNC]], <i32 16, i32 16>
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[SEXT]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i32> [[SEXT]], <i32 16, i32 16>
 ; CHECK-NEXT:    [[EXT:%.*]] = select <2 x i1> %cmp, <2 x i32> [[TMP1]], <2 x i32> <i32 42, i32 43>
 ; CHECK-NEXT:    ret <2 x i32> [[EXT]]
 ;
@@ -153,7 +153,7 @@ define i32 @trunc_sel_equal_sext(i32 %a, i1 %cmp) {
 define <2 x i32> @trunc_sel_equal_sext_vec(<2 x i32> %a, <2 x i1> %cmp) {
 ; CHECK-LABEL: @trunc_sel_equal_sext_vec(
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i32> %a, <i32 16, i32 16>
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[SEXT]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i32> [[SEXT]], <i32 16, i32 16>
 ; CHECK-NEXT:    [[EXT:%.*]] = select <2 x i1> %cmp, <2 x i32> [[TMP1]], <2 x i32> <i32 42, i32 43>
 ; CHECK-NEXT:    ret <2 x i32> [[EXT]]
 ;
diff --git a/test/Transforms/InstCombine/select-cmp-br.ll b/test/Transforms/InstCombine/select-cmp-br.ll
index 1dc7e153f5fb0..59384ab7b1f02 100644
--- a/test/Transforms/InstCombine/select-cmp-br.ll
+++ b/test/Transforms/InstCombine/select-cmp-br.ll
@@ -1,155 +1,263 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; Replace a 'select' with 'or' in 'select - cmp [eq|ne] - br' sequence
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
-%C = type <{ %struct.S }>
 %struct.S = type { i64*, i32, i32 }
+%C = type <{ %struct.S }>
 
-declare void @bar(%struct.S *) #1
+declare void @bar(%struct.S*)
 declare void @foobar()
 
-define void @test1(%C*) {
+define void @test1(%C* %arg) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds [[C:%.*]], %C* [[ARG:%.*]], i64 0, i32 0, i32 0
+; CHECK-NEXT:    [[M:%.*]] = load i64*, i64** [[TMP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
+; CHECK-NEXT:    [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
+; CHECK-NEXT:    [[NOT_TMP5:%.*]] = icmp ne i64* [[M]], [[N]]
+; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
+; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP71]], [[NOT_TMP5]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+; CHECK:       bb8:
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    tail call void @bar(%struct.S* [[TMP9]])
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb10:
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[M]], i64 9
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to i64 (%C*)**
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64 (%C*)*, i64 (%C*)** [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = tail call i64 [[TMP4]](%C* [[ARG]])
+; CHECK-NEXT:    br label [[BB]]
+;
 entry:
-  %1 = getelementptr inbounds %C, %C* %0, i64 0, i32 0, i32 0
-  %m = load i64*, i64** %1, align 8
-  %2 = getelementptr inbounds %C, %C* %0, i64 1, i32 0, i32 0
-  %n = load i64*, i64** %2, align 8
-  %3 = getelementptr inbounds i64, i64* %m, i64 9
-  %4 = bitcast i64* %3 to i64 (%C*)**
-  %5 = load i64 (%C*)*, i64 (%C*)** %4, align 8
-  %6 = icmp eq i64* %m, %n
-  %7 = select i1 %6, %C* %0, %C* null
-  %8 = icmp eq %C* %7, null
-  br i1 %8, label %12, label %10
-
-; <label>:9                                       ; preds = %10, %12
+  %tmp = getelementptr inbounds %C, %C* %arg, i64 0, i32 0, i32 0
+  %m = load i64*, i64** %tmp, align 8
+  %tmp1 = getelementptr inbounds %C, %C* %arg, i64 1, i32 0, i32 0
+  %n = load i64*, i64** %tmp1, align 8
+  %tmp2 = getelementptr inbounds i64, i64* %m, i64 9
+  %tmp3 = bitcast i64* %tmp2 to i64 (%C*)**
+  %tmp4 = load i64 (%C*)*, i64 (%C*)** %tmp3, align 8
+  %tmp5 = icmp eq i64* %m, %n
+  %tmp6 = select i1 %tmp5, %C* %arg, %C* null
+  %tmp7 = icmp eq %C* %tmp6, null
+  br i1 %tmp7, label %bb10, label %bb8
+
+bb:                                               ; preds = %bb10, %bb8
   ret void
 
-; <label>:10                                      ; preds = %entry
-  %11 = getelementptr inbounds %C, %C* %7, i64 0, i32 0
-  tail call void @bar(%struct.S* %11)
-  br label %9
+bb8:                                              ; preds = %entry
+  %tmp9 = getelementptr inbounds %C, %C* %tmp6, i64 0, i32 0
+  tail call void @bar(%struct.S* %tmp9)
+  br label %bb
 
-; <label>:12                                      ; preds = %entry
-  %13 = tail call i64 %5(%C* %0)
-  br label %9
-; CHECK-LABEL: @test1(
-; CHECK-NOT: select
-; CHECK: or
-; CHECK-NOT: select
+bb10:                                             ; preds = %entry
+  %tmp11 = tail call i64 %tmp4(%C* %arg)
+  br label %bb
 }
 
-define void @test2(%C*) {
+define void @test2(%C* %arg) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds [[C:%.*]], %C* [[ARG:%.*]], i64 0, i32 0, i32 0
+; CHECK-NEXT:    [[M:%.*]] = load i64*, i64** [[TMP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
+; CHECK-NEXT:    [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64* [[M]], [[N]]
+; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
+; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP71]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+; CHECK:       bb8:
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    tail call void @bar(%struct.S* [[TMP9]])
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb10:
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[M]], i64 9
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to i64 (%C*)**
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64 (%C*)*, i64 (%C*)** [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = tail call i64 [[TMP4]](%C* [[ARG]])
+; CHECK-NEXT:    br label [[BB]]
+;
 entry:
-  %1 = getelementptr inbounds %C, %C* %0, i64 0, i32 0, i32 0
-  %m = load i64*, i64** %1, align 8
-  %2 = getelementptr inbounds %C, %C* %0, i64 1, i32 0, i32 0
-  %n = load i64*, i64** %2, align 8
-  %3 = getelementptr inbounds i64, i64* %m, i64 9
-  %4 = bitcast i64* %3 to i64 (%C*)**
-  %5 = load i64 (%C*)*, i64 (%C*)** %4, align 8
-  %6 = icmp eq i64* %m, %n
-  %7 = select i1 %6, %C* null, %C* %0
-  %8 = icmp eq %C* %7, null
-  br i1 %8, label %12, label %10
-
-; <label>:9                                       ; preds = %10, %12
+  %tmp = getelementptr inbounds %C, %C* %arg, i64 0, i32 0, i32 0
+  %m = load i64*, i64** %tmp, align 8
+  %tmp1 = getelementptr inbounds %C, %C* %arg, i64 1, i32 0, i32 0
+  %n = load i64*, i64** %tmp1, align 8
+  %tmp2 = getelementptr inbounds i64, i64* %m, i64 9
+  %tmp3 = bitcast i64* %tmp2 to i64 (%C*)**
+  %tmp4 = load i64 (%C*)*, i64 (%C*)** %tmp3, align 8
+  %tmp5 = icmp eq i64* %m, %n
+  %tmp6 = select i1 %tmp5, %C* null, %C* %arg
+  %tmp7 = icmp eq %C* %tmp6, null
+  br i1 %tmp7, label %bb10, label %bb8
+
+bb:                                               ; preds = %bb10, %bb8
   ret void
 
-; <label>:10                                      ; preds = %entry
-  %11 = getelementptr inbounds %C, %C* %7, i64 0, i32 0
-  tail call void @bar(%struct.S* %11)
-  br label %9
+bb8:                                              ; preds = %entry
+  %tmp9 = getelementptr inbounds %C, %C* %tmp6, i64 0, i32 0
+  tail call void @bar(%struct.S* %tmp9)
+  br label %bb
 
-; <label>:12                                      ; preds = %entry
-  %13 = tail call i64 %5(%C* %0)
-  br label %9
-; CHECK-LABEL: @test2(
-; CHECK-NOT: select
-; CHECK: or
-; CHECK-NOT: select
+bb10:                                             ; preds = %entry
+  %tmp11 = tail call i64 %tmp4(%C* %arg)
+  br label %bb
 }
 
-define void @test3(%C*) {
+define void @test3(%C* %arg) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds [[C:%.*]], %C* [[ARG:%.*]], i64 0, i32 0, i32 0
+; CHECK-NEXT:    [[M:%.*]] = load i64*, i64** [[TMP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
+; CHECK-NEXT:    [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
+; CHECK-NEXT:    [[NOT_TMP5:%.*]] = icmp ne i64* [[M]], [[N]]
+; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
+; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP71]], [[NOT_TMP5]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+; CHECK:       bb8:
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    tail call void @bar(%struct.S* [[TMP9]])
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb10:
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[M]], i64 9
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to i64 (%C*)**
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64 (%C*)*, i64 (%C*)** [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = tail call i64 [[TMP4]](%C* [[ARG]])
+; CHECK-NEXT:    br label [[BB]]
+;
 entry:
-  %1 = getelementptr inbounds %C, %C* %0, i64 0, i32 0, i32 0
-  %m = load i64*, i64** %1, align 8
-  %2 = getelementptr inbounds %C, %C* %0, i64 1, i32 0, i32 0
-  %n = load i64*, i64** %2, align 8
-  %3 = getelementptr inbounds i64, i64* %m, i64 9
-  %4 = bitcast i64* %3 to i64 (%C*)**
-  %5 = load i64 (%C*)*, i64 (%C*)** %4, align 8
-  %6 = icmp eq i64* %m, %n
-  %7 = select i1 %6, %C* %0, %C* null
-  %8 = icmp ne %C* %7, null
-  br i1 %8, label %10, label %12
-
-; <label>:9                                       ; preds = %10, %12
+  %tmp = getelementptr inbounds %C, %C* %arg, i64 0, i32 0, i32 0
+  %m = load i64*, i64** %tmp, align 8
+  %tmp1 = getelementptr inbounds %C, %C* %arg, i64 1, i32 0, i32 0
+  %n = load i64*, i64** %tmp1, align 8
+  %tmp2 = getelementptr inbounds i64, i64* %m, i64 9
+  %tmp3 = bitcast i64* %tmp2 to i64 (%C*)**
+  %tmp4 = load i64 (%C*)*, i64 (%C*)** %tmp3, align 8
+  %tmp5 = icmp eq i64* %m, %n
+  %tmp6 = select i1 %tmp5, %C* %arg, %C* null
+  %tmp7 = icmp ne %C* %tmp6, null
+  br i1 %tmp7, label %bb8, label %bb10
+
+bb:                                               ; preds = %bb10, %bb8
   ret void
 
-; <label>:10                                      ; preds = %entry
-  %11 = getelementptr inbounds %C, %C* %7, i64 0, i32 0
-  tail call void @bar(%struct.S* %11)
-  br label %9
+bb8:                                              ; preds = %entry
+  %tmp9 = getelementptr inbounds %C, %C* %tmp6, i64 0, i32 0
+  tail call void @bar(%struct.S* %tmp9)
+  br label %bb
 
-; <label>:12                                      ; preds = %entry
-  %13 = tail call i64 %5(%C* %0)
-  br label %9
-; CHECK-LABEL: @test3(
-; CHECK-NOT: select
-; CHECK: or
-; CHECK-NOT: select
+bb10:                                             ; preds = %entry
+  %tmp11 = tail call i64 %tmp4(%C* %arg)
+  br label %bb
 }
 
-define void @test4(%C*) {
+define void @test4(%C* %arg) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds [[C:%.*]], %C* [[ARG:%.*]], i64 0, i32 0, i32 0
+; CHECK-NEXT:    [[M:%.*]] = load i64*, i64** [[TMP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
+; CHECK-NEXT:    [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64* [[M]], [[N]]
+; CHECK-NEXT:    [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
+; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP71]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+; CHECK:       bb8:
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    tail call void @bar(%struct.S* [[TMP9]])
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb10:
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[M]], i64 9
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to i64 (%C*)**
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64 (%C*)*, i64 (%C*)** [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = tail call i64 [[TMP4]](%C* [[ARG]])
+; CHECK-NEXT:    br label [[BB]]
+;
 entry:
-  %1 = getelementptr inbounds %C, %C* %0, i64 0, i32 0, i32 0
-  %m = load i64*, i64** %1, align 8
-  %2 = getelementptr inbounds %C, %C* %0, i64 1, i32 0, i32 0
-  %n = load i64*, i64** %2, align 8
-  %3 = getelementptr inbounds i64, i64* %m, i64 9
-  %4 = bitcast i64* %3 to i64 (%C*)**
-  %5 = load i64 (%C*)*, i64 (%C*)** %4, align 8
-  %6 = icmp eq i64* %m, %n
-  %7 = select i1 %6, %C* null, %C* %0
-  %8 = icmp ne %C* %7, null
-  br i1 %8, label %10, label %12
-
-; <label>:9                                       ; preds = %10, %12
+  %tmp = getelementptr inbounds %C, %C* %arg, i64 0, i32 0, i32 0
+  %m = load i64*, i64** %tmp, align 8
+  %tmp1 = getelementptr inbounds %C, %C* %arg, i64 1, i32 0, i32 0
+  %n = load i64*, i64** %tmp1, align 8
+  %tmp2 = getelementptr inbounds i64, i64* %m, i64 9
+  %tmp3 = bitcast i64* %tmp2 to i64 (%C*)**
+  %tmp4 = load i64 (%C*)*, i64 (%C*)** %tmp3, align 8
+  %tmp5 = icmp eq i64* %m, %n
+  %tmp6 = select i1 %tmp5, %C* null, %C* %arg
+  %tmp7 = icmp ne %C* %tmp6, null
+  br i1 %tmp7, label %bb8, label %bb10
+
+bb:                                               ; preds = %bb10, %bb8
   ret void
 
-; <label>:10                                      ; preds = %entry
-  %11 = getelementptr inbounds %C, %C* %7, i64 0, i32 0
-  tail call void @bar(%struct.S* %11)
-  br label %9
+bb8:                                              ; preds = %entry
+  %tmp9 = getelementptr inbounds %C, %C* %tmp6, i64 0, i32 0
+  tail call void @bar(%struct.S* %tmp9)
+  br label %bb
 
-; <label>:12                                      ; preds = %entry
-  %13 = tail call i64 %5(%C* %0)
-  br label %9
-; CHECK-LABEL: @test4(
-; CHECK-NOT: select
-; CHECK: or
-; CHECK-NOT: select
+bb10:                                             ; preds = %entry
+  %tmp11 = tail call i64 %tmp4(%C* %arg)
+  br label %bb
 }
 
-define void @test5(%C*, i1) {
+define void @test5(%C* %arg, i1 %arg1) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq %C* [[ARG:%.*]], null
+; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP21]], [[ARG1:%.*]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB5:%.*]], label [[BB3:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[C:%.*]], %C* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    tail call void @bar(%struct.S* [[TMP4]])
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    tail call void @foobar()
+; CHECK-NEXT:    br label [[BB]]
+;
 entry:
-  %2 = select i1 %1, %C* null, %C* %0
-  %3 = icmp ne %C* %2, null
-  br i1 %3, label %5, label %7
+  %tmp = select i1 %arg1, %C* null, %C* %arg
+  %tmp2 = icmp ne %C* %tmp, null
+  br i1 %tmp2, label %bb3, label %bb5
 
-; <label>:4                                       ; preds = %10, %12
+bb:                                               ; preds = %bb5, %bb3
   ret void
 
-; <label>:5                                      ; preds = %entry
-  %6 = getelementptr inbounds %C, %C* %2, i64 0, i32 0
-  tail call void @bar(%struct.S* %6)
-  br label %4
+bb3:                                              ; preds = %entry
+  %tmp4 = getelementptr inbounds %C, %C* %tmp, i64 0, i32 0
+  tail call void @bar(%struct.S* %tmp4)
+  br label %bb
 
-; <label>:7                                      ; preds = %entry
+bb5:                                              ; preds = %entry
   tail call void @foobar()
-  br label %4
-; CHECK-LABEL: @test5(
-; CHECK-NOT: select
-; CHECK: or
-; CHECK-NOT: select
+  br label %bb
+}
+
+; Negative test. Must not trigger the select-cmp-br combine because the result
+; of the select is used in both flows following the br (the special case where
+; the conditional branch has the same target for both flows).
+define i32 @test6(i32 %arg, i1 %arg1) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[BB:%.*]], label [[BB]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[TMP:%.*]] = select i1 [[ARG1:%.*]], i32 [[ARG:%.*]], i32 0
+; CHECK-NEXT:    ret i32 [[TMP]]
+;
+entry:
+  %tmp = select i1 %arg1, i32 %arg, i32 0
+  %tmp2 = icmp eq i32 %tmp, 0
+  br i1 %tmp2, label %bb, label %bb
+
+bb:                                               ; preds = %entry, %entry
+  ret i32 %tmp
 }
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index f8c96e7f3f679..c26380eaa71be 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -190,7 +190,7 @@ define <2 x i1> @test62vec(<2 x i1> %A, <2 x i1> %B) {
 define i1 @test63(i1 %A, i1 %B) {
 ; CHECK-LABEL: @test63(
 ; CHECK-NEXT:    [[NOT:%.*]] = xor i1 %A, true
-; CHECK-NEXT:    [[C:%.*]] = or i1 %B, [[NOT]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[NOT]], %B
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %not = xor i1 %A, true
@@ -201,7 +201,7 @@ define i1 @test63(i1 %A, i1 %B) {
 define <2 x i1> @test63vec(<2 x i1> %A, <2 x i1> %B) {
 ; CHECK-LABEL: @test63vec(
 ; CHECK-NEXT:    [[NOT:%.*]] = xor <2 x i1> %A, <i1 true, i1 true>
-; CHECK-NEXT:    [[C:%.*]] = or <2 x i1> %B, [[NOT]]
+; CHECK-NEXT:    [[C:%.*]] = or <2 x i1> [[NOT]], %B
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %not = xor <2 x i1> %A, <i1 true, i1 true>
@@ -1264,11 +1264,10 @@ define i32 @PR23757(i32 %x) {
 define i32 @PR27137(i32 %a) {
 ; CHECK-LABEL: @PR27137(
 ; CHECK-NEXT:    [[NOT_A:%.*]] = xor i32 %a, -1
-; CHECK-NEXT:    [[C0:%.*]] = icmp slt i32 %a, 0
+; CHECK-NEXT:    [[C0:%.*]] = icmp sgt i32 [[NOT_A]], -1
 ; CHECK-NEXT:    [[S0:%.*]] = select i1 [[C0]], i32 [[NOT_A]], i32 -1
 ; CHECK-NEXT:    ret i32 [[S0]]
 ;
-
   %not_a = xor i32 %a, -1
   %c0 = icmp slt i32 %a, 0
   %s0 = select i1 %c0, i32 %not_a, i32 -1
@@ -1299,11 +1298,22 @@ define <2 x i32> @select_icmp_slt0_xor_vec(<2 x i32> %x) {
   ret <2 x i32> %x.xor
 }
 
-; Make sure that undef elements of the select condition are translated into undef elements of the shuffle mask.
-
 define <4 x i32> @canonicalize_to_shuffle(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @canonicalize_to_shuffle(
-; CHECK-NEXT:    [[SEL:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 6, i32 undef>
+; CHECK-NEXT:    [[SEL:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT:    ret <4 x i32> [[SEL]]
+;
+  %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %sel
+}
+
+; Undef elements of the select condition must not be translated into undef elements of a shuffle mask
+; because undef in a shuffle mask means we can return anything, not just one of the selected values.
+; https://bugs.llvm.org/show_bug.cgi?id=32486
+
+define <4 x i32> @undef_elts_in_condition(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @undef_elts_in_condition(
+; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> <i1 true, i1 undef, i1 false, i1 undef>, <4 x i32> %a, <4 x i32> %b
 ; CHECK-NEXT:    ret <4 x i32> [[SEL]]
 ;
   %sel = select <4 x i1> <i1 true, i1 undef, i1 false, i1 undef>, <4 x i32> %a, <4 x i32> %b
@@ -1332,3 +1342,29 @@ define <4 x i32> @cannot_canonicalize_to_shuffle2(<4 x i32> %a, <4 x i32> %b) {
   ret <4 x i32> %sel
 }
 
+declare void @llvm.assume(i1)
+
+define i8 @assume_cond_true(i1 %cond, i8 %x, i8 %y) {
+; CHECK-LABEL: @assume_cond_true(
+; CHECK-NEXT:    call void @llvm.assume(i1 %cond)
+; CHECK-NEXT:    ret i8 %x
+;
+  call void @llvm.assume(i1 %cond)
+  %sel = select i1 %cond, i8 %x, i8 %y
+  ret i8 %sel
+}
+
+; computeKnownBitsFromAssume() understands the 'not' of an assumed condition, so the select on %cond is expected to fold to its false operand %y.
+
+define i8 @assume_cond_false(i1 %cond, i8 %x, i8 %y) {
+; CHECK-LABEL: @assume_cond_false(
+; CHECK-NEXT:    [[NOTCOND:%.*]] = xor i1 %cond, true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[NOTCOND]])
+; CHECK-NEXT:    ret i8 %y
+;
+  %notcond = xor i1 %cond, true
+  call void @llvm.assume(i1 %notcond)
+  %sel = select i1 %cond, i8 %x, i8 %y
+  ret i8 %sel
+}
+
diff --git a/test/Transforms/InstCombine/select_meta.ll b/test/Transforms/InstCombine/select_meta.ll
index 82a85e5836dca..7d5771a0a81c7 100644
--- a/test/Transforms/InstCombine/select_meta.ll
+++ b/test/Transforms/InstCombine/select_meta.ll
@@ -193,12 +193,11 @@ define i32 @test74(i32 %x) {
   ret i32 %retval
 }
 
-; FIXME:
 ; The compare should change, but the metadata remains the same because the select operands are not swapped.
 define i32 @smin1(i32 %x) {
 ; CHECK-LABEL: @smin1(
 ; CHECK-NEXT:    [[NOT_X:%.*]] = xor i32 %x, -1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 %x, 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[NOT_X]], -1
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 [[NOT_X]], i32 -1, !prof ![[MD1]]
 ; CHECK-NEXT:    ret i32 [[SEL]]
 ;
@@ -208,13 +207,12 @@ define i32 @smin1(i32 %x) {
   ret i32 %sel
 }
 
-; FIXME:
 ; The compare should change, and the metadata is swapped because the select operands are swapped.
 define i32 @smin2(i32 %x) {
 ; CHECK-LABEL: @smin2(
 ; CHECK-NEXT:    [[NOT_X:%.*]] = xor i32 %x, -1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 %x, 0
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 -1, i32 [[NOT_X]], !prof ![[MD1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[NOT_X]], -1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 [[NOT_X]], i32 -1, !prof ![[MD3]]
 ; CHECK-NEXT:    ret i32 [[SEL]]
 ;
   %not_x = xor i32 %x, -1
@@ -223,12 +221,11 @@ define i32 @smin2(i32 %x) {
   ret i32 %sel
 }
 
-; FIXME:
 ; The compare should change, but the metadata remains the same because the select operands are not swapped.
 define i32 @smax1(i32 %x) {
 ; CHECK-LABEL: @smax1(
 ; CHECK-NEXT:    [[NOT_X:%.*]] = xor i32 %x, -1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 %x, 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[NOT_X]], -1
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 [[NOT_X]], i32 -1, !prof ![[MD1]]
 ; CHECK-NEXT:    ret i32 [[SEL]]
 ;
@@ -238,13 +235,12 @@ define i32 @smax1(i32 %x) {
   ret i32 %sel
 }
 
-; FIXME:
 ; The compare should change, and the metadata is swapped because the select operands are swapped.
 define i32 @smax2(i32 %x) {
 ; CHECK-LABEL: @smax2(
 ; CHECK-NEXT:    [[NOT_X:%.*]] = xor i32 %x, -1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 %x, 0
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 -1, i32 [[NOT_X]], !prof ![[MD1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[NOT_X]], -1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 [[NOT_X]], i32 -1, !prof ![[MD3]]
 ; CHECK-NEXT:    ret i32 [[SEL]]
 ;
   %not_x = xor i32 %x, -1
@@ -253,11 +249,10 @@ define i32 @smax2(i32 %x) {
   ret i32 %sel
 }
 
-; FIXME:
 ; The compare should change, but the metadata remains the same because the select operands are not swapped.
 define i32 @umin1(i32 %x) {
 ; CHECK-LABEL: @umin1(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 %x, -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 %x, -2147483648
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 -2147483648, !prof ![[MD1]]
 ; CHECK-NEXT:    ret i32 [[SEL]]
 ;
@@ -266,12 +261,11 @@ define i32 @umin1(i32 %x) {
   ret i32 %sel
 }
 
-; FIXME:
 ; The compare should change, and the metadata is swapped because the select operands are swapped.
 define i32 @umin2(i32 %x) {
 ; CHECK-LABEL: @umin2(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 %x, 0
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 2147483647, i32 %x, !prof ![[MD1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 %x, 2147483647
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 2147483647, !prof ![[MD3]]
 ; CHECK-NEXT:    ret i32 [[SEL]]
 ;
   %cmp = icmp slt i32 %x, 0
@@ -279,11 +273,10 @@ define i32 @umin2(i32 %x) {
   ret i32 %sel
 }
 
-; FIXME:
 ; The compare should change, but the metadata remains the same because the select operands are not swapped.
 define i32 @umax1(i32 %x) {
 ; CHECK-LABEL: @umax1(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 %x, 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 %x, 2147483647
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 2147483647, !prof ![[MD1]]
 ; CHECK-NEXT:    ret i32 [[SEL]]
 ;
@@ -292,12 +285,11 @@ define i32 @umax1(i32 %x) {
   ret i32 %sel
 }
 
-; FIXME:
 ; The compare should change, and the metadata is swapped because the select operands are swapped.
 define i32 @umax2(i32 %x) {
 ; CHECK-LABEL: @umax2(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 %x, -1
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 -2147483648, i32 %x, !prof ![[MD1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 %x, -2147483648
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 -2147483648, !prof ![[MD3]]
 ; CHECK-NEXT:    ret i32 [[SEL]]
 ;
   %cmp = icmp sgt i32 %x, -1
diff --git a/test/Transforms/InstCombine/shift-sra.ll b/test/Transforms/InstCombine/shift-sra.ll
index 75235500d5136..4483e60b506a0 100644
--- a/test/Transforms/InstCombine/shift-sra.ll
+++ b/test/Transforms/InstCombine/shift-sra.ll
@@ -1,26 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 
 define i32 @test1(i32 %X, i8 %A) {
-        %shift.upgrd.1 = zext i8 %A to i32              ; <i32> [#uses=1]
-        ; can be logical shift.
-        %Y = ashr i32 %X, %shift.upgrd.1                ; <i32> [#uses=1]
-        %Z = and i32 %Y, 1              ; <i32> [#uses=1]
-        ret i32 %Z
 ; CHECK-LABEL: @test1(
-; CHECK: lshr i32 %X, %shift.upgrd.1 
+; CHECK-NEXT:    [[SHIFT_UPGRD_1:%.*]] = zext i8 %A to i32
+; CHECK-NEXT:    [[Y1:%.*]] = lshr i32 %X, [[SHIFT_UPGRD_1]]
+; CHECK-NEXT:    [[Z:%.*]] = and i32 [[Y1]], 1
+; CHECK-NEXT:    ret i32 [[Z]]
+;
+  %shift.upgrd.1 = zext i8 %A to i32
+  ; can be logical shift.
+  %Y = ashr i32 %X, %shift.upgrd.1
+  %Z = and i32 %Y, 1
+  ret i32 %Z
 }
 
 define i32 @test2(i8 %tmp) {
-        %tmp3 = zext i8 %tmp to i32             ; <i32> [#uses=1]
-        %tmp4 = add i32 %tmp3, 7                ; <i32> [#uses=1]
-        %tmp5 = ashr i32 %tmp4, 3               ; <i32> [#uses=1]
-        ret i32 %tmp5
 ; CHECK-LABEL: @test2(
-; CHECK: lshr i32 %tmp4, 3
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 %tmp to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 7
+; CHECK-NEXT:    [[TMP51:%.*]] = lshr i32 [[TMP4]], 3
+; CHECK-NEXT:    ret i32 [[TMP51]]
+;
+  %tmp3 = zext i8 %tmp to i32
+  %tmp4 = add i32 %tmp3, 7
+  %tmp5 = ashr i32 %tmp4, 3
+  ret i32 %tmp5
 }
 
 define i64 @test3(i1 %X, i64 %Y, i1 %Cond) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    br i1 %Cond, label %T, label %F
+; CHECK:       T:
+; CHECK-NEXT:    [[X2:%.*]] = sext i1 %X to i64
+; CHECK-NEXT:    br label %C
+; CHECK:       F:
+; CHECK-NEXT:    [[Y2:%.*]] = ashr i64 %Y, 63
+; CHECK-NEXT:    br label %C
+; CHECK:       C:
+; CHECK-NEXT:    [[P:%.*]] = phi i64 [ [[X2]], %T ], [ [[Y2]], %F ]
+; CHECK-NEXT:    ret i64 [[P]]
+;
   br i1 %Cond, label %T, label %F
 T:
   %X2 = sext i1 %X to i64
@@ -29,16 +50,24 @@ F:
   %Y2 = ashr i64 %Y, 63
   br label %C
 C:
-  %P = phi i64 [%X2, %T], [%Y2, %F] 
+  %P = phi i64 [%X2, %T], [%Y2, %F]
   %S = ashr i64 %P, 12
   ret i64 %S
-  
-; CHECK-LABEL: @test3(
-; CHECK: %P = phi i64
-; CHECK-NEXT: ret i64 %P
 }
 
 define i64 @test4(i1 %X, i64 %Y, i1 %Cond) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    br i1 %Cond, label %T, label %F
+; CHECK:       T:
+; CHECK-NEXT:    [[X2:%.*]] = sext i1 %X to i64
+; CHECK-NEXT:    br label %C
+; CHECK:       F:
+; CHECK-NEXT:    [[Y2:%.*]] = ashr i64 %Y, 63
+; CHECK-NEXT:    br label %C
+; CHECK:       C:
+; CHECK-NEXT:    [[P:%.*]] = phi i64 [ [[X2]], %T ], [ [[Y2]], %F ]
+; CHECK-NEXT:    ret i64 [[P]]
+;
   br i1 %Cond, label %T, label %F
 T:
   %X2 = sext i1 %X to i64
@@ -47,18 +76,29 @@ F:
   %Y2 = ashr i64 %Y, 63
   br label %C
 C:
-  %P = phi i64 [%X2, %T], [%Y2, %F] 
+  %P = phi i64 [%X2, %T], [%Y2, %F]
   %R = shl i64 %P, 12
   %S = ashr i64 %R, 12
   ret i64 %S
-  
-; CHECK-LABEL: @test4(
-; CHECK: %P = phi i64
-; CHECK-NEXT: ret i64 %P
 }
 
 ; rdar://7732987
 define i32 @test5(i32 %Y) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    br i1 undef, label %A, label %C
+; CHECK:       A:
+; CHECK-NEXT:    br i1 undef, label %B, label %D
+; CHECK:       B:
+; CHECK-NEXT:    br label %D
+; CHECK:       C:
+; CHECK-NEXT:    br i1 undef, label %D, label %E
+; CHECK:       D:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 0, %A ], [ 0, %B ], [ %Y, %C ]
+; CHECK-NEXT:    [[S:%.*]] = ashr i32 [[P]], 16
+; CHECK-NEXT:    ret i32 [[S]]
+; CHECK:       E:
+; CHECK-NEXT:    ret i32 0
+;
   br i1 undef, label %A, label %C
 A:
   br i1 undef, label %B, label %D
@@ -67,12 +107,59 @@ B:
 C:
   br i1 undef, label %D, label %E
 D:
-  %P = phi i32 [0, %A], [0, %B], [%Y, %C] 
+  %P = phi i32 [0, %A], [0, %B], [%Y, %C]
   %S = ashr i32 %P, 16
   ret i32 %S
-; CHECK-LABEL: @test5(
-; CHECK: %P = phi i32
-; CHECK-NEXT: ashr i32 %P, 16
 E:
   ret i32 0
 }
+
+; (X >>s C1) >>s C2 --> X >>s (C1 + C2)
+
+define i32 @ashr_ashr(i32 %x) {
+; CHECK-LABEL: @ashr_ashr(
+; CHECK-NEXT:    [[SH2:%.*]] = ashr i32 %x, 12
+; CHECK-NEXT:    ret i32 [[SH2]]
+;
+  %sh1 = ashr i32 %x, 5
+  %sh2 = ashr i32 %sh1, 7
+  ret i32 %sh2
+}
+
+; PR3851
+; (X >>s C1) >>s C2 --> X >>s (Bitwidth - 1); here C1 + C2 = 15 + 17 = 32, the full width of i32.
+
+define i32 @ashr_overshift(i32 %x) {
+; CHECK-LABEL: @ashr_overshift(
+; CHECK-NEXT:    [[SH2:%.*]] = ashr i32 %x, 31
+; CHECK-NEXT:    ret i32 [[SH2]]
+;
+  %sh1 = ashr i32 %x, 15
+  %sh2 = ashr i32 %sh1, 17
+  ret i32 %sh2
+}
+
+; (X >>s C1) >>s C2 --> X >>s (C1 + C2)
+
+define <2 x i32> @ashr_ashr_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @ashr_ashr_splat_vec(
+; CHECK-NEXT:    [[SH2:%.*]] = ashr <2 x i32> %x, <i32 12, i32 12>
+; CHECK-NEXT:    ret <2 x i32> [[SH2]]
+;
+  %sh1 = ashr <2 x i32> %x, <i32 5, i32 5>
+  %sh2 = ashr <2 x i32> %sh1, <i32 7, i32 7>
+  ret <2 x i32> %sh2
+}
+
+; Splat vector form of (X >>s C1) >>s C2 --> X >>s (Bitwidth - 1); here C1 + C2 = 15 + 17 = 32, the full element width.
+
+define <2 x i32> @ashr_overshift_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @ashr_overshift_splat_vec(
+; CHECK-NEXT:    [[SH2:%.*]] = ashr <2 x i32> %x, <i32 31, i32 31>
+; CHECK-NEXT:    ret <2 x i32> [[SH2]]
+;
+  %sh1 = ashr <2 x i32> %x, <i32 15, i32 15>
+  %sh2 = ashr <2 x i32> %sh1, <i32 17, i32 17>
+  ret <2 x i32> %sh2
+}
+
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index c046a72110c23..60ba35557f70a 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -1,6 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; This test makes sure that these instructions are properly eliminated.
-;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %A) {
@@ -161,9 +159,8 @@ define i8 @test9(i8 %A) {
   ret i8 %C
 }
 
-;; This transformation is deferred to DAGCombine:
 ;; (A >> 7) << 7 === A & 128
-;; The shl may be valuable to scalar evolution.
+
 define i8 @test10(i8 %A) {
 ; CHECK-LABEL: @test10(
 ; CHECK-NEXT:    [[B:%.*]] = and i8 %A, -128
@@ -454,9 +451,8 @@ define i32 @test25(i32 %tmp.2, i32 %AA) {
 
 define <2 x i32> @test25_vector(<2 x i32> %tmp.2, <2 x i32> %AA) {
 ; CHECK-LABEL: @test25_vector(
-; CHECK-NEXT:    [[TMP_3:%.*]] = lshr <2 x i32> %tmp.2, <i32 17, i32 17>
-; CHECK-NEXT:    [[TMP_51:%.*]] = shl <2 x i32> [[TMP_3]], <i32 17, i32 17>
-; CHECK-NEXT:    [[X2:%.*]] = add <2 x i32> [[TMP_51]], %AA
+; CHECK-NEXT:    [[TMP_3:%.*]] = and <2 x i32> %tmp.2, <i32 -131072, i32 -131072>
+; CHECK-NEXT:    [[X2:%.*]] = add <2 x i32> [[TMP_3]], %AA
 ; CHECK-NEXT:    [[TMP_6:%.*]] = and <2 x i32> [[X2]], <i32 -131072, i32 -131072>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP_6]]
 ;
@@ -640,30 +636,25 @@ define <2 x i1> @test35vec(<2 x i32> %X) {
 
 define i128 @test36(i128 %A, i128 %B) {
 ; CHECK-LABEL: @test36(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP231:%.*]] = or i128 %B, %A
 ; CHECK-NEXT:    [[INS:%.*]] = and i128 [[TMP231]], 18446744073709551615
 ; CHECK-NEXT:    ret i128 [[INS]]
 ;
-entry:
   %tmp27 = shl i128 %A, 64
   %tmp23 = shl i128 %B, 64
   %ins = or i128 %tmp23, %tmp27
   %tmp45 = lshr i128 %ins, 64
   ret i128 %tmp45
-
 }
 
 define i64 @test37(i128 %A, i32 %B) {
 ; CHECK-LABEL: @test37(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP22:%.*]] = zext i32 %B to i128
 ; CHECK-NEXT:    [[TMP23:%.*]] = shl nuw nsw i128 [[TMP22]], 32
 ; CHECK-NEXT:    [[INS:%.*]] = or i128 [[TMP23]], %A
 ; CHECK-NEXT:    [[TMP46:%.*]] = trunc i128 [[INS]] to i64
 ; CHECK-NEXT:    ret i64 [[TMP46]]
 ;
-entry:
   %tmp27 = shl i128 %A, 64
   %tmp22 = zext i32 %B to i128
   %tmp23 = shl i128 %tmp22, 96
@@ -671,7 +662,17 @@ entry:
   %tmp45 = lshr i128 %ins, 64
   %tmp46 = trunc i128 %tmp45 to i64
   ret i64 %tmp46
+}
 
+define <2 x i32> @shl_nuw_nsw_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @shl_nuw_nsw_splat_vec(
+; CHECK-NEXT:    [[T2:%.*]] = zext <2 x i8> %x to <2 x i32>
+; CHECK-NEXT:    [[T3:%.*]] = shl nuw nsw <2 x i32> [[T2]], <i32 17, i32 17>
+; CHECK-NEXT:    ret <2 x i32> [[T3]]
+;
+  %t2 = zext <2 x i8> %x to <2 x i32>
+  %t3 = shl <2 x i32> %t2, <i32 17, i32 17>
+  ret <2 x i32> %t3
 }
 
 define i32 @test38(i32 %x) nounwind readnone {
@@ -789,6 +790,8 @@ define i32 @test45(i32 %a) nounwind {
   ret i32 %z
 }
 
+; (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+
 define i32 @test46(i32 %a) {
 ; CHECK-LABEL: @test46(
 ; CHECK-NEXT:    [[Z:%.*]] = ashr exact i32 %a, 2
@@ -799,16 +802,44 @@ define i32 @test46(i32 %a) {
   ret i32 %z
 }
 
-define i32 @test47(i32 %a) {
+; (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+
+define <2 x i32> @test46_splat_vec(<2 x i32> %a) {
+; CHECK-LABEL: @test46_splat_vec(
+; CHECK-NEXT:    [[Z:%.*]] = ashr exact <2 x i32> %a, <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[Z]]
+;
+  %y = ashr exact <2 x i32> %a, <i32 3, i32 3>
+  %z = shl <2 x i32> %y, <i32 1, i32 1>
+  ret <2 x i32> %z
+}
+
+; (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+
+define i8 @test47(i8 %a) {
 ; CHECK-LABEL: @test47(
-; CHECK-NEXT:    [[Z:%.*]] = lshr exact i32 %a, 2
-; CHECK-NEXT:    ret i32 [[Z]]
+; CHECK-NEXT:    [[Z:%.*]] = lshr exact i8 %a, 2
+; CHECK-NEXT:    ret i8 [[Z]]
 ;
-  %y = lshr exact i32 %a, 3
-  %z = shl i32 %y, 1
-  ret i32 %z
+  %y = lshr exact i8 %a, 3
+  %z = shl i8 %y, 1
+  ret i8 %z
+}
+
+; (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+
+define <2 x i8> @test47_splat_vec(<2 x i8> %a) {
+; CHECK-LABEL: @test47_splat_vec(
+; CHECK-NEXT:    [[Z:%.*]] = lshr exact <2 x i8> %a, <i8 2, i8 2>
+; CHECK-NEXT:    ret <2 x i8> [[Z]]
+;
+  %y = lshr exact <2 x i8> %a, <i8 3, i8 3>
+  %z = shl <2 x i8> %y, <i8 1, i8 1>
+  ret <2 x i8> %z
 }
 
+; (X >>u,exact C1) << C2 --> X << (C2-C1) when C2 > C1
+
 define i32 @test48(i32 %x) {
 ; CHECK-LABEL: @test48(
 ; CHECK-NEXT:    [[B:%.*]] = shl i32 %x, 2
@@ -819,6 +850,32 @@ define i32 @test48(i32 %x) {
   ret i32 %B
 }
 
+; Verify that wrap flags are preserved from the original 'shl'.
+
+define i32 @test48_nuw_nsw(i32 %x) {
+; CHECK-LABEL: @test48_nuw_nsw(
+; CHECK-NEXT:    [[B:%.*]] = shl nuw nsw i32 %x, 2
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %A = lshr exact i32 %x, 1
+  %B = shl nuw nsw i32 %A, 3
+  ret i32 %B
+}
+
+; (X >>u,exact C1) << C2 --> X << (C2-C1) when splatted C2 > C1
+
+define <2 x i32> @test48_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test48_splat_vec(
+; CHECK-NEXT:    [[B:%.*]] = shl nuw nsw <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
+;
+  %A = lshr exact <2 x i32> %x, <i32 1, i32 1>
+  %B = shl nsw nuw <2 x i32> %A, <i32 3, i32 3>
+  ret <2 x i32> %B
+}
+
+; (X >>s,exact C1) << C2 --> X << (C2-C1) when C2 > C1
+
 define i32 @test49(i32 %x) {
 ; CHECK-LABEL: @test49(
 ; CHECK-NEXT:    [[B:%.*]] = shl i32 %x, 2
@@ -829,6 +886,32 @@ define i32 @test49(i32 %x) {
   ret i32 %B
 }
 
+; Verify that wrap flags are preserved from the original 'shl'.
+
+define i32 @test49_nuw_nsw(i32 %x) {
+; CHECK-LABEL: @test49_nuw_nsw(
+; CHECK-NEXT:    [[B:%.*]] = shl nuw nsw i32 %x, 2
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %A = ashr exact i32 %x, 1
+  %B = shl nuw nsw i32 %A, 3
+  ret i32 %B
+}
+
+; (X >>s,exact C1) << C2 --> X << (C2-C1) when splatted C2 > C1
+
+define <2 x i32> @test49_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test49_splat_vec(
+; CHECK-NEXT:    [[B:%.*]] = shl nuw nsw <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
+;
+  %A = ashr exact <2 x i32> %x, <i32 1, i32 1>
+  %B = shl nsw nuw <2 x i32> %A, <i32 3, i32 3>
+  ret <2 x i32> %B
+}
+
+; (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
+
 define i32 @test50(i32 %x) {
 ; CHECK-LABEL: @test50(
 ; CHECK-NEXT:    [[B:%.*]] = ashr i32 %x, 2
@@ -839,6 +922,21 @@ define i32 @test50(i32 %x) {
   ret i32 %B
 }
 
+; (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
+; Also, check that exact is propagated.
+
+define <2 x i32> @test50_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test50_splat_vec(
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
+;
+  %A = shl nsw <2 x i32> %x, <i32 1, i32 1>
+  %B = ashr exact <2 x i32> %A, <i32 3, i32 3>
+  ret <2 x i32> %B
+}
+
+; (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
+
 define i32 @test51(i32 %x) {
 ; CHECK-LABEL: @test51(
 ; CHECK-NEXT:    [[B:%.*]] = lshr i32 %x, 2
@@ -849,6 +947,48 @@ define i32 @test51(i32 %x) {
   ret i32 %B
 }
 
+; (X <<nuw C1) >>u C2 --> X >>u (C2-C1) with splats
+; Also, check that exact is propagated.
+
+define <2 x i32> @test51_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test51_splat_vec(
+; CHECK-NEXT:    [[B:%.*]] = lshr exact <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
+;
+  %A = shl nuw <2 x i32> %x, <i32 1, i32 1>
+  %B = lshr exact <2 x i32> %A, <i32 3, i32 3>
+  ret <2 x i32> %B
+}
+
+; (X << C1) >>u C2 --> (X >>u (C2-C1)) & (-1 >>u C2); here C1 = 1, C2 = 3, so the mask is 536870911.
+; Also, check that exact is propagated to the new lshr.
+
+define i32 @test51_no_nuw(i32 %x) {
+; CHECK-LABEL: @test51_no_nuw(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 %x, 2
+; CHECK-NEXT:    [[B:%.*]] = and i32 [[TMP1]], 536870911
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %A = shl i32 %x, 1
+  %B = lshr exact i32 %A, 3
+  ret i32 %B
+}
+
+; Splat vector form of (X << C1) >>u C2 --> (X >>u (C2-C1)) & (-1 >>u C2); here C1 = 1, C2 = 3.
+
+define <2 x i32> @test51_no_nuw_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test51_no_nuw_splat_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[TMP1]], <i32 536870911, i32 536870911>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
+;
+  %A = shl <2 x i32> %x, <i32 1, i32 1>
+  %B = lshr <2 x i32> %A, <i32 3, i32 3>
+  ret <2 x i32> %B
+}
+
+; (X <<nsw C1) >>s C2 --> X <<nsw (C1 - C2)
+
 define i32 @test52(i32 %x) {
 ; CHECK-LABEL: @test52(
 ; CHECK-NEXT:    [[B:%.*]] = shl nsw i32 %x, 2
@@ -859,6 +999,20 @@ define i32 @test52(i32 %x) {
   ret i32 %B
 }
 
+; (X <<nsw C1) >>s C2 --> X <<nsw (C1 - C2)
+
+define <2 x i32> @test52_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test52_splat_vec(
+; CHECK-NEXT:    [[B:%.*]] = shl nsw <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
+;
+  %A = shl nsw <2 x i32> %x, <i32 3, i32 3>
+  %B = ashr <2 x i32> %A, <i32 1, i32 1>
+  ret <2 x i32> %B
+}
+
+; (X <<nuw C1) >>u C2 --> X <<nuw (C1 - C2)
+
 define i32 @test53(i32 %x) {
 ; CHECK-LABEL: @test53(
 ; CHECK-NEXT:    [[B:%.*]] = shl nuw i32 %x, 2
@@ -869,6 +1023,45 @@ define i32 @test53(i32 %x) {
   ret i32 %B
 }
 
+; (X <<nuw C1) >>u C2 --> X <<nuw (C1 - C2)
+
+define <2 x i32> @test53_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test53_splat_vec(
+; CHECK-NEXT:    [[B:%.*]] = shl nuw <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
+;
+  %A = shl nuw <2 x i32> %x, <i32 3, i32 3>
+  %B = lshr <2 x i32> %A, <i32 1, i32 1>
+  ret <2 x i32> %B
+}
+
+; (X << C1) >>u C2 --> (X << (C1 - C2)) & (-1 >>u C2); the expected mask is 124 rather than 127 because the low two bits of the shl-by-2 result are always zero.
+
+define i8 @test53_no_nuw(i8 %x) {
+; CHECK-LABEL: @test53_no_nuw(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 %x, 2
+; CHECK-NEXT:    [[B:%.*]] = and i8 [[TMP1]], 124
+; CHECK-NEXT:    ret i8 [[B]]
+;
+  %A = shl i8 %x, 3
+  %B = lshr i8 %A, 1
+  ret i8 %B
+}
+
+; Splat vector form of (X << C1) >>u C2 --> (X << (C1 - C2)) & (-1 >>u C2); here C1 = 3, C2 = 1.
+; FIXME: Demanded bits should change the mask constant (127 here, 124 in the scalar test53_no_nuw) as it does for the scalar case.
+
+define <2 x i8> @test53_no_nuw_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @test53_no_nuw_splat_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> %x, <i8 2, i8 2>
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i8> [[TMP1]], <i8 127, i8 127>
+; CHECK-NEXT:    ret <2 x i8> [[B]]
+;
+  %A = shl <2 x i8> %x, <i8 3, i8 3>
+  %B = lshr <2 x i8> %A, <i8 1, i8 1>
+  ret <2 x i8> %B
+}
+
 define i32 @test54(i32 %x) {
 ; CHECK-LABEL: @test54(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 %x, 3
@@ -1041,7 +1234,7 @@ define <2 x i65> @test_63(<2 x i64> %t) {
 ; CHECK-LABEL: @test_63(
 ; CHECK-NEXT:    [[A:%.*]] = zext <2 x i64> %t to <2 x i65>
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i65> [[A]], <i65 33, i65 33>
-; CHECK-NEXT:    [[B:%.*]] = ashr <2 x i65> [[SEXT]], <i65 33, i65 33>
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i65> [[SEXT]], <i65 33, i65 33>
 ; CHECK-NEXT:    ret <2 x i65> [[B]]
 ;
   %a = zext <2 x i64> %t to <2 x i65>
@@ -1052,12 +1245,26 @@ define <2 x i65> @test_63(<2 x i64> %t) {
 
 define i64 @test_64(i32 %t) {
 ; CHECK-LABEL: @test_64(
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 %t, 8
-; CHECK-NEXT: [[EXT:%.*]] = zext i32 [[SHL]] to i64
-; CHECK-NEXT: ret i64 [[EXT]]
-
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 %t, 8
+; CHECK-NEXT:    [[SHL:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    ret i64 [[SHL]]
+;
   %and = and i32 %t, 16777215
   %ext = zext i32 %and to i64
   %shl = shl i64 %ext, 8
   ret i64 %shl
 }
+
+define <2 x i64> @test_64_splat_vec(<2 x i32> %t) {
+; CHECK-LABEL: @test_64_splat_vec(
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> %t, <i32 16777215, i32 16777215>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw <2 x i32> [[AND]], <i32 8, i32 8>
+; CHECK-NEXT:    [[SHL:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[SHL]]
+;
+  %and = and <2 x i32> %t, <i32 16777215, i32 16777215>
+  %ext = zext <2 x i32> %and to <2 x i64>
+  %shl = shl <2 x i64> %ext, <i64 8, i64 8>
+  ret <2 x i64> %shl
+}
+
diff --git a/test/Transforms/InstCombine/shufflevec-bitcast.ll b/test/Transforms/InstCombine/shufflevec-bitcast.ll
new file mode 100644
index 0000000000000..0f0365a07fb40
--- /dev/null
+++ b/test/Transforms/InstCombine/shufflevec-bitcast.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @test(<16 x i8> %w, i32* %o1, float* %o2) {
+
+; CHECK:       %v.bc = bitcast <16 x i8> %w to <4 x i32>
+; CHECK-NEXT:  %v.extract = extractelement <4 x i32> %v.bc, i32 3
+; CHECK-NEXT:  %v.bc{{[0-9]*}} = bitcast <16 x i8> %w to <4 x float>
+; CHECK-NEXT:  %v.extract{{[0-9]*}} = extractelement <4 x float> %v.bc{{[0-9]*}}, i32 3
+
+  %v = shufflevector <16 x i8> %w, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+  %f = bitcast <4 x i8> %v to float
+  %i = bitcast <4 x i8> %v to i32
+  store i32 %i, i32* %o1, align 4
+  store float %f, float* %o2, align 4
+  ret void
+}
diff --git a/test/Transforms/InstCombine/signext.ll b/test/Transforms/InstCombine/signext.ll
index bccadeb396f20..ff92ec0a8e3ce 100644
--- a/test/Transforms/InstCombine/signext.ll
+++ b/test/Transforms/InstCombine/signext.ll
@@ -61,6 +61,10 @@ define i32 @test5(i32 %x) {
   ret i32 %tmp.4
 }
 
+;  If the shift amount equals the difference in width of the destination
+;  and source scalar types:
+;  ashr (shl (zext X), C), C --> sext X
+
 define i32 @test6(i16 %P) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:    [[TMP_5:%.*]] = sext i16 %P to i32
@@ -72,6 +76,19 @@ define i32 @test6(i16 %P) {
   ret i32 %tmp.5
 }
 
+; Splat vectors should get the same fold: ashr (shl (zext X), C), C --> sext X (here C = 32 - 12 = 20).
+
+define <2 x i32> @test6_splat_vec(<2 x i12> %P) {
+; CHECK-LABEL: @test6_splat_vec(
+; CHECK-NEXT:    [[ASHR:%.*]] = sext <2 x i12> %P to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[ASHR]]
+;
+  %z = zext <2 x i12> %P to <2 x i32>
+  %shl = shl <2 x i32> %z, <i32 20, i32 20>
+  %ashr = ashr <2 x i32> %shl, <i32 20, i32 20>
+  ret <2 x i32> %ashr
+}
+
 define i32 @test7(i32 %x) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[SUB:%.*]] = ashr i32 %x, 5
diff --git a/test/Transforms/InstCombine/sitofp.ll b/test/Transforms/InstCombine/sitofp.ll
index 8209778388364..149154723b952 100644
--- a/test/Transforms/InstCombine/sitofp.ll
+++ b/test/Transforms/InstCombine/sitofp.ll
@@ -1,41 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; CHECK-LABEL: test1
-; CHECK: ret i1 true
 define i1 @test1(i8 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret i1 true
+;
   %B = sitofp i8 %A to double
   %C = fcmp ult double %B, 128.0
   ret i1 %C
 }
 
-; CHECK-LABEL: test2
-; CHECK: ret i1 true
 define i1 @test2(i8 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    ret i1 true
+;
   %B = sitofp i8 %A to double
   %C = fcmp ugt double %B, -128.1
   ret i1 %C
 }
 
-; CHECK-LABEL: test3
-; CHECK: ret i1 true
 define i1 @test3(i8 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    ret i1 true
+;
   %B = sitofp i8 %A to double
   %C = fcmp ule double %B, 127.0
   ret i1 %C
 }
 
-; CHECK-LABEL: test4
-; CHECK: icmp ne i8 %A, 127
-; CHECK-NEXT: ret i1
 define i1 @test4(i8 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[A:%.*]], 127
+; CHECK-NEXT:    ret i1 [[C]]
+;
   %B = sitofp i8 %A to double
   %C = fcmp ult double %B, 127.0
   ret i1 %C
 }
 
-; CHECK-LABEL: test5
-; CHECK: ret i32
 define i32 @test5(i32 %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    ret i32 [[A:%.*]]
+;
   %B = sitofp i32 %A to double
   %C = fptosi double %B to i32
   %D = uitofp i32 %C to double
@@ -43,10 +49,11 @@ define i32 @test5(i32 %A) {
   ret i32 %E
 }
 
-; CHECK-LABEL: test6
-; CHECK: and i32 %A, 39
-; CHECK-NEXT: ret i32
 define i32 @test6(i32 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[ADDCONV:%.*]] = and i32 [[A:%.*]], 39
+; CHECK-NEXT:    ret i32 [[ADDCONV]]
+;
   %B = and i32 %A, 7
   %C = and i32 %A, 32
   %D = sitofp i32 %B to double
@@ -56,35 +63,39 @@ define i32 @test6(i32 %A) {
   ret i32 %G
 }
 
-; CHECK-LABEL: test7
-; CHECK: ret i32
-define i32 @test7(i32 %A) nounwind {
+define i32 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    ret i32 [[A:%.*]]
+;
   %B = sitofp i32 %A to double
   %C = fptoui double %B to i32
   ret i32 %C
 }
 
-; CHECK-LABEL: test8
-; CHECK: ret i32
-define i32 @test8(i32 %A) nounwind {
+define i32 @test8(i32 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    ret i32 [[A:%.*]]
+;
   %B = uitofp i32 %A to double
   %C = fptosi double %B to i32
   ret i32 %C
 }
 
-; CHECK-LABEL: test9
-; CHECK: zext i8
-; CHECK-NEXT: ret i32
-define i32 @test9(i8 %A) nounwind {
+define i32 @test9(i8 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[C:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %B = sitofp i8 %A to float
   %C = fptoui float %B to i32
   ret i32 %C
 }
 
-; CHECK-LABEL: test10
-; CHECK: sext i8
-; CHECK-NEXT: ret i32
-define i32 @test10(i8 %A) nounwind {
+define i32 @test10(i8 %A) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %B = sitofp i8 %A to float
   %C = fptosi float %B to i32
   ret i32 %C
@@ -92,10 +103,12 @@ define i32 @test10(i8 %A) nounwind {
 
 ; If the input value is outside of the range of the output cast, it's
 ; undefined behavior, so we can assume it fits.
-; CHECK-LABEL: test11
-; CHECK: trunc
-; CHECK-NEXT: ret i8
-define i8 @test11(i32 %A) nounwind {
+
+define i8 @test11(i32 %A) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[C:%.*]] = trunc i32 [[A:%.*]] to i8
+; CHECK-NEXT:    ret i8 [[C]]
+;
   %B = sitofp i32 %A to float
   %C = fptosi float %B to i8
   ret i8 %C
@@ -103,82 +116,103 @@ define i8 @test11(i32 %A) nounwind {
 
 ; If the input value is negative, it'll be outside the range of the
 ; output cast, and thus undefined behavior.
-; CHECK-LABEL: test12
-; CHECK: zext i8
-; CHECK-NEXT: ret i32
-define i32 @test12(i8 %A) nounwind {
+
+define i32 @test12(i8 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[C:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %B = sitofp i8 %A to float
   %C = fptoui float %B to i32
   ret i32 %C
 }
 
 ; This can't fold because the 25-bit input doesn't fit in the mantissa.
-; CHECK-LABEL: test13
-; CHECK: uitofp
-; CHECK-NEXT: fptoui
-define i32 @test13(i25 %A) nounwind {
+
+define i32 @test13(i25 %A) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[B:%.*]] = uitofp i25 [[A:%.*]] to float
+; CHECK-NEXT:    [[C:%.*]] = fptoui float [[B]] to i32
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %B = uitofp i25 %A to float
   %C = fptoui float %B to i32
   ret i32 %C
 }
 
 ; But this one can.
-; CHECK-LABEL: test14
-; CHECK: zext i24
-; CHECK-NEXT: ret i32
-define i32 @test14(i24 %A) nounwind {
+
+define i32 @test14(i24 %A) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[C:%.*]] = zext i24 [[A:%.*]] to i32
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %B = uitofp i24 %A to float
   %C = fptoui float %B to i32
   ret i32 %C
 }
 
 ; And this one can too.
-; CHECK-LABEL: test15
-; CHECK: trunc i32
-; CHECK-NEXT: ret i24
-define i24 @test15(i32 %A) nounwind {
+
+define i24 @test15(i32 %A) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    [[C:%.*]] = trunc i32 [[A:%.*]] to i24
+; CHECK-NEXT:    ret i24 [[C]]
+;
   %B = uitofp i32 %A to float
   %C = fptoui float %B to i24
   ret i24 %C
 }
 
-; This can fold because the 25-bit input is signed and we disard the sign bit.
-; CHECK-LABEL: test16
-; CHECK: zext
-define i32 @test16(i25 %A) nounwind {
- %B = sitofp i25 %A to float
- %C = fptoui float %B to i32
- ret i32 %C
+; This can fold because the 25-bit input is signed and we discard the sign bit.
+
+define i32 @test16(i25 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    [[C:%.*]] = zext i25 [[A:%.*]] to i32
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %B = sitofp i25 %A to float
+  %C = fptoui float %B to i32
+  ret i32 %C
 }
 
 ; This can't fold because the 26-bit input won't fit the mantissa
-; even after disarding the signed bit.
-; CHECK-LABEL: test17
-; CHECK: sitofp
-; CHECK-NEXT: fptoui
-define i32 @test17(i26 %A) nounwind {
- %B = sitofp i26 %A to float
- %C = fptoui float %B to i32
- ret i32 %C
+; even after discarding the signed bit.
+
+define i32 @test17(i26 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    [[B:%.*]] = sitofp i26 [[A:%.*]] to float
+; CHECK-NEXT:    [[C:%.*]] = fptoui float [[B]] to i32
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %B = sitofp i26 %A to float
+  %C = fptoui float %B to i32
+  ret i32 %C
 }
 
-; This can fold because the 54-bit output is signed and we disard the sign bit.
-; CHECK-LABEL: test18
-; CHECK: trunc
-define i54 @test18(i64 %A) nounwind {
- %B = sitofp i64 %A to double
- %C = fptosi double %B to i54
- ret i54 %C
+; This can fold because the 54-bit output is signed and we discard the sign bit.
+
+define i54 @test18(i64 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:    [[C:%.*]] = trunc i64 [[A:%.*]] to i54
+; CHECK-NEXT:    ret i54 [[C]]
+;
+  %B = sitofp i64 %A to double
+  %C = fptosi double %B to i54
+  ret i54 %C
 }
 
 ; This can't fold because the 55-bit output won't fit the mantissa
-; even after disarding the sign bit.
-; CHECK-LABEL: test19
-; CHECK: sitofp
-; CHECK-NEXT: fptosi
-define i55 @test19(i64 %A) nounwind {
- %B = sitofp i64 %A to double
- %C = fptosi double %B to i55
- ret i55 %C
+; even after discarding the sign bit.
+
+define i55 @test19(i64 %A) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:    [[B:%.*]] = sitofp i64 [[A:%.*]] to double
+; CHECK-NEXT:    [[C:%.*]] = fptosi double [[B]] to i55
+; CHECK-NEXT:    ret i55 [[C]]
+;
+  %B = sitofp i64 %A to double
+  %C = fptosi double %B to i55
+  ret i55 %C
 }
 
diff --git a/test/Transforms/InstCombine/srem.ll b/test/Transforms/InstCombine/srem.ll
deleted file mode 100644
index beefe4fb8d3fa..0000000000000
--- a/test/Transforms/InstCombine/srem.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep srem
-
-define i64 @foo(i64 %x1, i64 %y2) {
-	%r = sdiv i64 %x1, %y2
-	%r7 = mul i64 %r, %y2
-	%r8 = sub i64 %x1, %r7
-	ret i64 %r8
-}
diff --git a/test/Transforms/InstCombine/stpcpy_chk-1.ll b/test/Transforms/InstCombine/stpcpy_chk-1.ll
index 2fcc34b052278..45e6879c8d26f 100644
--- a/test/Transforms/InstCombine/stpcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/stpcpy_chk-1.ll
@@ -64,10 +64,10 @@ define i8* @test_simplify5() {
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false)
+; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false)
 ; CHECK-NEXT: %1 = call i8* @__memcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 %len)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
-  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false, i1 false)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len)
   ret i8* %ret
 }
@@ -81,7 +81,7 @@ define i8* @test_simplify6() {
 ; CHECK-NEXT: %strlen = call i32 @strlen(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0))
 ; CHECK-NEXT: %1 = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 %strlen
 ; CHECK-NEXT: ret i8* %1
-  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false, i1 false)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %dst, i32 %len)
   ret i8* %ret
 }
@@ -100,4 +100,4 @@ define i8* @test_no_simplify1() {
 }
 
 declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind
-declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll
index 7a21a49c993ce..824776c6ca18b 100644
--- a/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -64,10 +64,10 @@ define i8* @test_simplify5() {
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false)
+; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false)
 ; CHECK-NEXT: %1 = call i8* @__memcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 %len)
 ; CHECK-NEXT: ret i8* %1
-  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false, i1 false)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len)
   ret i8* %ret
 }
@@ -78,10 +78,10 @@ define i8* @test_simplify6() {
 ; CHECK-LABEL: @test_simplify6(
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
 
-; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false)
+; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false)
 ; CHECK-NEXT: %ret = call i8* @__strcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i32 %len)
 ; CHECK-NEXT: ret i8* %ret
-  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false, i1 false)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len)
   ret i8* %ret
 }
@@ -100,4 +100,4 @@ define i8* @test_no_simplify1() {
 }
 
 declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
-declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
index 9a0814c2c92f8..812305d8e4896 100644
--- a/test/Transforms/InstCombine/sub-xor.ll
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -48,13 +48,3 @@ define i32 @test3(i32 %x) {
   ret i32 %add
 }
 
-define i32 @test4(i32 %x) {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 %x, -2147483606
-; CHECK-NEXT:    ret i32 [[ADD]]
-;
-  %sub = xor i32 %x, 2147483648
-  %add = add i32 %sub, 42
-  ret i32 %add
-}
-
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 32541f1f893eb..2388301c726e3 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -15,7 +15,7 @@ define i32 @test1(i32 %A) {
 
 define i32 @test2(i32 %A) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    ret i32 %A
+; CHECK-NEXT:    ret i32 [[A:%.*]]
 ;
   %B = sub i32 %A, 0
   ret i32 %B
@@ -23,7 +23,7 @@ define i32 @test2(i32 %A) {
 
 define i32 @test3(i32 %A) {
 ; CHECK-LABEL: @test3(
-; CHECK-NEXT:    ret i32 %A
+; CHECK-NEXT:    ret i32 [[A:%.*]]
 ;
   %B = sub i32 0, %A
   %C = sub i32 0, %B
@@ -32,7 +32,7 @@ define i32 @test3(i32 %A) {
 
 define i32 @test4(i32 %A, i32 %x) {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[C:%.*]] = add i32 %x, %A
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %B = sub i32 0, %A
@@ -42,8 +42,8 @@ define i32 @test4(i32 %A, i32 %x) {
 
 define i32 @test5(i32 %A, i32 %B, i32 %C) {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[D1:%.*]] = sub i32 %C, %B
-; CHECK-NEXT:    [[E:%.*]] = add i32 [[D1]], %A
+; CHECK-NEXT:    [[D1:%.*]] = sub i32 [[C:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[E:%.*]] = add i32 [[D1]], [[A:%.*]]
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
   %D = sub i32 %B, %C
@@ -53,8 +53,8 @@ define i32 @test5(i32 %A, i32 %B, i32 %C) {
 
 define i32 @test6(i32 %A, i32 %B) {
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[B_NOT:%.*]] = xor i32 %B, -1
-; CHECK-NEXT:    [[D:%.*]] = and i32 %A, [[B_NOT]]
+; CHECK-NEXT:    [[B_NOT:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT:    [[D:%.*]] = and i32 [[B_NOT]], [[A:%.*]]
 ; CHECK-NEXT:    ret i32 [[D]]
 ;
   %C = and i32 %A, %B
@@ -62,9 +62,20 @@ define i32 @test6(i32 %A, i32 %B) {
   ret i32 %D
 }
 
+define i32 @test6commuted(i32 %A, i32 %B) {
+; CHECK-LABEL: @test6commuted(
+; CHECK-NEXT:    [[B_NOT:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT:    [[D:%.*]] = and i32 [[B_NOT]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %C = and i32 %B, %A
+  %D = sub i32 %A, %C
+  ret i32 %D
+}
+
 define i32 @test7(i32 %A) {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[B:%.*]] = xor i32 %A, -1
+; CHECK-NEXT:    [[B:%.*]] = xor i32 [[A:%.*]], -1
 ; CHECK-NEXT:    ret i32 [[B]]
 ;
   %B = sub i32 -1, %A
@@ -73,7 +84,7 @@ define i32 @test7(i32 %A) {
 
 define i32 @test8(i32 %A) {
 ; CHECK-LABEL: @test8(
-; CHECK-NEXT:    [[C:%.*]] = shl i32 %A, 3
+; CHECK-NEXT:    [[C:%.*]] = shl i32 [[A:%.*]], 3
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %B = mul i32 9, %A
@@ -83,7 +94,7 @@ define i32 @test8(i32 %A) {
 
 define i32 @test9(i32 %A) {
 ; CHECK-LABEL: @test9(
-; CHECK-NEXT:    [[C:%.*]] = mul i32 %A, -2
+; CHECK-NEXT:    [[C:%.*]] = mul i32 [[A:%.*]], -2
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %B = mul i32 3, %A
@@ -93,7 +104,7 @@ define i32 @test9(i32 %A) {
 
 define i32 @test10(i32 %A, i32 %B) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[E:%.*]] = mul i32 %A, %B
+; CHECK-NEXT:    [[E:%.*]] = mul i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
   %C = sub i32 0, %A
@@ -104,7 +115,7 @@ define i32 @test10(i32 %A, i32 %B) {
 
 define i32 @test10a(i32 %A) {
 ; CHECK-LABEL: @test10a(
-; CHECK-NEXT:    [[E:%.*]] = mul i32 %A, -7
+; CHECK-NEXT:    [[E:%.*]] = mul i32 [[A:%.*]], -7
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
   %C = sub i32 0, %A
@@ -114,7 +125,7 @@ define i32 @test10a(i32 %A) {
 
 define i1 @test11(i8 %A, i8 %B) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT:    [[D:%.*]] = icmp ne i8 %A, %B
+; CHECK-NEXT:    [[D:%.*]] = icmp ne i8 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret i1 [[D]]
 ;
   %C = sub i8 %A, %B
@@ -124,7 +135,7 @@ define i1 @test11(i8 %A, i8 %B) {
 
 define <2 x i1> @test11vec(<2 x i8> %A, <2 x i8> %B) {
 ; CHECK-LABEL: @test11vec(
-; CHECK-NEXT:    [[D:%.*]] = icmp ne <2 x i8> %A, %B
+; CHECK-NEXT:    [[D:%.*]] = icmp ne <2 x i8> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[D]]
 ;
   %C = sub <2 x i8> %A, %B
@@ -134,7 +145,7 @@ define <2 x i1> @test11vec(<2 x i8> %A, <2 x i8> %B) {
 
 define i32 @test12(i32 %A) {
 ; CHECK-LABEL: @test12(
-; CHECK-NEXT:    [[C:%.*]] = lshr i32 %A, 31
+; CHECK-NEXT:    [[C:%.*]] = lshr i32 [[A:%.*]], 31
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %B = ashr i32 %A, 31
@@ -144,7 +155,7 @@ define i32 @test12(i32 %A) {
 
 define i32 @test13(i32 %A) {
 ; CHECK-LABEL: @test13(
-; CHECK-NEXT:    [[C:%.*]] = ashr i32 %A, 31
+; CHECK-NEXT:    [[C:%.*]] = ashr i32 [[A:%.*]], 31
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %B = lshr i32 %A, 31
@@ -154,7 +165,7 @@ define i32 @test13(i32 %A) {
 
 define <2 x i32> @test12vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test12vec(
-; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i32> %A, <i32 31, i32 31>
+; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 31, i32 31>
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %B = ashr <2 x i32> %A, <i32 31, i32 31>
@@ -164,7 +175,7 @@ define <2 x i32> @test12vec(<2 x i32> %A) {
 
 define <2 x i32> @test13vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test13vec(
-; CHECK-NEXT:    [[C:%.*]] = ashr <2 x i32> %A, <i32 31, i32 31>
+; CHECK-NEXT:    [[C:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %B = lshr <2 x i32> %A, <i32 31, i32 31>
@@ -174,8 +185,8 @@ define <2 x i32> @test13vec(<2 x i32> %A) {
 
 define i32 @test15(i32 %A, i32 %B) {
 ; CHECK-LABEL: @test15(
-; CHECK-NEXT:    [[C:%.*]] = sub i32 0, %A
-; CHECK-NEXT:    [[D:%.*]] = srem i32 %B, [[C]]
+; CHECK-NEXT:    [[C:%.*]] = sub i32 0, [[A:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = srem i32 [[B:%.*]], [[C]]
 ; CHECK-NEXT:    ret i32 [[D]]
 ;
   %C = sub i32 0, %A
@@ -185,7 +196,7 @@ define i32 @test15(i32 %A, i32 %B) {
 
 define i32 @test16(i32 %A) {
 ; CHECK-LABEL: @test16(
-; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 %A, -1123
+; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[A:%.*]], -1123
 ; CHECK-NEXT:    ret i32 [[Y]]
 ;
   %X = sdiv i32 %A, 1123
@@ -197,7 +208,7 @@ define i32 @test16(i32 %A) {
 ; PR3142
 define i32 @test17(i32 %A) {
 ; CHECK-LABEL: @test17(
-; CHECK-NEXT:    [[B:%.*]] = sub i32 0, %A
+; CHECK-NEXT:    [[B:%.*]] = sub i32 0, [[A:%.*]]
 ; CHECK-NEXT:    [[C:%.*]] = sdiv i32 [[B]], 1234
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -218,7 +229,7 @@ define i64 @test18(i64 %Y) {
 
 define i32 @test19(i32 %X, i32 %Y) {
 ; CHECK-LABEL: @test19(
-; CHECK-NEXT:    ret i32 %X
+; CHECK-NEXT:    ret i32 [[X:%.*]]
 ;
   %Z = sub i32 %X, %Y
   %Q = add i32 %Z, %Y
@@ -227,7 +238,7 @@ define i32 @test19(i32 %X, i32 %Y) {
 
 define i1 @test20(i32 %g, i32 %h) {
 ; CHECK-LABEL: @test20(
-; CHECK-NEXT:    [[TMP_4:%.*]] = icmp ne i32 %h, 0
+; CHECK-NEXT:    [[TMP_4:%.*]] = icmp ne i32 [[H:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[TMP_4]]
 ;
   %tmp.2 = sub i32 %g, %h
@@ -237,7 +248,7 @@ define i1 @test20(i32 %g, i32 %h) {
 
 define i1 @test21(i32 %g, i32 %h) {
 ; CHECK-LABEL: @test21(
-; CHECK-NEXT:    [[TMP_4:%.*]] = icmp ne i32 %h, 0
+; CHECK-NEXT:    [[TMP_4:%.*]] = icmp ne i32 [[H:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[TMP_4]]
 ;
   %tmp.2 = sub i32 %g, %h
@@ -248,7 +259,7 @@ define i1 @test21(i32 %g, i32 %h) {
 ; PR2298
 define zeroext i1 @test22(i32 %a, i32 %b)  nounwind  {
 ; CHECK-LABEL: @test22(
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 %b, %a
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    ret i1 [[TMP5]]
 ;
   %tmp2 = sub i32 0, %a
@@ -260,7 +271,7 @@ define zeroext i1 @test22(i32 %a, i32 %b)  nounwind  {
 ; rdar://7362831
 define i32 @test23(i8* %P, i64 %A){
 ; CHECK-LABEL: @test23(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 %A to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %B = getelementptr inbounds i8, i8* %P, i64 %A
@@ -274,7 +285,7 @@ define i32 @test23(i8* %P, i64 %A){
 
 define i8 @test23_as1(i8 addrspace(1)* %P, i16 %A) {
 ; CHECK-LABEL: @test23_as1(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i16 %A to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i16 [[A:%.*]] to i8
 ; CHECK-NEXT:    ret i8 [[TMP1]]
 ;
   %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A
@@ -288,7 +299,7 @@ define i8 @test23_as1(i8 addrspace(1)* %P, i16 %A) {
 
 define i64 @test24(i8* %P, i64 %A){
 ; CHECK-LABEL: @test24(
-; CHECK-NEXT:    ret i64 %A
+; CHECK-NEXT:    ret i64 [[A:%.*]]
 ;
   %B = getelementptr inbounds i8, i8* %P, i64 %A
   %C = ptrtoint i8* %B to i64
@@ -299,7 +310,7 @@ define i64 @test24(i8* %P, i64 %A){
 
 define i16 @test24_as1(i8 addrspace(1)* %P, i16 %A) {
 ; CHECK-LABEL: @test24_as1(
-; CHECK-NEXT:    ret i16 %A
+; CHECK-NEXT:    ret i16 [[A:%.*]]
 ;
   %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A
   %C = ptrtoint i8 addrspace(1)* %B to i16
@@ -310,7 +321,7 @@ define i16 @test24_as1(i8 addrspace(1)* %P, i16 %A) {
 
 define i64 @test24a(i8* %P, i64 %A){
 ; CHECK-LABEL: @test24a(
-; CHECK-NEXT:    [[DIFF_NEG:%.*]] = sub i64 0, %A
+; CHECK-NEXT:    [[DIFF_NEG:%.*]] = sub i64 0, [[A:%.*]]
 ; CHECK-NEXT:    ret i64 [[DIFF_NEG]]
 ;
   %B = getelementptr inbounds i8, i8* %P, i64 %A
@@ -322,7 +333,7 @@ define i64 @test24a(i8* %P, i64 %A){
 
 define i16 @test24a_as1(i8 addrspace(1)* %P, i16 %A) {
 ; CHECK-LABEL: @test24a_as1(
-; CHECK-NEXT:    [[DIFF_NEG:%.*]] = sub i16 0, %A
+; CHECK-NEXT:    [[DIFF_NEG:%.*]] = sub i16 0, [[A:%.*]]
 ; CHECK-NEXT:    ret i16 [[DIFF_NEG]]
 ;
   %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A
@@ -337,7 +348,7 @@ define i16 @test24a_as1(i8 addrspace(1)* %P, i16 %A) {
 
 define i64 @test24b(i8* %P, i64 %A){
 ; CHECK-LABEL: @test24b(
-; CHECK-NEXT:    [[B_IDX:%.*]] = shl nuw i64 %A, 1
+; CHECK-NEXT:    [[B_IDX:%.*]] = shl nuw i64 [[A:%.*]], 1
 ; CHECK-NEXT:    ret i64 [[B_IDX]]
 ;
   %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A
@@ -349,7 +360,7 @@ define i64 @test24b(i8* %P, i64 %A){
 
 define i64 @test25(i8* %P, i64 %A){
 ; CHECK-LABEL: @test25(
-; CHECK-NEXT:    [[B_IDX:%.*]] = shl nuw i64 %A, 1
+; CHECK-NEXT:    [[B_IDX:%.*]] = shl nuw i64 [[A:%.*]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[B_IDX]], -84
 ; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
@@ -363,7 +374,7 @@ define i64 @test25(i8* %P, i64 %A){
 
 define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) {
 ; CHECK-LABEL: @test25_as1(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 %A to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16
 ; CHECK-NEXT:    [[B_IDX:%.*]] = shl nuw i16 [[TMP1]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[B_IDX]], -84
 ; CHECK-NEXT:    ret i16 [[TMP2]]
@@ -376,7 +387,7 @@ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) {
 
 define i32 @test26(i32 %x) {
 ; CHECK-LABEL: @test26(
-; CHECK-NEXT:    [[NEG:%.*]] = shl i32 -3, %x
+; CHECK-NEXT:    [[NEG:%.*]] = shl i32 -3, [[X:%.*]]
 ; CHECK-NEXT:    ret i32 [[NEG]]
 ;
   %shl = shl i32 3, %x
@@ -386,8 +397,8 @@ define i32 @test26(i32 %x) {
 
 define i32 @test27(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test27(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 %y, 3
-; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[TMP1]], %x
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y:%.*]], 3
+; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
   %mul = mul i32 %y, -8
@@ -395,10 +406,87 @@ define i32 @test27(i32 %x, i32 %y) {
   ret i32 %sub
 }
 
+define <2 x i32> @test27vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[Y:%.*]], <i32 8, i32 6>
+; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[SUB]]
+;
+  %mul = mul <2 x i32> %y, <i32 -8, i32 -6>
+  %sub = sub <2 x i32> %x, %mul
+  ret <2 x i32> %sub
+}
+
+define <2 x i32> @test27vecsplat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27vecsplat(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[SUB]]
+;
+  %mul = mul <2 x i32> %y, <i32 -8, i32 -8>
+  %sub = sub <2 x i32> %x, %mul
+  ret <2 x i32> %sub
+}
+
+define <2 x i32> @test27vecmixed(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27vecmixed(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[Y:%.*]], <i32 8, i32 -8>
+; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[SUB]]
+;
+  %mul = mul <2 x i32> %y, <i32 -8, i32 8>
+  %sub = sub <2 x i32> %x, %mul
+  ret <2 x i32> %sub
+}
+
+define i32 @test27commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: @test27commuted(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[Y:%.*]], 3
+; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %mul = mul i32 -8, %y
+  %sub = sub i32 %x, %mul
+  ret i32 %sub
+}
+
+define <2 x i32> @test27commutedvec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27commutedvec(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[Y:%.*]], <i32 8, i32 6>
+; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[SUB]]
+;
+  %mul = mul <2 x i32> <i32 -8, i32 -6>, %y
+  %sub = sub <2 x i32> %x, %mul
+  ret <2 x i32> %sub
+}
+
+define <2 x i32> @test27commutedvecsplat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27commutedvecsplat(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[SUB]]
+;
+  %mul = mul <2 x i32> <i32 -8, i32 -8>, %y
+  %sub = sub <2 x i32> %x, %mul
+  ret <2 x i32> %sub
+}
+
+define <2 x i32> @test27commutedvecmixed(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27commutedvecmixed(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[Y:%.*]], <i32 8, i32 -8>
+; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[SUB]]
+;
+  %mul = mul <2 x i32> <i32 -8, i32 8>, %y
+  %sub = sub <2 x i32> %x, %mul
+  ret <2 x i32> %sub
+}
+
 define i32 @test28(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @test28(
-; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 %z, %y
-; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[TMP1]], %x
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
   %neg = sub i32 0, %z
@@ -407,9 +495,21 @@ define i32 @test28(i32 %x, i32 %y, i32 %z) {
   ret i32 %sub
 }
 
+define i32 @test28commuted(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test28commuted(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %neg = sub i32 0, %z
+  %mul = mul i32 %y, %neg
+  %sub = sub i32 %x, %mul
+  ret i32 %sub
+}
+
 define i64 @test29(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-LABEL: @test29(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 %i, %j
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[I:%.*]], [[J:%.*]]
 ; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i
@@ -422,8 +522,8 @@ define i64 @test29(i8* %foo, i64 %i, i64 %j) {
 
 define i64 @test30(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-LABEL: @test30(
-; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nuw i64 %i, 2
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[GEP1_IDX]], %j
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nuw i64 [[I:%.*]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]]
 ; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %bit = bitcast i8* %foo to i32*
@@ -437,8 +537,8 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) {
 
 define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
 ; CHECK-LABEL: @test30_as1(
-; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nuw i16 %i, 2
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i16 [[GEP1_IDX]], %j
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl nuw i16 [[I:%.*]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]]
 ; CHECK-NEXT:    ret i16 [[TMP1]]
 ;
   %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
@@ -452,7 +552,7 @@ define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
 
 define <2 x i64> @test31(<2 x i64> %A) {
 ; CHECK-LABEL: @test31(
-; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i64> %A, <i64 3, i64 4>
+; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i64> [[A:%.*]], <i64 3, i64 4>
 ; CHECK-NEXT:    ret <2 x i64> [[SUB]]
 ;
   %xor = xor <2 x i64> %A, <i64 -1, i64 -1>
@@ -462,7 +562,7 @@ define <2 x i64> @test31(<2 x i64> %A) {
 
 define <2 x i64> @test32(<2 x i64> %A) {
 ; CHECK-LABEL: @test32(
-; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i64> <i64 3, i64 4>, %A
+; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i64> <i64 3, i64 4>, [[A:%.*]]
 ; CHECK-NEXT:    ret <2 x i64> [[SUB]]
 ;
   %add = add <2 x i64> %A, <i64 -1, i64 -1>
@@ -472,7 +572,7 @@ define <2 x i64> @test32(<2 x i64> %A) {
 
 define <2 x i64> @test33(<2 x i1> %A) {
 ; CHECK-LABEL: @test33(
-; CHECK-NEXT:    [[SUB:%.*]] = sext <2 x i1> %A to <2 x i64>
+; CHECK-NEXT:    [[SUB:%.*]] = sext <2 x i1> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[SUB]]
 ;
   %ext = zext <2 x i1> %A to <2 x i64>
@@ -482,7 +582,7 @@ define <2 x i64> @test33(<2 x i1> %A) {
 
 define <2 x i64> @test34(<2 x i1> %A) {
 ; CHECK-LABEL: @test34(
-; CHECK-NEXT:    [[SUB:%.*]] = zext <2 x i1> %A to <2 x i64>
+; CHECK-NEXT:    [[SUB:%.*]] = zext <2 x i1> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[SUB]]
 ;
   %ext = sext <2 x i1> %A to <2 x i64>
@@ -492,7 +592,7 @@ define <2 x i64> @test34(<2 x i1> %A) {
 
 define <2 x i64> @test35(<2 x i64> %A) {
 ; CHECK-LABEL: @test35(
-; CHECK-NEXT:    [[SUB:%.*]] = mul <2 x i64> %A, <i64 -2, i64 -3>
+; CHECK-NEXT:    [[SUB:%.*]] = mul <2 x i64> [[A:%.*]], <i64 -2, i64 -3>
 ; CHECK-NEXT:    ret <2 x i64> [[SUB]]
 ;
   %mul = mul <2 x i64> %A, <i64 3, i64 4>
@@ -502,7 +602,7 @@ define <2 x i64> @test35(<2 x i64> %A) {
 
 define <2 x i64> @test36(<2 x i64> %A) {
 ; CHECK-LABEL: @test36(
-; CHECK-NEXT:    [[SUB:%.*]] = mul <2 x i64> %A, <i64 7, i64 15>
+; CHECK-NEXT:    [[SUB:%.*]] = mul <2 x i64> [[A:%.*]], <i64 7, i64 15>
 ; CHECK-NEXT:    ret <2 x i64> [[SUB]]
 ;
   %shl = shl <2 x i64> %A, <i64 3, i64 4>
@@ -512,7 +612,7 @@ define <2 x i64> @test36(<2 x i64> %A) {
 
 define <2 x i32> @test37(<2 x i32> %A) {
 ; CHECK-LABEL: @test37(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> %A, <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[A:%.*]], <i32 -2147483648, i32 -2147483648>
 ; CHECK-NEXT:    [[SUB:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[SUB]]
 ;
@@ -523,7 +623,7 @@ define <2 x i32> @test37(<2 x i32> %A) {
 
 define i32 @test38(i32 %A) {
 ; CHECK-LABEL: @test38(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 %A, -2147483648
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[A:%.*]], -2147483648
 ; CHECK-NEXT:    [[SUB:%.*]] = sext i1 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
@@ -534,7 +634,7 @@ define i32 @test38(i32 %A) {
 
 define i32 @test39(i32 %A, i32 %x) {
 ; CHECK-LABEL: @test39(
-; CHECK-NEXT:    [[C:%.*]] = add i32 %x, %A
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %B = sub i32 0, %A
@@ -544,8 +644,8 @@ define i32 @test39(i32 %A, i32 %x) {
 
 define i16 @test40(i16 %a, i16 %b) {
 ; CHECK-LABEL: @test40(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr i16 %a, 1
-; CHECK-NEXT:    [[ASHR1:%.*]] = ashr i16 %b, 1
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr i16 [[A:%.*]], 1
+; CHECK-NEXT:    [[ASHR1:%.*]] = ashr i16 [[B:%.*]], 1
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i16 [[ASHR]], [[ASHR1]]
 ; CHECK-NEXT:    ret i16 [[SUB]]
 ;
@@ -557,8 +657,8 @@ define i16 @test40(i16 %a, i16 %b) {
 
 define i32 @test41(i16 %a, i16 %b) {
 ; CHECK-LABEL: @test41(
-; CHECK-NEXT:    [[CONV:%.*]] = sext i16 %a to i32
-; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 %b to i32
+; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[B:%.*]] to i32
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]]
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
@@ -570,8 +670,8 @@ define i32 @test41(i16 %a, i16 %b) {
 
 define i4 @test42(i4 %x, i4 %y) {
 ; CHECK-LABEL: @test42(
-; CHECK-NEXT:    [[A:%.*]] = and i4 %y, 7
-; CHECK-NEXT:    [[B:%.*]] = and i4 %x, 7
+; CHECK-NEXT:    [[A:%.*]] = and i4 [[Y:%.*]], 7
+; CHECK-NEXT:    [[B:%.*]] = and i4 [[X:%.*]], 7
 ; CHECK-NEXT:    [[C:%.*]] = sub nsw i4 [[A]], [[B]]
 ; CHECK-NEXT:    ret i4 [[C]]
 ;
@@ -583,8 +683,8 @@ define i4 @test42(i4 %x, i4 %y) {
 
 define i4 @test43(i4 %x, i4 %y) {
 ; CHECK-LABEL: @test43(
-; CHECK-NEXT:    [[A:%.*]] = or i4 %x, -8
-; CHECK-NEXT:    [[B:%.*]] = and i4 %y, 7
+; CHECK-NEXT:    [[A:%.*]] = or i4 [[X:%.*]], -8
+; CHECK-NEXT:    [[B:%.*]] = and i4 [[Y:%.*]], 7
 ; CHECK-NEXT:    [[C:%.*]] = sub nuw i4 [[A]], [[B]]
 ; CHECK-NEXT:    ret i4 [[C]]
 ;
@@ -596,7 +696,7 @@ define i4 @test43(i4 %x, i4 %y) {
 
 define i32 @test44(i32 %x) {
 ; CHECK-LABEL: @test44(
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 %x, -32768
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[X:%.*]], -32768
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
   %sub = sub nsw i32 %x, 32768
@@ -605,7 +705,7 @@ define i32 @test44(i32 %x) {
 
 define i32 @test45(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test45(
-; CHECK-NEXT:    [[SUB:%.*]] = and i32 %x, %y
+; CHECK-NEXT:    [[SUB:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
   %or = or i32 %x, %y
@@ -614,10 +714,21 @@ define i32 @test45(i32 %x, i32 %y) {
   ret i32 %sub
 }
 
+define i32 @test45commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: @test45commuted(
+; CHECK-NEXT:    [[SUB:%.*]] = and i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %or = or i32 %x, %y
+  %xor = xor i32 %y, %x
+  %sub = sub i32 %or, %xor
+  ret i32 %sub
+}
+
 define i32 @test46(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test46(
-; CHECK-NEXT:    [[X_NOT:%.*]] = xor i32 %x, -1
-; CHECK-NEXT:    [[SUB:%.*]] = and i32 %y, [[X_NOT]]
+; CHECK-NEXT:    [[X_NOT:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[SUB:%.*]] = and i32 [[X_NOT]], [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
   %or = or i32 %x, %y
@@ -625,10 +736,21 @@ define i32 @test46(i32 %x, i32 %y) {
   ret i32 %sub
 }
 
+define i32 @test46commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: @test46commuted(
+; CHECK-NEXT:    [[X_NOT:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[SUB:%.*]] = and i32 [[X_NOT]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %or = or i32 %y, %x
+  %sub = sub i32 %or, %x
+  ret i32 %sub
+}
+
 define i32 @test47(i1 %A, i32 %B, i32 %C, i32 %D) {
 ; CHECK-LABEL: @test47(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 %D, %C
-; CHECK-NEXT:    [[SUB:%.*]] = select i1 %A, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = select i1 [[A:%.*]], i32 [[TMP1]], i32 0
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
   %sel0 = select i1 %A, i32 %D, i32 %B
@@ -639,8 +761,8 @@ define i32 @test47(i1 %A, i32 %B, i32 %C, i32 %D) {
 
 define i32 @test48(i1 %A, i32 %B, i32 %C, i32 %D) {
 ; CHECK-LABEL: @test48(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 %D, %C
-; CHECK-NEXT:    [[SUB:%.*]] = select i1 %A, i32 0, i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = select i1 [[A:%.*]], i32 0, i32 [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
   %sel0 = select i1 %A, i32 %B, i32 %D
@@ -653,8 +775,8 @@ define i32 @test48(i1 %A, i32 %B, i32 %C, i32 %D) {
 
 define i8 @bool_sext_sub(i8 %x, i1 %y) {
 ; CHECK-LABEL: @bool_sext_sub(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i1 %y to i8
-; CHECK-NEXT:    [[SUB:%.*]] = add i8 [[TMP1]], %x
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[Y:%.*]] to i8
+; CHECK-NEXT:    [[SUB:%.*]] = add i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[SUB]]
 ;
   %sext = sext i1 %y to i8
@@ -666,8 +788,8 @@ define i8 @bool_sext_sub(i8 %x, i1 %y) {
 
 define <2 x i8> @bool_sext_sub_vec(<2 x i8> %x, <2 x i1> %y) {
 ; CHECK-LABEL: @bool_sext_sub_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i1> %y to <2 x i8>
-; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i8> [[TMP1]], %x
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i1> [[Y:%.*]] to <2 x i8>
+; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[SUB]]
 ;
   %sext = sext <2 x i1> %y to <2 x i8>
@@ -679,8 +801,8 @@ define <2 x i8> @bool_sext_sub_vec(<2 x i8> %x, <2 x i1> %y) {
 
 define <2 x i8> @bool_sext_sub_vec_nsw(<2 x i8> %x, <2 x i1> %y) {
 ; CHECK-LABEL: @bool_sext_sub_vec_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i1> %y to <2 x i8>
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw <2 x i8> [[TMP1]], %x
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i1> [[Y:%.*]] to <2 x i8>
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw <2 x i8> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[SUB]]
 ;
   %sext = sext <2 x i1> %y to <2 x i8>
@@ -692,8 +814,8 @@ define <2 x i8> @bool_sext_sub_vec_nsw(<2 x i8> %x, <2 x i1> %y) {
 
 define i8 @bool_sext_sub_nuw(i8 %x, i1 %y) {
 ; CHECK-LABEL: @bool_sext_sub_nuw(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i1 %y to i8
-; CHECK-NEXT:    [[SUB:%.*]] = add i8 [[TMP1]], %x
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[Y:%.*]] to i8
+; CHECK-NEXT:    [[SUB:%.*]] = add i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[SUB]]
 ;
   %sext = sext i1 %y to i8
@@ -701,3 +823,169 @@ define i8 @bool_sext_sub_nuw(i8 %x, i1 %y) {
   ret i8 %sub
 }
 
+define i32 @test49(i32 %X) {
+; CHECK-LABEL: @test49(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 1, [[X:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = and i32 [[SUB]], 64
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %sub = sub i32 129, %X
+  %res = and i32 %sub, 64
+  ret i32 %res
+}
+
+define i32 @test50(i32 %X) {
+; CHECK-LABEL: @test50(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 1, [[X:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = and i32 [[SUB]], 127
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %sub = sub i32 129, %X
+  %res = and i32 %sub, 127
+  ret i32 %res
+}
+
+define i32 @test51(i32 %X) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 126, [[X:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = and i32 [[SUB]], 64
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %sub = sub i32 254, %X
+  %res = and i32 %sub, 64
+  ret i32 %res
+}
+
+define i32 @test52(i32 %X) {
+; CHECK-LABEL: @test52(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 126, [[X:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = and i32 [[SUB]], 127
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %sub = sub i32 254, %X
+  %res = and i32 %sub, 127
+  ret i32 %res
+}
+
+define <2 x i1> @test53(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test53(
+; CHECK-NEXT:    [[SUB:%.*]] = xor <2 x i1> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[SUB]]
+;
+  %sub = sub <2 x i1> %A, %B
+  ret <2 x i1> %sub
+}
+
+define i32 @test54(i1 %C) {
+; CHECK-LABEL: @test54(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], i32 -877, i32 113
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %A = select i1 %C, i32 1000, i32 10
+  %V = sub i32 123, %A
+  ret i32 %V
+}
+
+define <2 x i32> @test54vec(i1 %C) {
+; CHECK-LABEL: @test54vec(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 -877, i32 -877>, <2 x i32> <i32 113, i32 113>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+  %V = sub <2 x i32> <i32 123, i32 123>, %A
+  ret <2 x i32> %V
+}
+
+define <2 x i32> @test54vec2(i1 %C) {
+; CHECK-LABEL: @test54vec2(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 -877, i32 -2167>, <2 x i32> <i32 113, i32 303>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+  %V = sub <2 x i32> <i32 123, i32 333>, %A
+  ret <2 x i32> %V
+}
+
+define i32 @test55(i1 %which) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ -877, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ]
+; CHECK-NEXT:    ret i32 [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+  %value = sub i32 123, %A
+  ret i32 %value
+}
+
+define <2 x i32> @test55vec(i1 %which) {
+; CHECK-LABEL: @test55vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 -877, i32 -877>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+  %value = sub <2 x i32> <i32 123, i32 123>, %A
+  ret <2 x i32> %value
+}
+
+define <2 x i32> @test55vec2(i1 %which) {
+; CHECK-LABEL: @test55vec2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 -877, i32 -2167>, [[ENTRY:%.*]] ], [ <i32 113, i32 303>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+  %value = sub <2 x i32> <i32 123, i32 333>, %A
+  ret <2 x i32> %value
+}
+
+define i32 @test56(i32 %A, i32 %B) {
+; CHECK-LABEL: @test56(
+; CHECK-NEXT:    [[Y:%.*]] = sub i32 0, [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[Y]]
+;
+  %X = add i32 %A, %B
+  %Y = sub i32 %A, %X
+  ret i32 %Y                                                                                                                                                                                                                                             }
+
+define i32 @test57(i32 %A, i32 %B) {
+; CHECK-LABEL: @test57(
+; CHECK-NEXT:    [[Y:%.*]] = sub i32 0, [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[Y]]
+;
+  %X = add i32 %B, %A
+  %Y = sub i32 %A, %X
+  ret i32 %Y                                                                                                                                                                                                                                             }
diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll
index eaa45bbb286c8..5597b578f0179 100644
--- a/test/Transforms/InstCombine/trunc.ll
+++ b/test/Transforms/InstCombine/trunc.ll
@@ -119,8 +119,8 @@ define i64 @test8(i32 %A, i32 %B) {
 
 define i8 @test9(i32 %X) {
 ; CHECK-LABEL: @test9(
-; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 %X to i8
-; CHECK-NEXT:    [[Z:%.*]] = and i8 [[X_TR]], 42
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 %X to i8
+; CHECK-NEXT:    [[Z:%.*]] = and i8 [[TMP1]], 42
 ; CHECK-NEXT:    ret i8 [[Z]]
 ;
   %Y = and i32 %X, 42
@@ -464,3 +464,72 @@ define <8 x i16> @trunc_shl_v8i16_v8i32_4(<8 x i32> %a) {
   ret <8 x i16> %conv
 }
 
+; Although the shuffle mask would be unchanged, we don't create a narrower shuffle for types that the backend may not be able to handle:
+; trunc (shuffle X, C, Mask) --> shuffle (trunc X), C', Mask
+
+define <4 x i8> @wide_shuf(<4 x i32> %x) {
+; CHECK-LABEL: @wide_shuf(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> %x, <4 x i32> <i32 undef, i32 3634, i32 90, i32 undef>, <4 x i32> <i32 1, i32 5, i32 6, i32 2>
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <4 x i32> [[SHUF]] to <4 x i8>
+; CHECK-NEXT:    ret <4 x i8> [[TRUNC]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> <i32 35, i32 3634, i32 90, i32 -1>, <4 x i32> <i32 1, i32 5, i32 6, i32 2>
+  %trunc = trunc <4 x i32> %shuf to <4 x i8>
+  ret <4 x i8> %trunc
+}
+
+; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), undef, SplatMask
+
+define <4 x i8> @wide_splat1(<4 x i32> %x) {
+; CHECK-LABEL: @wide_splat1(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i32> %x to <4 x i8>
+; CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    ret <4 x i8> [[TRUNC]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  %trunc = trunc <4 x i32> %shuf to <4 x i8>
+  ret <4 x i8> %trunc
+}
+
+; Test weird types.
+; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), undef, SplatMask
+
+define <3 x i31> @wide_splat2(<3 x i33> %x) {
+; CHECK-LABEL: @wide_splat2(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <3 x i33> %x to <3 x i31>
+; CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <3 x i31> [[TMP1]], <3 x i31> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; CHECK-NEXT:    ret <3 x i31> [[TRUNC]]
+;
+  %shuf = shufflevector <3 x i33> %x, <3 x i33> undef, <3 x i32> <i32 1, i32 1, i32 1>
+  %trunc = trunc <3 x i33> %shuf to <3 x i31>
+  ret <3 x i31> %trunc
+}
+
+; FIXME:
+; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), undef, SplatMask
+; A mask with undef elements should still be considered a splat mask.
+
+define <3 x i31> @wide_splat3(<3 x i33> %x) {
+; CHECK-LABEL: @wide_splat3(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <3 x i33> %x, <3 x i33> undef, <3 x i32> <i32 undef, i32 1, i32 1>
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <3 x i33> [[SHUF]] to <3 x i31>
+; CHECK-NEXT:    ret <3 x i31> [[TRUNC]]
+;
+  %shuf = shufflevector <3 x i33> %x, <3 x i33> undef, <3 x i32> <i32 undef, i32 1, i32 1>
+  %trunc = trunc <3 x i33> %shuf to <3 x i31>
+  ret <3 x i31> %trunc
+}
+
+; TODO: The shuffle extends the length of the input vector. Should we shrink this?
+
+define <8 x i8> @wide_lengthening_splat(<4 x i16> %v) {
+; CHECK-LABEL: @wide_lengthening_splat(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TR:%.*]] = trunc <8 x i16> [[SHUF]] to <8 x i8>
+; CHECK-NEXT:    ret <8 x i8> [[TR]]
+;
+  %shuf = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
+  %tr = trunc <8 x i16> %shuf to <8 x i8>
+  ret <8 x i8> %tr
+}
+
diff --git a/test/Transforms/InstCombine/type_pun.ll b/test/Transforms/InstCombine/type_pun.ll
index 098164cd029f0..56d1ffcb5d319 100644
--- a/test/Transforms/InstCombine/type_pun.ll
+++ b/test/Transforms/InstCombine/type_pun.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 ; Ensure that type punning using a union of vector and same-sized array
@@ -17,9 +18,10 @@ target datalayout = "p:32:32"
 ; Extracting the zeroth element in an i32 array.
 define i32 @type_pun_zeroth(<16 x i8> %in) {
 ; CHECK-LABEL: @type_pun_zeroth(
-; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <4 x i32>
-; CHECK-NEXT: %[[EXT:.*]] = extractelement <4 x i32> %[[BC]], i32 0
-; CHECK-NEXT: ret i32 %[[EXT]]
+; CHECK-NEXT:    [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[SROA_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT:    ret i32 [[SROA_EXTRACT]]
+;
   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %1 = bitcast <4 x i8> %sroa to i32
   ret i32 %1
@@ -28,9 +30,10 @@ define i32 @type_pun_zeroth(<16 x i8> %in) {
 ; Extracting the first element in an i32 array.
 define i32 @type_pun_first(<16 x i8> %in) {
 ; CHECK-LABEL: @type_pun_first(
-; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <4 x i32>
-; CHECK-NEXT: %[[EXT:.*]] = extractelement <4 x i32> %[[BC]], i32 1
-; CHECK-NEXT: ret i32 %[[EXT]]
+; CHECK-NEXT:    [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[SROA_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 1
+; CHECK-NEXT:    ret i32 [[SROA_EXTRACT]]
+;
   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %1 = bitcast <4 x i8> %sroa to i32
   ret i32 %1
@@ -39,10 +42,11 @@ define i32 @type_pun_first(<16 x i8> %in) {
 ; Extracting an i32 that isn't aligned to any natural boundary.
 define i32 @type_pun_misaligned(<16 x i8> %in) {
 ; CHECK-LABEL: @type_pun_misaligned(
-; CHECK-NEXT: %[[SHUF:.*]] = shufflevector <16 x i8> %in, <16 x i8> undef, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %[[SHUF]] to <4 x i32>
-; CHECK-NEXT: %[[EXT:.*]] = extractelement <4 x i32> %[[BC]], i32 0
-; CHECK-NEXT: ret i32 %[[EXT]]
+; CHECK-NEXT:    [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> undef, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[SROA_BC:%.*]] = bitcast <16 x i8> [[SROA_EXTRACT]] to <4 x i32>
+; CHECK-NEXT:    [[SROA_EXTRACT1:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT:    ret i32 [[SROA_EXTRACT1]]
+;
   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
   %1 = bitcast <4 x i8> %sroa to i32
   ret i32 %1
@@ -51,10 +55,11 @@ define i32 @type_pun_misaligned(<16 x i8> %in) {
 ; Type punning to an array of pointers.
 define i32* @type_pun_pointer(<16 x i8> %in) {
 ; CHECK-LABEL: @type_pun_pointer(
-; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <4 x i32>
-; CHECK-NEXT: %[[EXT:.*]] = extractelement <4 x i32> %[[BC]], i32 0
-; CHECK-NEXT: %[[I2P:.*]] = inttoptr i32 %[[EXT]] to i32*
-; CHECK-NEXT: ret i32* %[[I2P]]
+; CHECK-NEXT:    [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[SROA_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = inttoptr i32 [[SROA_EXTRACT]] to i32*
+; CHECK-NEXT:    ret i32* [[TMP1]]
+;
   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %1 = bitcast <4 x i8> %sroa to i32
   %2 = inttoptr i32 %1 to i32*
@@ -64,9 +69,10 @@ define i32* @type_pun_pointer(<16 x i8> %in) {
 ; Type punning to an array of 32-bit floating-point values.
 define float @type_pun_float(<16 x i8> %in) {
 ; CHECK-LABEL: @type_pun_float(
-; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <4 x float>
-; CHECK-NEXT: %[[EXT:.*]] = extractelement <4 x float> %[[BC]], i32 0
-; CHECK-NEXT: ret float %[[EXT]]
+; CHECK-NEXT:    [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x float>
+; CHECK-NEXT:    [[SROA_EXTRACT:%.*]] = extractelement <4 x float> [[SROA_BC]], i32 0
+; CHECK-NEXT:    ret float [[SROA_EXTRACT]]
+;
   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %1 = bitcast <4 x i8> %sroa to float
   ret float %1
@@ -75,9 +81,10 @@ define float @type_pun_float(<16 x i8> %in) {
 ; Type punning to an array of 64-bit floating-point values.
 define double @type_pun_double(<16 x i8> %in) {
 ; CHECK-LABEL: @type_pun_double(
-; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <2 x double>
-; CHECK-NEXT: %[[EXT:.*]] = extractelement <2 x double> %[[BC]], i32 0
-; CHECK-NEXT: ret double %[[EXT]]
+; CHECK-NEXT:    [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <2 x double>
+; CHECK-NEXT:    [[SROA_EXTRACT:%.*]] = extractelement <2 x double> [[SROA_BC]], i32 0
+; CHECK-NEXT:    ret double [[SROA_EXTRACT]]
+;
   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %1 = bitcast <8 x i8> %sroa to double
   ret double %1
@@ -87,13 +94,14 @@ define double @type_pun_double(<16 x i8> %in) {
 ; Verify that multiple uses with different bitcast types are properly handled.
 define { float, i32 } @type_pun_float_i32(<16 x i8> %in) {
 ; CHECK-LABEL: @type_pun_float_i32(
-; CHECK-NEXT: %[[BCI:.*]] = bitcast <16 x i8> %in to <4 x i32>
-; CHECK-NEXT: %[[EXTI:.*]] = extractelement <4 x i32> %[[BCI]], i32 0
-; CHECK-NEXT: %[[BCF:.*]] = bitcast <16 x i8> %in to <4 x float>
-; CHECK-NEXT: %[[EXTF:.*]] = extractelement <4 x float> %[[BCF]], i32 0
-; CHECK-NEXT: %1 = insertvalue { float, i32 } undef, float %[[EXTF]], 0
-; CHECK-NEXT: %2 = insertvalue { float, i32 } %1, i32 %[[EXTI]], 1
-; CHECK-NEXT: ret { float, i32 } %2
+; CHECK-NEXT:    [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[SROA_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT:    [[SROA_BC1:%.*]] = bitcast <16 x i8> [[IN]] to <4 x float>
+; CHECK-NEXT:    [[SROA_EXTRACT2:%.*]] = extractelement <4 x float> [[SROA_BC1]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { float, i32 } undef, float [[SROA_EXTRACT2]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { float, i32 } [[TMP1]], i32 [[SROA_EXTRACT]], 1
+; CHECK-NEXT:    ret { float, i32 } [[TMP2]]
+;
   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %f = bitcast <4 x i8> %sroa to float
   %i = bitcast <4 x i8> %sroa to i32
@@ -106,24 +114,29 @@ define { float, i32 } @type_pun_float_i32(<16 x i8> %in) {
 ; Verify that the bitcast is shared and dominates usage.
 define i32 @type_pun_i32_ctrl(<16 x i8> %in) {
 ; CHECK-LABEL: @type_pun_i32_ctrl(
-entry: ; CHECK-NEXT: entry:
-; CHECK-NEXT: %[[BC:.*]] = bitcast <16 x i8> %in to <4 x i32>
-; CHECK-NEXT: br
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>
+; CHECK-NEXT:    br i1 undef, label [[LEFT:%.*]], label [[RIGHT:%.*]]
+; CHECK:       left:
+; CHECK-NEXT:    [[SROA_EXTRACT1:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT:    br label [[TAIL:%.*]]
+; CHECK:       right:
+; CHECK-NEXT:    [[SROA_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT:    br label [[TAIL]]
+; CHECK:       tail:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[SROA_EXTRACT1]], [[LEFT]] ], [ [[SROA_EXTRACT]], [[RIGHT]] ]
+; CHECK-NEXT:    ret i32 [[I]]
+;
+entry:
   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   br i1 undef, label %left, label %right
-left: ; CHECK: left:
-; CHECK-NEXT: %[[EXTL:.*]] = extractelement <4 x i32> %[[BC]], i32 0
-; CHECK-NEXT: br
+left:
   %lhs = bitcast <4 x i8> %sroa to i32
   br label %tail
-right: ; CHECK: right:
-; CHECK-NEXT: %[[EXTR:.*]] = extractelement <4 x i32> %[[BC]], i32 0
-; CHECK-NEXT: br
+right:
   %rhs = bitcast <4 x i8> %sroa to i32
   br label %tail
-tail: ; CHECK: tail:
-; CHECK-NEXT: %i = phi i32 [ %[[EXTL]], %left ], [ %[[EXTR]], %right ]
-; CHECK-NEXT: ret i32 %i
+tail:
   %i = phi i32 [ %lhs, %left ], [ %rhs, %right ]
   ret i32 %i
 }
@@ -132,9 +145,10 @@ tail: ; CHECK: tail:
 ; should stay the same.
 define i40 @type_pun_unhandled(<16 x i8> %in) {
 ; CHECK-LABEL: @type_pun_unhandled(
-; CHECK-NEXT: %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <5 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8>
-; CHECK-NEXT: %1 = bitcast <5 x i8> %sroa to i40
-; CHECK-NEXT: ret i40 %1
+; CHECK-NEXT:    [[SROA:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> undef, <5 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <5 x i8> [[SROA]] to i40
+; CHECK-NEXT:    ret i40 [[TMP1]]
+;
   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <5 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8>
   %1 = bitcast <5 x i8> %sroa to i40
   ret i40 %1
diff --git a/test/Transforms/InstCombine/urem.ll b/test/Transforms/InstCombine/urem.ll
deleted file mode 100644
index 0549d759eac48..0000000000000
--- a/test/Transforms/InstCombine/urem.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-define i64 @rem_unsigned(i64 %x1, i64 %y2) {
-; CHECK-LABEL: @rem_unsigned(
-; CHECK-NEXT:    [[R:%.*]] = urem i64 %x1, %y2
-; CHECK-NEXT:    ret i64 [[R]]
-;
-  %r = udiv i64 %x1, %y2
-  %r7 = mul i64 %r, %y2
-  %r8 = sub i64 %x1, %r7
-  ret i64 %r8
-}
-
-; PR28672 - https://llvm.org/bugs/show_bug.cgi?id=28672
-
-define i8 @big_divisor(i8 %x) {
-; CHECK-LABEL: @big_divisor(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 %x, -127
-; CHECK-NEXT:    [[TMP2:%.*]] = add i8 %x, 127
-; CHECK-NEXT:    [[REM:%.*]] = select i1 [[TMP1]], i8 %x, i8 [[TMP2]]
-; CHECK-NEXT:    ret i8 [[REM]]
-;
-  %rem = urem i8 %x, 129
-  ret i8 %rem
-}
-
-define i5 @biggest_divisor(i5 %x) {
-; CHECK-LABEL: @biggest_divisor(
-; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i5 %x, -1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[NOT_]] to i5
-; CHECK-NEXT:    [[REM:%.*]] = add i5 [[TMP1]], %x
-; CHECK-NEXT:    ret i5 [[REM]]
-;
-  %rem = urem i5 %x, -1
-  ret i5 %rem
-}
-
-; TODO: Should vector subtract of constant be canonicalized to add?
-define <2 x i4> @big_divisor_vec(<2 x i4> %x) {
-; CHECK-LABEL: @big_divisor_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i4> %x, <i4 -3, i4 -3>
-; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i4> %x, <i4 -3, i4 -3>
-; CHECK-NEXT:    [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i4> %x, <2 x i4> [[TMP2]]
-; CHECK-NEXT:    ret <2 x i4> [[REM]]
-;
-  %rem = urem <2 x i4> %x, <i4 13, i4 13>
-  ret <2 x i4> %rem
-}
-
diff --git a/test/Transforms/InstCombine/vararg.ll b/test/Transforms/InstCombine/vararg.ll
index 263a7425a0759..111cb4de7bc32 100644
--- a/test/Transforms/InstCombine/vararg.ll
+++ b/test/Transforms/InstCombine/vararg.ll
@@ -2,8 +2,8 @@
 
 %struct.__va_list = type { i8*, i8*, i8*, i32, i32 }
 
-declare void @llvm.lifetime.start(i64, i8* nocapture)
-declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
 declare void @llvm.va_start(i8*)
 declare void @llvm.va_end(i8*)
 declare void @llvm.va_copy(i8*, i8*)
@@ -17,14 +17,14 @@ entry:
   %va1 = alloca %struct.__va_list, align 8
   %0 = bitcast %struct.__va_list* %va0 to i8*
   %1 = bitcast %struct.__va_list* %va1 to i8*
-  call void @llvm.lifetime.start(i64 32, i8* %0)
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* %0)
   call void @llvm.va_start(i8* %0)
-  call void @llvm.lifetime.start(i64 32, i8* %1)
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* %1)
   call void @llvm.va_copy(i8* %1, i8* %0)
   call void @llvm.va_end(i8* %1)
-  call void @llvm.lifetime.end(i64 32, i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* %1)
   call void @llvm.va_end(i8* %0)
-  call void @llvm.lifetime.end(i64 32, i8* %0)
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* %0)
   ret i32 0
 }
 
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 7c46adaf616e6..5f27634da19cc 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -67,7 +67,7 @@ define i64 @test3(float %f, double %d) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
 ; CHECK-NEXT:    ret i64 [[TMP15]]
 ;
   %v00 = insertelement <4 x float> undef, float %f, i32 0
@@ -182,10 +182,9 @@ define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
 
 define <2 x float> @test_fptrunc(double %f) {
 ; CHECK-LABEL: @test_fptrunc(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %f, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 0.000000e+00, i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float>
-; CHECK-NEXT:    ret <2 x float> [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double %f, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
+; CHECK-NEXT:    ret <2 x float> [[TMP2]]
 ;
   %tmp9 = insertelement <4 x double> undef, double %f, i32 0
   %tmp10 = insertelement <4 x double> %tmp9, double 0.000000e+00, i32 1
@@ -198,10 +197,9 @@ define <2 x float> @test_fptrunc(double %f) {
 
 define <2 x double> @test_fpext(float %f) {
 ; CHECK-LABEL: @test_fpext(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float 0.000000e+00, i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
-; CHECK-NEXT:    ret <2 x double> [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> <float undef, float 0.000000e+00>, float %f, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
 ;
   %tmp9 = insertelement <4 x float> undef, float %f, i32 0
   %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
@@ -223,8 +221,7 @@ define <4 x double> @test_shuffle(<4 x double> %f) {
 
 define <4 x float> @test_select(float %f, float %g) {
 ; CHECK-LABEL: @test_select(
-; CHECK-NEXT:    [[A0:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[A3:%.*]] = insertelement <4 x float> [[A0]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[A3:%.*]] = insertelement <4 x float> <float undef, float undef, float undef, float 3.000000e+00>, float %f, i32 0
 ; CHECK-NEXT:    [[RET:%.*]] = shufflevector <4 x float> [[A3]], <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
 ; CHECK-NEXT:    ret <4 x float> [[RET]]
 ;
diff --git a/test/Transforms/InstCombine/vec_sext.ll b/test/Transforms/InstCombine/vec_sext.ll
index 10947c1781e03..79a32d64b0638 100644
--- a/test/Transforms/InstCombine/vec_sext.ll
+++ b/test/Transforms/InstCombine/vec_sext.ll
@@ -6,7 +6,7 @@ define <4 x i32> @psignd_3(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, %a
 ; CHECK-NEXT:    [[B_LOBIT:%.*]] = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
 ; CHECK-NEXT:    [[T1:%.*]] = xor <4 x i32> [[B_LOBIT]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[T2:%.*]] = and <4 x i32> %a, [[T1]]
+; CHECK-NEXT:    [[T2:%.*]] = and <4 x i32> [[T1]], %a
 ; CHECK-NEXT:    [[T3:%.*]] = and <4 x i32> [[B_LOBIT]], [[SUB]]
 ; CHECK-NEXT:    [[COND:%.*]] = or <4 x i32> [[T2]], [[T3]]
 ; CHECK-NEXT:    ret <4 x i32> [[COND]]
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
index 8d01cad4b453d..643ab6c5348fa 100644
--- a/test/Transforms/InstCombine/vector-casts.ll
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -110,23 +110,11 @@ define <2 x i64> @bar(<2 x i65> %t) {
   ret <2 x i64> %b
 }
 
-define <2 x i65> @foos(<2 x i64> %t) {
-; CHECK-LABEL: @foos(
-; CHECK-NEXT:    [[A:%.*]] = zext <2 x i64> %t to <2 x i65>
-; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i65> [[A]], <i65 33, i65 33>
-; CHECK-NEXT:    [[B:%.*]] = ashr <2 x i65> [[SEXT]], <i65 33, i65 33>
-; CHECK-NEXT:    ret <2 x i65> [[B]]
-;
-  %a = trunc <2 x i64> %t to <2 x i32>
-  %b = sext <2 x i32> %a to <2 x i65>
-  ret <2 x i65> %b
-}
-
 define <2 x i64> @bars(<2 x i65> %t) {
 ; CHECK-LABEL: @bars(
 ; CHECK-NEXT:    [[A:%.*]] = trunc <2 x i65> %t to <2 x i64>
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i64> [[A]], <i64 32, i64 32>
-; CHECK-NEXT:    [[B:%.*]] = ashr <2 x i64> [[SEXT]], <i64 32, i64 32>
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[SEXT]], <i64 32, i64 32>
 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 ;
   %a = trunc <2 x i65> %t to <2 x i32>
@@ -137,7 +125,7 @@ define <2 x i64> @bars(<2 x i65> %t) {
 define <2 x i64> @quxs(<2 x i64> %t) {
 ; CHECK-LABEL: @quxs(
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i64> %t, <i64 32, i64 32>
-; CHECK-NEXT:    [[B:%.*]] = ashr <2 x i64> [[SEXT]], <i64 32, i64 32>
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[SEXT]], <i64 32, i64 32>
 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 ;
   %a = trunc <2 x i64> %t to <2 x i32>
@@ -148,7 +136,7 @@ define <2 x i64> @quxs(<2 x i64> %t) {
 define <2 x i64> @quxt(<2 x i64> %t) {
 ; CHECK-LABEL: @quxt(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i64> %t, <i64 32, i64 32>
-; CHECK-NEXT:    [[B:%.*]] = ashr <2 x i64> [[A]], <i64 32, i64 32>
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[A]], <i64 32, i64 32>
 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 ;
   %a = shl <2 x i64> %t, <i64 32, i64 32>
@@ -228,3 +216,91 @@ define <8 x i32> @pr24458(<8 x float> %n) {
   ret <8 x i32> %wrong
 }
 
+; Hoist a trunc to a scalar if we're inserting into an undef vector.
+; trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
+
+define <3 x i16> @trunc_inselt_undef(i32 %x) {
+; CHECK-LABEL: @trunc_inselt_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 %x to i16
+; CHECK-NEXT:    [[TRUNC:%.*]] = insertelement <3 x i16> undef, i16 [[TMP1]], i32 1
+; CHECK-NEXT:    ret <3 x i16> [[TRUNC]]
+;
+  %vec = insertelement <3 x i32> undef, i32 %x, i32 1
+  %trunc = trunc <3 x i32> %vec to <3 x i16>
+  ret <3 x i16> %trunc
+}
+
+; Hoist an fptrunc to a scalar if we're inserting into an undef vector.
+; fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
+
+define <2 x float> @fptrunc_inselt_undef(double %x, i32 %index) {
+; CHECK-LABEL: @fptrunc_inselt_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc double %x to float
+; CHECK-NEXT:    [[TRUNC:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 %index
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %vec = insertelement <2 x double> <double undef, double undef>, double %x, i32 %index
+  %trunc = fptrunc <2 x double> %vec to <2 x float>
+  ret <2 x float> %trunc
+}
+
+; TODO: Strengthen the backend, so we can have this canonicalization.
+; Insert a scalar int into a constant vector and truncate:
+; trunc (inselt C, X, Index) --> inselt C, (trunc X), Index
+
+define <3 x i16> @trunc_inselt1(i32 %x) {
+; CHECK-LABEL: @trunc_inselt1(
+; CHECK-NEXT:    [[VEC:%.*]] = insertelement <3 x i32> <i32 3, i32 undef, i32 65536>, i32 %x, i32 1
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <3 x i32> [[VEC]] to <3 x i16>
+; CHECK-NEXT:    ret <3 x i16> [[TRUNC]]
+;
+  %vec = insertelement <3 x i32> <i32 3, i32 -2, i32 65536>, i32 %x, i32 1
+  %trunc = trunc <3 x i32> %vec to <3 x i16>
+  ret <3 x i16> %trunc
+}
+
+; TODO: Strengthen the backend, so we can have this canonicalization.
+; Insert a scalar FP into a constant vector and FP truncate:
+; fptrunc (inselt C, X, Index) --> inselt C, (fptrunc X), Index
+
+define <2 x float> @fptrunc_inselt1(double %x, i32 %index) {
+; CHECK-LABEL: @fptrunc_inselt1(
+; CHECK-NEXT:    [[VEC:%.*]] = insertelement <2 x double> <double undef, double 3.000000e+00>, double %x, i32 %index
+; CHECK-NEXT:    [[TRUNC:%.*]] = fptrunc <2 x double> [[VEC]] to <2 x float>
+; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
+;
+  %vec = insertelement <2 x double> <double undef, double 3.0>, double %x, i32 %index
+  %trunc = fptrunc <2 x double> %vec to <2 x float>
+  ret <2 x float> %trunc
+}
+
+; TODO: Strengthen the backend, so we can have this canonicalization.
+; Insert a scalar int constant into a vector and truncate:
+; trunc (inselt X, C, Index) --> inselt (trunc X), C', Index
+
+define <8 x i16> @trunc_inselt2(<8 x i32> %x, i32 %index) {
+; CHECK-LABEL: @trunc_inselt2(
+; CHECK-NEXT:    [[VEC:%.*]] = insertelement <8 x i32> %x, i32 1048576, i32 %index
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <8 x i32> [[VEC]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[TRUNC]]
+;
+  %vec = insertelement <8 x i32> %x, i32 1048576, i32 %index
+  %trunc = trunc <8 x i32> %vec to <8 x i16>
+  ret <8 x i16> %trunc
+}
+
+; TODO: Strengthen the backend, so we can have this canonicalization.
+; Insert a scalar FP constant into a vector and FP truncate:
+; fptrunc (inselt X, C, Index) --> inselt (fptrunc X), C', Index
+
+define <3 x float> @fptrunc_inselt2(<3 x double> %x) {
+; CHECK-LABEL: @fptrunc_inselt2(
+; CHECK-NEXT:    [[VEC:%.*]] = insertelement <3 x double> %x, double 4.000000e+00, i32 2
+; CHECK-NEXT:    [[TRUNC:%.*]] = fptrunc <3 x double> [[VEC]] to <3 x float>
+; CHECK-NEXT:    ret <3 x float> [[TRUNC]]
+;
+  %vec = insertelement <3 x double> %x, double 4.0, i32 2
+  %trunc = fptrunc <3 x double> %vec to <3 x float>
+  ret <3 x float> %trunc
+}
+
diff --git a/test/Transforms/InstCombine/vector-srem.ll b/test/Transforms/InstCombine/vector-srem.ll
deleted file mode 100644
index 44b38596e684a..0000000000000
--- a/test/Transforms/InstCombine/vector-srem.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) {
-; CHECK-LABEL: @foo(
-; CHECK-NEXT:    [[K:%.*]] = srem <4 x i32> %t, %u
-; CHECK-NEXT:    ret <4 x i32> [[K]]
-;
-  %k = sdiv <4 x i32> %t, %u
-  %l = mul <4 x i32> %k, %u
-  %m = sub <4 x i32> %t, %l
-  ret <4 x i32> %m
-}
diff --git a/test/Transforms/InstCombine/vector-urem.ll b/test/Transforms/InstCombine/vector-urem.ll
index 6cecc16069d36..34eebeef3bb10 100644
--- a/test/Transforms/InstCombine/vector-urem.ll
+++ b/test/Transforms/InstCombine/vector-urem.ll
@@ -19,11 +19,3 @@ define <4 x i32> @test_v4i32_const_pow2(<4 x i32> %a0) {
   ret <4 x i32> %1
 }
 
-define <4 x i32> @test_v4i32_const_pow2_or_zero(<4 x i32> %a0) {
-; CHECK-LABEL: @test_v4i32_const_pow2_or_zero(
-; CHECK-NEXT:    [[TMP1:%.*]] = urem <4 x i32> %a0, <i32 1, i32 2, i32 0, i32 8>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = urem <4 x i32> %a0, <i32 1, i32 2, i32 0, i32 8>
-  ret <4 x i32> %1
-}
diff --git a/test/Transforms/InstCombine/vector_insertelt_shuffle.ll b/test/Transforms/InstCombine/vector_insertelt_shuffle.ll
index b3e614653cfa2..c358509d690e9 100644
--- a/test/Transforms/InstCombine/vector_insertelt_shuffle.ll
+++ b/test/Transforms/InstCombine/vector_insertelt_shuffle.ll
@@ -1,94 +1,95 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-define<4 x float> @foo(<4 x float> %x) {
+; insertelements should fold to shuffle
+define <4 x float> @foo(<4 x float> %x) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    [[INS2:%.*]] = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[INS2]]
+;
   %ins1 = insertelement<4 x float> %x, float 1.0, i32 1
   %ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
-  ret<4 x float> %ins2
+  ret <4 x float> %ins2
 }
 
-; insertelements should fold to shuffle
-; CHECK-LABEL: @foo
-; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT: ret <4 x float> %
+; Insert of a constant is canonicalized ahead of insert of a variable.
 
-define<4 x float> @bar(<4 x float> %x, float %a) {
+define <4 x float> @bar(<4 x float> %x, float %a) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %x, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 1
+; CHECK-NEXT:    ret <4 x float> [[INS2]]
+;
   %ins1 = insertelement<4 x float> %x, float %a, i32 1
   %ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
-  ret<4 x float> %ins2
+  ret <4 x float> %ins2
 }
 
-; CHECK-LABEL: @bar
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float %{{.+}}, i32 1
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
-; CHECK-NEXT: ret <4 x float> %
-
-define<4 x float> @baz(<4 x float> %x, i32 %a) {
+define <4 x float> @baz(<4 x float> %x, i32 %a) {
+; CHECK-LABEL: @baz(
+; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> %x, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 2.000000e+00, i32 %a
+; CHECK-NEXT:    ret <4 x float> [[INS2]]
+;
   %ins1 = insertelement<4 x float> %x, float 1.0, i32 1
   %ins2 = insertelement<4 x float> %ins1, float 2.0, i32 %a
-  ret<4 x float> %ins2
+  ret <4 x float> %ins2
 }
 
-; CHECK-LABEL: @baz
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1
-; CHECK-NEXT: insertelement <4 x float> %ins1, float 2.000000e+00, i32 %
-; CHECK-NEXT: ret <4 x float> %
-
-define<4 x float> @bazz(<4 x float> %x, i32 %a) {
+; The constant-index insertelements between the two variable-index inserts should fold to a shuffle
+define <4 x float> @bazz(<4 x float> %x, i32 %a) {
+; CHECK-LABEL: @bazz(
+; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> %x, float 1.000000e+00, i32 3
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 5.000000e+00, i32 %a
+; CHECK-NEXT:    [[INS5:%.*]] = shufflevector <4 x float> [[INS2]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT:    [[INS6:%.*]] = insertelement <4 x float> [[INS5]], float 7.000000e+00, i32 %a
+; CHECK-NEXT:    ret <4 x float> [[INS6]]
+;
   %ins1 = insertelement<4 x float> %x, float 1.0, i32 3
   %ins2 = insertelement<4 x float> %ins1, float 5.0, i32 %a
   %ins3 = insertelement<4 x float> %ins2, float 3.0, i32 2
   %ins4 = insertelement<4 x float> %ins3, float 1.0, i32 1
   %ins5 = insertelement<4 x float> %ins4, float 2.0, i32 2
   %ins6 = insertelement<4 x float> %ins5, float 7.0, i32 %a
-  ret<4 x float> %ins6
+  ret <4 x float> %ins6
 }
 
-; insertelements should fold to shuffle
-; CHECK-LABEL: @bazz
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 3
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 5.000000e+00, i32 %
-; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 7.000000e+00, i32 %
-; CHECK-NEXT: ret <4 x float> %
-
-define<4 x float> @bazzz(<4 x float> %x) {
+define <4 x float> @bazzz(<4 x float> %x) {
+; CHECK-LABEL: @bazzz(
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> %x, float 2.000000e+00, i32 2
+; CHECK-NEXT:    ret <4 x float> [[INS2]]
+;
   %ins1 = insertelement<4 x float> %x, float 1.0, i32 5
   %ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
-  ret<4 x float> %ins2
+  ret <4 x float> %ins2
 }
 
-; CHECK-LABEL: @bazzz
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
-; CHECK-NEXT: ret <4 x float> %
-
-define<4 x float> @bazzzz(<4 x float> %x) {
+define <4 x float> @bazzzz(<4 x float> %x) {
+; CHECK-LABEL: @bazzzz(
+; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> %x, float 1.000000e+00, i32 undef
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> %x, float 2.000000e+00, i32 2
+; CHECK-NEXT:    ret <4 x float> [[INS2]]
+;
   %ins1 = insertelement<4 x float> %x, float 1.0, i32 undef
   %ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
-  ret<4 x float> %ins2
+  ret <4 x float> %ins2
 }
 
-; CHECK-LABEL: @bazzzz
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 undef
-; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
-; CHECK-NEXT: ret <4 x float> %
-
-define<4 x float> @bazzzzz() {
+define <4 x float> @bazzzzz() {
+; CHECK-LABEL: @bazzzzz(
+; CHECK-NEXT:    ret <4 x float> <float 1.000000e+00, float 5.000000e+00, float 1.000000e+01, float 4.000000e+00>
+;
   %ins1 = insertelement <4 x float> insertelement (<4 x float> <float 1.0, float 2.0, float 3.0, float undef>, float 4.0, i32 3), float 5.0, i32 1
   %ins2 = insertelement<4 x float> %ins1, float 10.0, i32 2
-  ret<4 x float> %ins2
+  ret <4 x float> %ins2
 }
 
-; insertelements should fold to shuffle
-; CHECK-LABEL: @bazzzzz
-; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 5.000000e+00, float 1.000000e+01, float 4.000000e+00>
-
-define<4 x float> @bazzzzzz(<4 x float> %x, i32 %a) {
+define <4 x float> @bazzzzzz(<4 x float> %x, i32 %a) {
+; CHECK-LABEL: @bazzzzzz(
+; CHECK-NEXT:    ret <4 x float> <float undef, float 5.000000e+00, float undef, float 4.000000e+00>
+;
   %ins1 = insertelement <4 x float> insertelement (<4 x float> shufflevector (<4 x float> undef, <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0> , <4 x i32> <i32 0, i32 5, i32 undef, i32 6> ), float 4.0, i32 3), float 5.0, i32 1
-  ret<4 x float> %ins1
+  ret <4 x float> %ins1
 }
 
-; insertelements should fold to shuffle
-; CHECK-LABEL: @bazzzzz
-; CHECK-NEXT: ret <4 x float> <float undef, float 5.000000e+00, float undef, float 4.000000e+00>
 
diff --git a/test/Transforms/InstCombine/win-math.ll b/test/Transforms/InstCombine/win-math.ll
index e6e79e2b84a02..36947791393d9 100644
--- a/test/Transforms/InstCombine/win-math.ll
+++ b/test/Transforms/InstCombine/win-math.ll
@@ -56,15 +56,15 @@ declare double @ceil(double %x)
 define float @float_ceil(float %x) nounwind readnone {
 ; WIN32-LABEL: @float_ceil(
 ; WIN32-NOT: float @ceilf
-; WIN32: double @ceil
+; WIN32: float @llvm.ceil.f32
 ; WIN64-LABEL: @float_ceil(
-; WIN64: float @ceilf
+; WIN64: float @llvm.ceil.f32
 ; WIN64-NOT: double @ceil
 ; MINGW32-LABEL: @float_ceil(
-; MINGW32: float @ceilf
+; MINGW32: float @llvm.ceil.f32
 ; MINGW32-NOT: double @ceil
 ; MINGW64-LABEL: @float_ceil(
-; MINGW64: float @ceilf
+; MINGW64: float @llvm.ceil.f32
 ; MINGW64-NOT: double @ceil
     %1 = fpext float %x to double
     %2 = call double @ceil(double %1)
@@ -137,15 +137,15 @@ declare double @floor(double %x)
 define float @float_floor(float %x) nounwind readnone {
 ; WIN32-LABEL: @float_floor(
 ; WIN32-NOT: float @floorf
-; WIN32: double @floor
+; WIN32: float @llvm.floor.f32
 ; WIN64-LABEL: @float_floor(
-; WIN64: float @floorf
+; WIN64: float @llvm.floor.f32
 ; WIN64-NOT: double @floor
 ; MINGW32-LABEL: @float_floor(
-; MINGW32: float @floorf
+; MINGW32: float @llvm.floor.f32
 ; MINGW32-NOT: double @floor
 ; MINGW64-LABEL: @float_floor(
-; MINGW64: float @floorf
+; MINGW64: float @llvm.floor.f32
 ; MINGW64-NOT: double @floor
     %1 = fpext float %x to double
     %2 = call double @floor(double %1)
@@ -262,10 +262,10 @@ define float @float_round(float %x) nounwind readnone {
 ; WIN64-NOT: float @roundf
 ; WIN64: double @round
 ; MINGW32-LABEL: @float_round(
-; MINGW32: float @roundf
+; MINGW32: float @llvm.round.f32
 ; MINGW32-NOT: double @round
 ; MINGW64-LABEL: @float_round(
-; MINGW64: float @roundf
+; MINGW64: float @llvm.round.f32
 ; MINGW64-NOT: double @round
     %1 = fpext float %x to double
     %2 = call double @round(double %1)
@@ -274,21 +274,26 @@ define float @float_round(float %x) nounwind readnone {
 }
 
 declare float @powf(float, float)
-; win32 lacks sqrtf&fabsf, win64 lacks fabsf
+
+; win32 lacks sqrtf and fabsf, and win64 lacks fabsf, but a call to the
+; fabs intrinsic can be emitted in place of the missing fabsf.
 define float @float_powsqrt(float %x) nounwind readnone {
 ; WIN32-LABEL: @float_powsqrt(
 ; WIN32-NOT: float @sqrtf
 ; WIN32: float @powf
+
 ; WIN64-LABEL: @float_powsqrt(
-; WIN64-NOT: float @sqrtf
-; WIN64: float @powf
+; WIN64: float @sqrtf
+; WIN64: float @llvm.fabs.f32(
+; WIN64-NOT: float @powf
+
 ; MINGW32-LABEL: @float_powsqrt(
 ; MINGW32: float @sqrtf
-; MINGW32: float @fabsf
+; MINGW32: float @llvm.fabs.f32
 ; MINGW32-NOT: float @powf
 ; MINGW64-LABEL: @float_powsqrt(
 ; MINGW64: float @sqrtf
-; MINGW64: float @fabsf
+; MINGW64: float @llvm.fabs.f32(
 ; MINGW64-NOT: float @powf
     %1 = call float @powf(float %x, float 0.5)
     ret float %1
diff --git a/test/Transforms/InstCombine/x86-avx2.ll b/test/Transforms/InstCombine/x86-avx2.ll
index 4c13b4c6ae74c..f4045f788e2d2 100644
--- a/test/Transforms/InstCombine/x86-avx2.ll
+++ b/test/Transforms/InstCombine/x86-avx2.ll
@@ -81,5 +81,29 @@ define <8 x float> @undef_test_vpermps(<8 x float> %a0) {
   ret <8 x float> %a
 }
 
+; Verify that demanded-elements simplification applies to the permute intrinsics.
+
+define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) {
+; CHECK-LABEL: @elts_test_vpermd(
+; CHECK-NEXT:    ret <8 x i32> %a0
+;
+  %1 = insertelement <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %a1, i32 0 ; identity index vector with only lane 0 replaced by the variable %a1
+  %2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %1)
+  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; mask lane 0 is undef, so only lanes 1-7 of %2 are used
+  ret <8 x i32> %3
+}
+
+define <8 x float> @elts_test_vpermps(<8 x float> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_test_vpermps(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    ret <8 x float> [[TMP2]]
+;
+  %1 = insertelement <8 x i32> %a1, i32 0, i32 7 ; overwrites only lane 7 of the index vector
+  %2 = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %1) ; the assertions above expect this call to take %a1 directly, with the insertelement dropped
+  %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero mask splats lane 0, so lanes 1-7 of %2 are never read
+  ret <8 x float> %3
+}
+
 declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
 declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
diff --git a/test/Transforms/InstCombine/x86-avx512.ll b/test/Transforms/InstCombine/x86-avx512.ll
index d2a2580d8c249..2a24d93ce76a0 100644
--- a/test/Transforms/InstCombine/x86-avx512.ll
+++ b/test/Transforms/InstCombine/x86-avx512.ll
@@ -6,10 +6,10 @@ declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>,
 
 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test_add_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> %b, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> %a, float [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
 ; CHECK-NEXT:    ret <4 x float> [[TMP4]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
@@ -21,7 +21,7 @@ define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test_add_ss_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
@@ -33,14 +33,14 @@ define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_add_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 %mask to <8 x i1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> %c, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> %a, float [[TMP7]], i64 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
 ; CHECK-NEXT:    ret <4 x float> [[TMP8]]
 ;
   %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
@@ -52,7 +52,7 @@ define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <4 x float> @test_add_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_add_ss_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
@@ -83,10 +83,10 @@ declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x doubl
 
 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test_add_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> %a, double [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[TMP4]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
@@ -96,7 +96,7 @@ define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test_add_sd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
@@ -106,14 +106,14 @@ define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_add_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 %mask to <8 x i1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> %c, i64 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> %a, double [[TMP7]], i64 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[TMP8]]
 ;
   %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
@@ -123,7 +123,7 @@ define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <2 x double> @test_add_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_add_sd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
@@ -148,10 +148,10 @@ declare <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float>, <4 x float>,
 
 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test_sub_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> %b, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> %a, float [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
 ; CHECK-NEXT:    ret <4 x float> [[TMP4]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
@@ -163,7 +163,7 @@ define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test_sub_ss_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
@@ -175,14 +175,14 @@ define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_sub_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 %mask to <8 x i1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> %c, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> %a, float [[TMP7]], i64 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
 ; CHECK-NEXT:    ret <4 x float> [[TMP8]]
 ;
   %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
@@ -194,7 +194,7 @@ define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <4 x float> @test_sub_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_sub_ss_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
@@ -225,10 +225,10 @@ declare <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double>, <2 x doubl
 
 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test_sub_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> %a, double [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[TMP4]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
@@ -238,7 +238,7 @@ define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test_sub_sd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
@@ -248,14 +248,14 @@ define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_sub_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 %mask to <8 x i1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> %c, i64 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> %a, double [[TMP7]], i64 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[TMP8]]
 ;
   %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
@@ -265,7 +265,7 @@ define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <2 x double> @test_sub_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_sub_sd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
@@ -290,10 +290,10 @@ declare <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float>, <4 x float>,
 
 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test_mul_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> %b, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> %a, float [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
 ; CHECK-NEXT:    ret <4 x float> [[TMP4]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
@@ -305,7 +305,7 @@ define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test_mul_ss_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
@@ -317,14 +317,14 @@ define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mul_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 %mask to <8 x i1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> %c, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> %a, float [[TMP7]], i64 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
 ; CHECK-NEXT:    ret <4 x float> [[TMP8]]
 ;
   %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
@@ -336,7 +336,7 @@ define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <4 x float> @test_mul_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mul_ss_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
@@ -367,10 +367,10 @@ declare <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double>, <2 x doubl
 
 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test_mul_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> %a, double [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[TMP4]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
@@ -380,7 +380,7 @@ define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test_mul_sd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
@@ -390,14 +390,14 @@ define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mul_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 %mask to <8 x i1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> %c, i64 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> %a, double [[TMP7]], i64 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[TMP8]]
 ;
   %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
@@ -407,7 +407,7 @@ define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <2 x double> @test_mul_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mul_sd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
@@ -432,10 +432,10 @@ declare <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float>, <4 x float>,
 
 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test_div_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> %b, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> %a, float [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
 ; CHECK-NEXT:    ret <4 x float> [[TMP4]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
@@ -447,7 +447,7 @@ define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test_div_ss_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
@@ -459,14 +459,14 @@ define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_div_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 %mask to <8 x i1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> %c, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> %a, float [[TMP7]], i64 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
 ; CHECK-NEXT:    ret <4 x float> [[TMP8]]
 ;
   %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
@@ -478,7 +478,7 @@ define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <4 x float> @test_div_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_div_ss_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
@@ -509,10 +509,10 @@ declare <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double>, <2 x doubl
 
 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test_div_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> %a, double [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[TMP4]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
@@ -522,7 +522,7 @@ define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test_div_sd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
@@ -532,14 +532,14 @@ define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_div_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> %a, i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> %b, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 %mask to <8 x i1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> %c, i64 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> %a, double [[TMP7]], i64 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[TMP8]]
 ;
   %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
@@ -549,7 +549,7 @@ define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <2 x double> @test_div_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_div_sd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
@@ -574,7 +574,7 @@ declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>,
 
 define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test_max_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
@@ -586,7 +586,7 @@ define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @test_max_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_max_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
@@ -617,7 +617,7 @@ declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x doubl
 
 define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test_max_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
@@ -627,7 +627,7 @@ define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @test_max_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_max_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
@@ -652,7 +652,7 @@ declare <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float>, <4 x float>,
 
 define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test_min_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
@@ -664,7 +664,7 @@ define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @test_min_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_min_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
@@ -695,7 +695,7 @@ declare <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double>, <2 x doubl
 
 define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test_min_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
@@ -705,7 +705,7 @@ define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @test_min_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_min_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
@@ -730,7 +730,7 @@ declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
 
 define i8 @test_cmp_ss(<4 x float> %a, <4 x float> %b, i8 %mask) {
 ; CHECK-LABEL: @test_cmp_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %a, <4 x float> %b, i32 3, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    ret i8 [[TMP1]]
 ;
   %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
@@ -747,7 +747,7 @@ declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32
 
 define i8 @test_cmp_sd(<2 x double> %a, <2 x double> %b, i8 %mask) {
 ; CHECK-LABEL: @test_cmp_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %a, <2 x double> %b, i32 3, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    ret i8 [[TMP1]]
 ;
   %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
@@ -758,22 +758,22 @@ define i8 @test_cmp_sd(<2 x double> %a, <2 x double> %b, i8 %mask) {
 
 define i64 @test(float %f, double %d) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[V00]], i32 4)
-; CHECK-NEXT:    [[V10:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> [[V10]], i32 4)
-; CHECK-NEXT:    [[V20:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[V20]], i32 4)
-; CHECK-NEXT:    [[V30:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> [[V30]], i32 4)
-; CHECK-NEXT:    [[V40:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[V40]], i32 4)
-; CHECK-NEXT:    [[V50:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> [[V50]], i32 4)
-; CHECK-NEXT:    [[V60:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[V60]], i32 4)
-; CHECK-NEXT:    [[V70:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> [[V70]], i32 4)
+; CHECK-NEXT:    [[V03:%.*]] = insertelement <4 x float> undef, float [[F:%.*]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[V03]], i32 4)
+; CHECK-NEXT:    [[V13:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> [[V13]], i32 4)
+; CHECK-NEXT:    [[V23:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[V23]], i32 4)
+; CHECK-NEXT:    [[V33:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> [[V33]], i32 4)
+; CHECK-NEXT:    [[V41:%.*]] = insertelement <2 x double> undef, double [[D:%.*]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[V41]], i32 4)
+; CHECK-NEXT:    [[V51:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> [[V51]], i32 4)
+; CHECK-NEXT:    [[V61:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[V61]], i32 4)
+; CHECK-NEXT:    [[V71:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> [[V71]], i32 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
@@ -781,7 +781,7 @@ define i64 @test(float %f, double %d) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
 ; CHECK-NEXT:    ret i64 [[TMP15]]
 ;
   %v00 = insertelement <4 x float> undef, float %f, i32 0
@@ -838,22 +838,22 @@ declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32)
 
 define i64 @test2(float %f, double %d) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[V00]], i32 4)
-; CHECK-NEXT:    [[V10:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> [[V10]], i32 4)
-; CHECK-NEXT:    [[V20:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[V20]], i32 4)
-; CHECK-NEXT:    [[V30:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> [[V30]], i32 4)
-; CHECK-NEXT:    [[V40:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[V40]], i32 4)
-; CHECK-NEXT:    [[V50:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> [[V50]], i32 4)
-; CHECK-NEXT:    [[V60:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[V60]], i32 4)
-; CHECK-NEXT:    [[V70:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> [[V70]], i32 4)
+; CHECK-NEXT:    [[V03:%.*]] = insertelement <4 x float> undef, float [[F:%.*]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[V03]], i32 4)
+; CHECK-NEXT:    [[V13:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> [[V13]], i32 4)
+; CHECK-NEXT:    [[V23:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[V23]], i32 4)
+; CHECK-NEXT:    [[V33:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> [[V33]], i32 4)
+; CHECK-NEXT:    [[V41:%.*]] = insertelement <2 x double> undef, double [[D:%.*]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[V41]], i32 4)
+; CHECK-NEXT:    [[V51:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> [[V51]], i32 4)
+; CHECK-NEXT:    [[V61:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[V61]], i32 4)
+; CHECK-NEXT:    [[V71:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> [[V71]], i32 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
@@ -861,7 +861,7 @@ define i64 @test2(float %f, double %d) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
 ; CHECK-NEXT:    ret i64 [[TMP15]]
 ;
   %v00 = insertelement <4 x float> undef, float %f, i32 0
@@ -920,8 +920,8 @@ declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4
 
 define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask_vfmadd_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT:    ret <4 x float> [[RES]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@@ -935,7 +935,7 @@ define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x flo
 
 define float @test_mask_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask_vfmadd_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret float [[TMP2]]
 ;
@@ -963,8 +963,8 @@ declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>,
 
 define <2 x double> @test_mask_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask_vfmadd_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
   %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
@@ -974,7 +974,7 @@ define <2 x double> @test_mask_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x
 
 define double @test_mask_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask_vfmadd_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret double [[TMP2]]
 ;
@@ -998,8 +998,8 @@ declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <
 
 define <4 x float> @test_maskz_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_maskz_vfmadd_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT:    ret <4 x float> [[RES]]
 ;
   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@@ -1013,7 +1013,7 @@ define <4 x float> @test_maskz_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x fl
 
 define float @test_maskz_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_maskz_vfmadd_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret float [[TMP2]]
 ;
@@ -1041,8 +1041,8 @@ declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>
 
 define <2 x double> @test_maskz_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_maskz_vfmadd_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
   %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
   %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
@@ -1052,7 +1052,7 @@ define <2 x double> @test_maskz_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x
 
 define double @test_maskz_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_maskz_vfmadd_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret double [[TMP2]]
 ;
@@ -1076,8 +1076,8 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <
 
 define <4 x float> @test_mask3_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfmadd_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT:    ret <4 x float> [[RES]]
 ;
   %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@@ -1091,7 +1091,7 @@ define <4 x float> @test_mask3_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x fl
 
 define float @test_mask3_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfmadd_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret float [[TMP2]]
 ;
@@ -1119,8 +1119,8 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>
 
 define <2 x double> @test_mask3_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfmadd_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
   %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
   %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
@@ -1130,7 +1130,7 @@ define <2 x double> @test_mask3_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x
 
 define double @test_mask3_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfmadd_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret double [[TMP2]]
 ;
@@ -1154,8 +1154,8 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <
 
 define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfmsub_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT:    ret <4 x float> [[RES]]
 ;
   %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@@ -1169,7 +1169,7 @@ define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x fl
 
 define float @test_mask3_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfmsub_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret float [[TMP2]]
 ;
@@ -1197,8 +1197,8 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>
 
 define <2 x double> @test_mask3_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfmsub_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
   %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
   %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
@@ -1208,7 +1208,7 @@ define <2 x double> @test_mask3_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x
 
 define double @test_mask3_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfmsub_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret double [[TMP2]]
 ;
@@ -1232,8 +1232,8 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>,
 
 define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfnmsub_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT:    ret <4 x float> [[RES]]
 ;
   %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@@ -1247,7 +1247,7 @@ define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x f
 
 define float @test_mask3_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfnmsub_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret float [[TMP2]]
 ;
@@ -1275,8 +1275,8 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double
 
 define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfnmsub_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT:    ret <2 x double> [[RES]]
 ;
   %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
   %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
@@ -1286,7 +1286,7 @@ define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2
 
 define double @test_mask3_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mask3_vfnmsub_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret double [[TMP2]]
 ;
@@ -1310,7 +1310,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8
 
 define <8 x i32> @identity_test_permvar_si_256(<8 x i32> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_si_256(
-; CHECK-NEXT:    ret <8 x i32> %a0
+; CHECK-NEXT:    ret <8 x i32> [[A0:%.*]]
 ;
   %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> undef, i8 -1)
   ret <8 x i32> %a
@@ -1318,8 +1318,8 @@ define <8 x i32> @identity_test_permvar_si_256(<8 x i32> %a0) {
 
 define <8 x i32> @identity_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_si_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> %a0, <8 x i32> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[A0:%.*]], <8 x i32> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;
   %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> %passthru, i8 %mask)
@@ -1328,7 +1328,7 @@ define <8 x i32> @identity_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %pa
 
 define <8 x i32> @zero_test_permvar_si_256(<8 x i32> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_si_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> undef, i8 -1)
@@ -1337,9 +1337,9 @@ define <8 x i32> @zero_test_permvar_si_256(<8 x i32> %a0) {
 
 define <8 x i32> @zero_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_si_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
 ;
   %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> %passthru, i8 %mask)
@@ -1348,7 +1348,7 @@ define <8 x i32> @zero_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passth
 
 define <8 x i32> @shuffle_test_permvar_si_256(<8 x i32> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_si_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> undef, i8 -1)
@@ -1357,9 +1357,9 @@ define <8 x i32> @shuffle_test_permvar_si_256(<8 x i32> %a0) {
 
 define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_si_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
 ;
   %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> %passthru, i8 %mask)
@@ -1368,7 +1368,7 @@ define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %pas
 
 define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_si_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> undef, i8 -1)
@@ -1377,9 +1377,9 @@ define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) {
 
 define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_si_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
 ;
   %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> %passthru, i8 %mask)
@@ -1390,7 +1390,7 @@ declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>,
 
 define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_sf_256(
-; CHECK-NEXT:    ret <8 x float> %a0
+; CHECK-NEXT:    ret <8 x float> [[A0:%.*]]
 ;
   %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x float> undef, i8 -1)
   ret <8 x float> %a
@@ -1398,8 +1398,8 @@ define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) {
 
 define <8 x float> @identity_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_sf_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x float> %a0, <8 x float> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[A0:%.*]], <8 x float> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x float> [[TMP2]]
 ;
   %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x float> %passthru, i8 %mask)
@@ -1408,7 +1408,7 @@ define <8 x float> @identity_test_permvar_sf_256_mask(<8 x float> %a0, <8 x floa
 
 define <8 x float> @zero_test_permvar_sf_256(<8 x float> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_sf_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
 ;
   %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> zeroinitializer, <8 x float> undef, i8 -1)
@@ -1417,9 +1417,9 @@ define <8 x float> @zero_test_permvar_sf_256(<8 x float> %a0) {
 
 define <8 x float> @zero_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_sf_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x float> [[TMP3]]
 ;
   %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> zeroinitializer, <8 x float> %passthru, i8 %mask)
@@ -1428,7 +1428,7 @@ define <8 x float> @zero_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %
 
 define <8 x float> @shuffle_test_permvar_sf_256(<8 x float> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_sf_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
 ;
   %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> undef, i8 -1)
@@ -1437,9 +1437,9 @@ define <8 x float> @shuffle_test_permvar_sf_256(<8 x float> %a0) {
 
 define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_sf_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x float> [[TMP3]]
 ;
   %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> %passthru, i8 %mask)
@@ -1448,7 +1448,7 @@ define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float
 
 define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_sf_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
 ;
   %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> undef, i8 -1)
@@ -1457,9 +1457,9 @@ define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) {
 
 define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_sf_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x float> [[TMP3]]
 ;
   %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> %passthru, i8 %mask)
@@ -1470,7 +1470,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4
 
 define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_di_256(
-; CHECK-NEXT:    ret <4 x i64> %a0
+; CHECK-NEXT:    ret <4 x i64> [[A0:%.*]]
 ;
   %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64> undef, i8 -1)
   ret <4 x i64> %a
@@ -1478,10 +1478,10 @@ define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) {
 
 define <4 x i64> @identity_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_di_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> %a0, <4 x i64> %passthru
-; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[A0:%.*]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT:    ret <4 x i64> [[TMP2]]
 ;
   %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64> %passthru, i8 %mask)
   ret <4 x i64> %a
@@ -1489,7 +1489,7 @@ define <4 x i64> @identity_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %pa
 
 define <4 x i64> @zero_test_permvar_di_256(<4 x i64> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_di_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer, <4 x i64> undef, i8 -1)
@@ -1498,11 +1498,11 @@ define <4 x i64> @zero_test_permvar_di_256(<4 x i64> %a0) {
 
 define <4 x i64> @zero_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_di_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP1]], <4 x i64> %passthru
-; CHECK-NEXT:    ret <4 x i64> [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
 ;
   %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer, <4 x i64> %passthru, i8 %mask)
   ret <4 x i64> %a
@@ -1510,7 +1510,7 @@ define <4 x i64> @zero_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passth
 
 define <4 x i64> @shuffle_test_permvar_di_256(<4 x i64> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_di_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x i64> undef, i8 -1)
@@ -1519,11 +1519,11 @@ define <4 x i64> @shuffle_test_permvar_di_256(<4 x i64> %a0) {
 
 define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_di_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP1]], <4 x i64> %passthru
-; CHECK-NEXT:    ret <4 x i64> [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
 ;
   %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x i64> %passthru, i8 %mask)
   ret <4 x i64> %a
@@ -1531,7 +1531,7 @@ define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %pas
 
 define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_di_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x i64> undef, i8 -1)
@@ -1540,11 +1540,11 @@ define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) {
 
 define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_di_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP1]], <4 x i64> %passthru
-; CHECK-NEXT:    ret <4 x i64> [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
 ;
   %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x i64> %passthru, i8 %mask)
   ret <4 x i64> %a
@@ -1554,7 +1554,7 @@ declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64
 
 define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_df_256(
-; CHECK-NEXT:    ret <4 x double> %a0
+; CHECK-NEXT:    ret <4 x double> [[A0:%.*]]
 ;
   %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x double> undef, i8 -1)
   ret <4 x double> %a
@@ -1562,10 +1562,10 @@ define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) {
 
 define <4 x double> @identity_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_df_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> %a0, <4 x double> %passthru
-; CHECK-NEXT:    ret <4 x double> [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[A0:%.*]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT:    ret <4 x double> [[TMP2]]
 ;
   %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x double> %passthru, i8 %mask)
   ret <4 x double> %a
@@ -1573,7 +1573,7 @@ define <4 x double> @identity_test_permvar_df_256_mask(<4 x double> %a0, <4 x do
 
 define <4 x double> @zero_test_permvar_df_256(<4 x double> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_df_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x double> [[TMP1]]
 ;
   %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer, <4 x double> undef, i8 -1)
@@ -1582,11 +1582,11 @@ define <4 x double> @zero_test_permvar_df_256(<4 x double> %a0) {
 
 define <4 x double> @zero_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_df_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> %passthru
-; CHECK-NEXT:    ret <4 x double> [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT:    ret <4 x double> [[TMP3]]
 ;
   %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer, <4 x double> %passthru, i8 %mask)
   ret <4 x double> %a
@@ -1594,7 +1594,7 @@ define <4 x double> @zero_test_permvar_df_256_mask(<4 x double> %a0, <4 x double
 
 define <4 x double> @shuffle_test_permvar_df_256(<4 x double> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_df_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <4 x double> [[TMP1]]
 ;
   %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x double> undef, i8 -1)
@@ -1603,11 +1603,11 @@ define <4 x double> @shuffle_test_permvar_df_256(<4 x double> %a0) {
 
 define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_df_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> %passthru
-; CHECK-NEXT:    ret <4 x double> [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT:    ret <4 x double> [[TMP3]]
 ;
   %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x double> %passthru, i8 %mask)
   ret <4 x double> %a
@@ -1615,7 +1615,7 @@ define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x dou
 
 define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_df_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <4 x double> [[TMP1]]
 ;
   %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x double> undef, i8 -1)
@@ -1624,11 +1624,11 @@ define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) {
 
 define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_df_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> %passthru
-; CHECK-NEXT:    ret <4 x double> [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT:    ret <4 x double> [[TMP3]]
 ;
   %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x double> %passthru, i8 %mask)
   ret <4 x double> %a
@@ -1638,7 +1638,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>,
 
 define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_si_512(
-; CHECK-NEXT:    ret <16 x i32> %a0
+; CHECK-NEXT:    ret <16 x i32> [[A0:%.*]]
 ;
   %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> undef, i16 -1)
   ret <16 x i32> %a
@@ -1646,8 +1646,8 @@ define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) {
 
 define <16 x i32> @identity_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_si_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> %a0, <16 x i32> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[A0:%.*]], <16 x i32> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
 ;
   %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> %passthru, i16 %mask)
@@ -1656,7 +1656,7 @@ define <16 x i32> @identity_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32>
 
 define <16 x i32> @zero_test_permvar_si_512(<16 x i32> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_si_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer, <16 x i32> undef, i16 -1)
@@ -1665,9 +1665,9 @@ define <16 x i32> @zero_test_permvar_si_512(<16 x i32> %a0) {
 
 define <16 x i32> @zero_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_si_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i32> [[TMP3]]
 ;
   %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer, <16 x i32> %passthru, i16 %mask)
@@ -1676,7 +1676,7 @@ define <16 x i32> @zero_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %pas
 
 define <16 x i32> @shuffle_test_permvar_si_512(<16 x i32> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_si_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> undef, i16 -1)
@@ -1685,9 +1685,9 @@ define <16 x i32> @shuffle_test_permvar_si_512(<16 x i32> %a0) {
 
 define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_si_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i32> [[TMP3]]
 ;
   %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> %passthru, i16 %mask)
@@ -1696,7 +1696,7 @@ define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %
 
 define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_si_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> undef, i16 -1)
@@ -1705,9 +1705,9 @@ define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) {
 
 define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_si_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i32> [[TMP3]]
 ;
   %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> %passthru, i16 %mask)
@@ -1718,7 +1718,7 @@ declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i3
 
 define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_sf_512(
-; CHECK-NEXT:    ret <16 x float> %a0
+; CHECK-NEXT:    ret <16 x float> [[A0:%.*]]
 ;
   %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x float> undef, i16 -1)
   ret <16 x float> %a
@@ -1726,8 +1726,8 @@ define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) {
 
 define <16 x float> @identity_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_sf_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x float> %a0, <16 x float> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[A0:%.*]], <16 x float> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP2]]
 ;
   %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x float> %passthru, i16 %mask)
@@ -1736,7 +1736,7 @@ define <16 x float> @identity_test_permvar_sf_512_mask(<16 x float> %a0, <16 x f
 
 define <16 x float> @zero_test_permvar_sf_512(<16 x float> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_sf_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer, <16 x float> undef, i16 -1)
@@ -1745,9 +1745,9 @@ define <16 x float> @zero_test_permvar_sf_512(<16 x float> %a0) {
 
 define <16 x float> @zero_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_sf_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP3]]
 ;
   %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer, <16 x float> %passthru, i16 %mask)
@@ -1756,7 +1756,7 @@ define <16 x float> @zero_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float
 
 define <16 x float> @shuffle_test_permvar_sf_512(<16 x float> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_sf_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> undef, i16 -1)
@@ -1765,9 +1765,9 @@ define <16 x float> @shuffle_test_permvar_sf_512(<16 x float> %a0) {
 
 define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_sf_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP3]]
 ;
   %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> %passthru, i16 %mask)
@@ -1776,7 +1776,7 @@ define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x fl
 
 define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_sf_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> undef, i16 -1)
@@ -1785,9 +1785,9 @@ define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) {
 
 define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_sf_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP3]]
 ;
   %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> %passthru, i16 %mask)
@@ -1798,7 +1798,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8
 
 define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_di_512(
-; CHECK-NEXT:    ret <8 x i64> %a0
+; CHECK-NEXT:    ret <8 x i64> [[A0:%.*]]
 ;
   %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> undef, i8 -1)
   ret <8 x i64> %a
@@ -1806,8 +1806,8 @@ define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) {
 
 define <8 x i64> @identity_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_di_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> %a0, <8 x i64> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[A0:%.*]], <8 x i64> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i64> [[TMP2]]
 ;
   %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> %passthru, i8 %mask)
@@ -1816,7 +1816,7 @@ define <8 x i64> @identity_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %pa
 
 define <8 x i64> @zero_test_permvar_di_512(<8 x i64> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_di_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1)
@@ -1825,9 +1825,9 @@ define <8 x i64> @zero_test_permvar_di_512(<8 x i64> %a0) {
 
 define <8 x i64> @zero_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_di_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
 ;
   %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer, <8 x i64> %passthru, i8 %mask)
@@ -1836,7 +1836,7 @@ define <8 x i64> @zero_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passth
 
 define <8 x i64> @shuffle_test_permvar_di_512(<8 x i64> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_di_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> undef, i8 -1)
@@ -1845,9 +1845,9 @@ define <8 x i64> @shuffle_test_permvar_di_512(<8 x i64> %a0) {
 
 define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_di_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
 ;
   %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %passthru, i8 %mask)
@@ -1856,7 +1856,7 @@ define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %pas
 
 define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_di_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> undef, i8 -1)
@@ -1865,9 +1865,9 @@ define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) {
 
 define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_di_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
 ;
   %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %passthru, i8 %mask)
@@ -1878,7 +1878,7 @@ declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64
 
 define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_df_512(
-; CHECK-NEXT:    ret <8 x double> %a0
+; CHECK-NEXT:    ret <8 x double> [[A0:%.*]]
 ;
   %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x double> undef, i8 -1)
   ret <8 x double> %a
@@ -1886,8 +1886,8 @@ define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) {
 
 define <8 x double> @identity_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_df_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x double> %a0, <8 x double> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[A0:%.*]], <8 x double> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP2]]
 ;
   %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x double> %passthru, i8 %mask)
@@ -1896,7 +1896,7 @@ define <8 x double> @identity_test_permvar_df_512_mask(<8 x double> %a0, <8 x do
 
 define <8 x double> @zero_test_permvar_df_512(<8 x double> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_df_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer, <8 x double> undef, i8 -1)
@@ -1905,9 +1905,9 @@ define <8 x double> @zero_test_permvar_df_512(<8 x double> %a0) {
 
 define <8 x double> @zero_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_df_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP3]]
 ;
   %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer, <8 x double> %passthru, i8 %mask)
@@ -1916,7 +1916,7 @@ define <8 x double> @zero_test_permvar_df_512_mask(<8 x double> %a0, <8 x double
 
 define <8 x double> @shuffle_test_permvar_df_512(<8 x double> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_df_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> undef, i8 -1)
@@ -1925,9 +1925,9 @@ define <8 x double> @shuffle_test_permvar_df_512(<8 x double> %a0) {
 
 define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_df_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP3]]
 ;
   %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %passthru, i8 %mask)
@@ -1936,7 +1936,7 @@ define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x dou
 
 define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_df_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> undef, i8 -1)
@@ -1945,9 +1945,9 @@ define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) {
 
 define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_df_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP3]]
 ;
   %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %passthru, i8 %mask)
@@ -1958,7 +1958,7 @@ declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8
 
 define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_hi_128(
-; CHECK-NEXT:    ret <8 x i16> %a0
+; CHECK-NEXT:    ret <8 x i16> [[A0:%.*]]
 ;
   %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, <8 x i16> undef, i8 -1)
   ret <8 x i16> %a
@@ -1966,8 +1966,8 @@ define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) {
 
 define <8 x i16> @identity_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_hi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> %a0, <8 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A0:%.*]], <8 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
 ;
   %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, <8 x i16> %passthru, i8 %mask)
@@ -1976,7 +1976,7 @@ define <8 x i16> @identity_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %pa
 
 define <8 x i16> @zero_test_permvar_hi_128(<8 x i16> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_hi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i16> undef, i8 -1)
@@ -1985,9 +1985,9 @@ define <8 x i16> @zero_test_permvar_hi_128(<8 x i16> %a0) {
 
 define <8 x i16> @zero_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_hi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
   %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i16> %passthru, i8 %mask)
@@ -1996,7 +1996,7 @@ define <8 x i16> @zero_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passth
 
 define <8 x i16> @shuffle_test_permvar_hi_128(<8 x i16> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_hi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> undef, i8 -1)
@@ -2005,9 +2005,9 @@ define <8 x i16> @shuffle_test_permvar_hi_128(<8 x i16> %a0) {
 
 define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_hi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
   %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> %passthru, i8 %mask)
@@ -2016,7 +2016,7 @@ define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %pas
 
 define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_hi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> undef, i8 -1)
@@ -2025,9 +2025,9 @@ define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) {
 
 define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_hi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
   %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> %passthru, i8 %mask)
@@ -2038,7 +2038,7 @@ declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>,
 
 define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_hi_256(
-; CHECK-NEXT:    ret <16 x i16> %a0
+; CHECK-NEXT:    ret <16 x i16> [[A0:%.*]]
 ;
   %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, <16 x i16> undef, i16 -1)
   ret <16 x i16> %a
@@ -2046,8 +2046,8 @@ define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) {
 
 define <16 x i16> @identity_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_hi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> %a0, <16 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[A0:%.*]], <16 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
 ;
   %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, <16 x i16> %passthru, i16 %mask)
@@ -2056,7 +2056,7 @@ define <16 x i16> @identity_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16>
 
 define <16 x i16> @zero_test_permvar_hi_256(<16 x i16> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_hi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer, <16 x i16> undef, i16 -1)
@@ -2065,9 +2065,9 @@ define <16 x i16> @zero_test_permvar_hi_256(<16 x i16> %a0) {
 
 define <16 x i16> @zero_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_hi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
 ;
   %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer, <16 x i16> %passthru, i16 %mask)
@@ -2076,7 +2076,7 @@ define <16 x i16> @zero_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %pas
 
 define <16 x i16> @shuffle_test_permvar_hi_256(<16 x i16> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_hi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> undef, i16 -1)
@@ -2085,9 +2085,9 @@ define <16 x i16> @shuffle_test_permvar_hi_256(<16 x i16> %a0) {
 
 define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_hi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
 ;
   %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %passthru, i16 %mask)
@@ -2096,7 +2096,7 @@ define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %
 
 define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_hi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> undef, i16 -1)
@@ -2105,9 +2105,9 @@ define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) {
 
 define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_hi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
 ;
   %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %passthru, i16 %mask)
@@ -2118,7 +2118,7 @@ declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>,
 
 define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_hi_512(
-; CHECK-NEXT:    ret <32 x i16> %a0
+; CHECK-NEXT:    ret <32 x i16> [[A0:%.*]]
 ;
   %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, <32 x i16> undef, i32 -1)
   ret <32 x i16> %a
@@ -2126,8 +2126,8 @@ define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) {
 
 define <32 x i16> @identity_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_hi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 %mask to <32 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> %a0, <32 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[A0:%.*]], <32 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
 ;
   %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, <32 x i16> %passthru, i32 %mask)
@@ -2136,7 +2136,7 @@ define <32 x i16> @identity_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16>
 
 define <32 x i16> @zero_test_permvar_hi_512(<32 x i16> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_hi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer, <32 x i16> undef, i32 -1)
@@ -2145,9 +2145,9 @@ define <32 x i16> @zero_test_permvar_hi_512(<32 x i16> %a0) {
 
 define <32 x i16> @zero_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_hi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 %mask to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
 ;
   %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer, <32 x i16> %passthru, i32 %mask)
@@ -2156,7 +2156,7 @@ define <32 x i16> @zero_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %pas
 
 define <32 x i16> @shuffle_test_permvar_hi_512(<32 x i16> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_hi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> undef, i32 -1)
@@ -2165,9 +2165,9 @@ define <32 x i16> @shuffle_test_permvar_hi_512(<32 x i16> %a0) {
 
 define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_hi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 %mask to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
 ;
   %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> %passthru, i32 %mask)
@@ -2176,7 +2176,7 @@ define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %
 
 define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_hi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> undef, i32 -1)
@@ -2185,9 +2185,9 @@ define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) {
 
 define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_hi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 %mask to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
 ;
   %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> %passthru, i32 %mask)
@@ -2198,7 +2198,7 @@ declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16
 
 define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_qi_128(
-; CHECK-NEXT:    ret <16 x i8> %a0
+; CHECK-NEXT:    ret <16 x i8> [[A0:%.*]]
 ;
   %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> undef, i16 -1)
   ret <16 x i8> %a
@@ -2206,8 +2206,8 @@ define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) {
 
 define <16 x i8> @identity_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_qi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> %a0, <16 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A0:%.*]], <16 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
 ;
   %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %passthru, i16 %mask)
@@ -2216,7 +2216,7 @@ define <16 x i8> @identity_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %pa
 
 define <16 x i8> @zero_test_permvar_qi_128(<16 x i8> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_qi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
 ;
   %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i8> undef, i16 -1)
@@ -2225,9 +2225,9 @@ define <16 x i8> @zero_test_permvar_qi_128(<16 x i8> %a0) {
 
 define <16 x i8> @zero_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_qi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
   %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i8> %passthru, i16 %mask)
@@ -2236,7 +2236,7 @@ define <16 x i8> @zero_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passth
 
 define <16 x i8> @shuffle_test_permvar_qi_128(<16 x i8> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_qi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
 ;
   %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> undef, i16 -1)
@@ -2245,9 +2245,9 @@ define <16 x i8> @shuffle_test_permvar_qi_128(<16 x i8> %a0) {
 
 define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_qi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
   %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> %passthru, i16 %mask)
@@ -2256,7 +2256,7 @@ define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %pas
 
 define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_qi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
 ;
   %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> undef, i16 -1)
@@ -2265,9 +2265,9 @@ define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) {
 
 define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_qi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
   %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> %passthru, i16 %mask)
@@ -2278,7 +2278,7 @@ declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32
 
 define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_qi_256(
-; CHECK-NEXT:    ret <32 x i8> %a0
+; CHECK-NEXT:    ret <32 x i8> [[A0:%.*]]
 ;
   %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, <32 x i8> undef, i32 -1)
   ret <32 x i8> %a
@@ -2286,8 +2286,8 @@ define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) {
 
 define <32 x i8> @identity_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_qi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 %mask to <32 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i8> %a0, <32 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i8> [[A0:%.*]], <32 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <32 x i8> [[TMP2]]
 ;
   %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, <32 x i8> %passthru, i32 %mask)
@@ -2296,7 +2296,7 @@ define <32 x i8> @identity_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %pa
 
 define <32 x i8> @zero_test_permvar_qi_256(<32 x i8> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_qi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
 ;
   %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i8> undef, i32 -1)
@@ -2305,9 +2305,9 @@ define <32 x i8> @zero_test_permvar_qi_256(<32 x i8> %a0) {
 
 define <32 x i8> @zero_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_qi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 %mask to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
 ;
   %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i8> %passthru, i32 %mask)
@@ -2316,7 +2316,7 @@ define <32 x i8> @zero_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passth
 
 define <32 x i8> @shuffle_test_permvar_qi_256(<32 x i8> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_qi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
 ;
   %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> undef, i32 -1)
@@ -2325,9 +2325,9 @@ define <32 x i8> @shuffle_test_permvar_qi_256(<32 x i8> %a0) {
 
 define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_qi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 %mask to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
 ;
   %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> %passthru, i32 %mask)
@@ -2336,7 +2336,7 @@ define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %pas
 
 define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_qi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
 ;
   %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> undef, i32 -1)
@@ -2345,9 +2345,9 @@ define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) {
 
 define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_qi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 %mask to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
 ;
   %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> %passthru, i32 %mask)
@@ -2358,7 +2358,7 @@ declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64
 
 define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) {
 ; CHECK-LABEL: @identity_test_permvar_qi_512(
-; CHECK-NEXT:    ret <64 x i8> %a0
+; CHECK-NEXT:    ret <64 x i8> [[A0:%.*]]
 ;
   %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>, <64 x i8> undef, i64 -1)
   ret <64 x i8> %a
@@ -2366,8 +2366,8 @@ define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) {
 
 define <64 x i8> @identity_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
 ; CHECK-LABEL: @identity_test_permvar_qi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64 %mask to <64 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <64 x i1> [[TMP1]], <64 x i8> %a0, <64 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <64 x i1> [[TMP1]], <64 x i8> [[A0:%.*]], <64 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <64 x i8> [[TMP2]]
 ;
   %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>, <64 x i8> %passthru, i64 %mask)
@@ -2376,7 +2376,7 @@ define <64 x i8> @identity_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %pa
 
 define <64 x i8> @zero_test_permvar_qi_512(<64 x i8> %a0) {
 ; CHECK-LABEL: @zero_test_permvar_qi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
 ;
   %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer, <64 x i8> undef, i64 -1)
@@ -2385,9 +2385,9 @@ define <64 x i8> @zero_test_permvar_qi_512(<64 x i8> %a0) {
 
 define <64 x i8> @zero_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
 ; CHECK-LABEL: @zero_test_permvar_qi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 %mask to <64 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
 ;
   %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer, <64 x i8> %passthru, i64 %mask)
@@ -2396,7 +2396,7 @@ define <64 x i8> @zero_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passth
 
 define <64 x i8> @shuffle_test_permvar_qi_512(<64 x i8> %a0) {
 ; CHECK-LABEL: @shuffle_test_permvar_qi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
 ;
   %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> undef, i64 -1)
@@ -2405,9 +2405,9 @@ define <64 x i8> @shuffle_test_permvar_qi_512(<64 x i8> %a0) {
 
 define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
 ; CHECK-LABEL: @shuffle_test_permvar_qi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 %mask to <64 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
 ;
   %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> %passthru, i64 %mask)
@@ -2416,7 +2416,7 @@ define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %pas
 
 define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) {
 ; CHECK-LABEL: @undef_test_permvar_qi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
 ;
   %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> undef, i64 -1)
@@ -2425,9 +2425,9 @@ define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) {
 
 define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
 ; CHECK-LABEL: @undef_test_permvar_qi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 %mask to <64 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> %passthru
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
 ; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
 ;
   %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> %passthru, i64 %mask)
@@ -2438,7 +2438,7 @@ declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>
 
 define <16 x float> @test_add_ps(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @test_add_ps(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
@@ -2447,7 +2447,7 @@ define <16 x float> @test_add_ps(<16 x float> %a, <16 x float> %b) {
 
 define <16 x float> @test_add_ps_round(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @test_add_ps_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
@@ -2456,9 +2456,9 @@ define <16 x float> @test_add_ps_round(<16 x float> %a, <16 x float> %b) {
 
 define <16 x float> @test_add_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
 ; CHECK-LABEL: @test_add_ps_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <16 x float> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> %c
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP3]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
@@ -2467,7 +2467,7 @@ define <16 x float> @test_add_ps_mask(<16 x float> %a, <16 x float> %b, <16 x fl
 
 define <16 x float> @test_add_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
 ; CHECK-LABEL: @test_add_ps_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
@@ -2478,7 +2478,7 @@ declare <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double>, <8 x double>
 
 define <8 x double> @test_add_pd(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @test_add_pd(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
@@ -2487,7 +2487,7 @@ define <8 x double> @test_add_pd(<8 x double> %a, <8 x double> %b) {
 
 define <8 x double> @test_add_pd_round(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @test_add_pd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
@@ -2496,9 +2496,9 @@ define <8 x double> @test_add_pd_round(<8 x double> %a, <8 x double> %b) {
 
 define <8 x double> @test_add_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_add_pd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x double> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> %c
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP3]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
@@ -2507,7 +2507,7 @@ define <8 x double> @test_add_pd_mask(<8 x double> %a, <8 x double> %b, <8 x dou
 
 define <8 x double> @test_add_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_add_pd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
@@ -2518,7 +2518,7 @@ declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>
 
 define <16 x float> @test_sub_ps(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @test_sub_ps(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
@@ -2527,7 +2527,7 @@ define <16 x float> @test_sub_ps(<16 x float> %a, <16 x float> %b) {
 
 define <16 x float> @test_sub_ps_round(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @test_sub_ps_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
@@ -2536,9 +2536,9 @@ define <16 x float> @test_sub_ps_round(<16 x float> %a, <16 x float> %b) {
 
 define <16 x float> @test_sub_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
 ; CHECK-LABEL: @test_sub_ps_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <16 x float> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> %c
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP3]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
@@ -2547,7 +2547,7 @@ define <16 x float> @test_sub_ps_mask(<16 x float> %a, <16 x float> %b, <16 x fl
 
 define <16 x float> @test_sub_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
 ; CHECK-LABEL: @test_sub_ps_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
@@ -2558,7 +2558,7 @@ declare <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double>, <8 x double>
 
 define <8 x double> @test_sub_pd(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @test_sub_pd(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
@@ -2567,7 +2567,7 @@ define <8 x double> @test_sub_pd(<8 x double> %a, <8 x double> %b) {
 
 define <8 x double> @test_sub_pd_round(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @test_sub_pd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
@@ -2576,9 +2576,9 @@ define <8 x double> @test_sub_pd_round(<8 x double> %a, <8 x double> %b) {
 
 define <8 x double> @test_sub_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_sub_pd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x double> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> %c
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP3]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
@@ -2587,7 +2587,7 @@ define <8 x double> @test_sub_pd_mask(<8 x double> %a, <8 x double> %b, <8 x dou
 
 define <8 x double> @test_sub_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_sub_pd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
@@ -2598,7 +2598,7 @@ declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>
 
 define <16 x float> @test_mul_ps(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @test_mul_ps(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
@@ -2607,7 +2607,7 @@ define <16 x float> @test_mul_ps(<16 x float> %a, <16 x float> %b) {
 
 define <16 x float> @test_mul_ps_round(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @test_mul_ps_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
@@ -2616,9 +2616,9 @@ define <16 x float> @test_mul_ps_round(<16 x float> %a, <16 x float> %b) {
 
 define <16 x float> @test_mul_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
 ; CHECK-LABEL: @test_mul_ps_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <16 x float> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> %c
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP3]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
@@ -2627,7 +2627,7 @@ define <16 x float> @test_mul_ps_mask(<16 x float> %a, <16 x float> %b, <16 x fl
 
 define <16 x float> @test_mul_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
 ; CHECK-LABEL: @test_mul_ps_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
@@ -2638,7 +2638,7 @@ declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>
 
 define <8 x double> @test_mul_pd(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @test_mul_pd(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
@@ -2647,7 +2647,7 @@ define <8 x double> @test_mul_pd(<8 x double> %a, <8 x double> %b) {
 
 define <8 x double> @test_mul_pd_round(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @test_mul_pd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
@@ -2656,9 +2656,9 @@ define <8 x double> @test_mul_pd_round(<8 x double> %a, <8 x double> %b) {
 
 define <8 x double> @test_mul_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mul_pd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x double> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> %c
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP3]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
@@ -2667,7 +2667,7 @@ define <8 x double> @test_mul_pd_mask(<8 x double> %a, <8 x double> %b, <8 x dou
 
 define <8 x double> @test_mul_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_mul_pd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
@@ -2678,7 +2678,7 @@ declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>
 
 define <16 x float> @test_div_ps(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @test_div_ps(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
@@ -2687,7 +2687,7 @@ define <16 x float> @test_div_ps(<16 x float> %a, <16 x float> %b) {
 
 define <16 x float> @test_div_ps_round(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @test_div_ps_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
@@ -2696,9 +2696,9 @@ define <16 x float> @test_div_ps_round(<16 x float> %a, <16 x float> %b) {
 
 define <16 x float> @test_div_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
 ; CHECK-LABEL: @test_div_ps_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <16 x float> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 %mask to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> %c
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
 ; CHECK-NEXT:    ret <16 x float> [[TMP3]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
@@ -2707,7 +2707,7 @@ define <16 x float> @test_div_ps_mask(<16 x float> %a, <16 x float> %b, <16 x fl
 
 define <16 x float> @test_div_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
 ; CHECK-LABEL: @test_div_ps_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
@@ -2718,7 +2718,7 @@ declare <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double>, <8 x double>
 
 define <8 x double> @test_div_pd(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @test_div_pd(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
@@ -2727,7 +2727,7 @@ define <8 x double> @test_div_pd(<8 x double> %a, <8 x double> %b) {
 
 define <8 x double> @test_div_pd_round(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @test_div_pd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
@@ -2736,9 +2736,9 @@ define <8 x double> @test_div_pd_round(<8 x double> %a, <8 x double> %b) {
 
 define <8 x double> @test_div_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_div_pd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x double> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 %mask to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> %c
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
 ; CHECK-NEXT:    ret <8 x double> [[TMP3]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
@@ -2747,7 +2747,7 @@ define <8 x double> @test_div_pd_mask(<8 x double> %a, <8 x double> %b, <8 x dou
 
 define <8 x double> @test_div_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
 ; CHECK-LABEL: @test_div_pd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
@@ -2758,8 +2758,8 @@ declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
 
 define i32 @test_comi_ss_0(float %a, float %b) {
 ; CHECK-LABEL: @test_comi_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i32 0, i32 4)
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
@@ -2779,8 +2779,8 @@ declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
 
 define i32 @test_comi_sd_0(double %a, double %b) {
 ; CHECK-LABEL: @test_comi_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i32 0, i32 4)
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
diff --git a/test/Transforms/InstCombine/x86-muldq.ll b/test/Transforms/InstCombine/x86-muldq.ll
index 8b14a781f0918..bcbb8919c403e 100644
--- a/test/Transforms/InstCombine/x86-muldq.ll
+++ b/test/Transforms/InstCombine/x86-muldq.ll
@@ -2,6 +2,158 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 ;
+; UNDEF Elts
+;
+
+define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuludq_128(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+; Both operands of the call are undef (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuludq_256(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+; Both operands of the call are undef (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuludq_512(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+; Both operands of the call are undef (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
+  ret <8 x i64> %1
+}
+
+define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuldq_128(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+; Both operands of the call are undef (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuldq_256(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+; Both operands of the call are undef (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuldq_512(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+; Both operands of the call are undef (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
+  ret <8 x i64> %1
+}
+
+define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_128(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+; One operand is undef and the other all-zero (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_256(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+; One operand is undef and the other all-zero (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> undef)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_512(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+; One operand is undef and the other all-zero (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_128(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+; One operand is undef and the other all-zero (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_256(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+; One operand is undef and the other all-zero (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_512(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+; One operand is undef and the other all-zero (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef)
+  ret <8 x i64> %1
+}
+
+;
+; Constant Folding
+;
+
+define <2 x i64> @fold_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuludq_128(
+; CHECK-NEXT:    ret <2 x i64> <i64 9223372030412324865, i64 4294967295>
+; Expected lanes equal the even-indexed elements multiplied as unsigned 32-bit values (0xFFFFFFFF * 0x7FFFFFFF and 0xFFFFFFFF * 1).
+  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2147483647, i32 1, i32 1, i32 3>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @fold_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuludq_256(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+; Both operands are all-zero constants (%a0/%a1 are unused); the expected result is all-zero.
+  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @fold_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuludq_512(
+; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 0, i64 255, i64 131070, i64 0, i64 -281474976645121, i64 140737488289792, i64 281470681743360>
+; Expected lanes equal the even-indexed elements multiplied as unsigned 32-bit values; lanes with an undef input are expected as 0.
+  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> <i32 0, i32 0, i32 undef, i32 0, i32 1, i32 1, i32 2, i32 2, i32 undef, i32 undef, i32 -1, i32 -1, i32 65536, i32 -1, i32 -65536, i32 undef>, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 255, i32 -256, i32 65535, i32 -65536, i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
+  ret <8 x i64> %1
+}
+
+define <2 x i64> @fold_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuldq_128(
+; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 2>
+; Expected lanes equal the even-indexed elements multiplied as signed values (-1 * -2 = 2); the lane with undef inputs is expected as 0.
+  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> <i32 undef, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 undef, i32 1, i32 -2, i32 3>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @fold_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuldq_256(
+; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 4294836225, i64 140737488289792, i64 -140737488355328>
+; Expected lanes equal the even-indexed elements multiplied as signed 32-bit values; the lane with an undef input is expected as 0.
+  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> <i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>, <8 x i32> <i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @fold_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuldq_512(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+; The first operand is all-zero and the second mixes constant and undef elements; the expected result is all-zero.
+  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> <i32 undef, i32 -1, i32 -3, i32 -1, i32 8, i32 10, i32 -256, i32 65536, i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>)
+  ret <8 x i64> %1
+}
+
+;
 ; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required.
 ;
 
@@ -55,8 +207,8 @@ define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
   ret <2 x i64> %3
 }
 
-define <4 x i64> @test_demanded_elts_pmuluq_256(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @test_demanded_elts_pmuluq_256(
+define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pmuldq_256(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
@@ -69,8 +221,8 @@ define <4 x i64> @test_demanded_elts_pmuluq_256(<8 x i32> %a0, <8 x i32> %a1) {
   ret <4 x i64> %4
 }
 
-define <8 x i64> @test_demanded_elts_pmuluq_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK-LABEL: @test_demanded_elts_pmuluq_512(
+define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pmuldq_512(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 15, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a0, <16 x i32> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
diff --git a/test/Transforms/InstCombine/x86-pack.ll b/test/Transforms/InstCombine/x86-pack.ll
new file mode 100644
index 0000000000000..f3c41a8aa4763
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-pack.ll
@@ -0,0 +1,366 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;
+; UNDEF Elts
+;
+
+define <8 x i16> @undef_packssdw_128() {
+; CHECK-LABEL: @undef_packssdw_128(
+; CHECK-NEXT:    ret <8 x i16> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> undef, <4 x i32> undef)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @undef_packusdw_128() {
+; CHECK-LABEL: @undef_packusdw_128(
+; CHECK-NEXT:    ret <8 x i16> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> undef)
+  ret <8 x i16> %1
+}
+
+define <16 x i8> @undef_packsswb_128() {
+; CHECK-LABEL: @undef_packsswb_128(
+; CHECK-NEXT:    ret <16 x i8> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> undef, <8 x i16> undef)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @undef_packuswb_128() {
+; CHECK-LABEL: @undef_packuswb_128(
+; CHECK-NEXT:    ret <16 x i8> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> undef, <8 x i16> undef)
+  ret <16 x i8> %1
+}
+
+define <16 x i16> @undef_packssdw_256() {
+; CHECK-LABEL: @undef_packssdw_256(
+; CHECK-NEXT:    ret <16 x i16> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> undef)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @undef_packusdw_256() {
+; CHECK-LABEL: @undef_packusdw_256(
+; CHECK-NEXT:    ret <16 x i16> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> undef)
+  ret <16 x i16> %1
+}
+
+define <32 x i8> @undef_packsswb_256() {
+; CHECK-LABEL: @undef_packsswb_256(
+; CHECK-NEXT:    ret <32 x i8> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> undef)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @undef_packuswb_256() {
+; CHECK-LABEL: @undef_packuswb_256(
+; CHECK-NEXT:    ret <32 x i8> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> undef, <16 x i16> undef)
+  ret <32 x i8> %1
+}
+
+define <32 x i16> @undef_packssdw_512() {
+; CHECK-LABEL: @undef_packssdw_512(
+; CHECK-NEXT:    ret <32 x i16> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> undef, <16 x i32> undef)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @undef_packusdw_512() {
+; CHECK-LABEL: @undef_packusdw_512(
+; CHECK-NEXT:    ret <32 x i16> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> undef)
+  ret <32 x i16> %1
+}
+
+define <64 x i8> @undef_packsswb_512() {
+; CHECK-LABEL: @undef_packsswb_512(
+; CHECK-NEXT:    ret <64 x i8> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> undef)
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @undef_packuswb_512() {
+; CHECK-LABEL: @undef_packuswb_512(
+; CHECK-NEXT:    ret <64 x i8> undef
+; Both operands of the pack are undef; the expected result is undef.
+  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> undef, <32 x i16> undef)
+  ret <64 x i8> %1
+}
+
+;
+; Constant Folding
+;
+
+define <8 x i16> @fold_packssdw_128() {
+; CHECK-LABEL: @fold_packssdw_128(
+; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 -1, i16 32767, i16 -32768, i16 0, i16 0, i16 0, i16 0>
+; Expected lanes show signed saturation to i16 (65536 -> 32767, -131072 -> -32768); the zero second operand gives the four trailing zeros.
+  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> <i32 0, i32 -1, i32 65536, i32 -131072>, <4 x i32> zeroinitializer)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @fold_packusdw_128() {
+; CHECK-LABEL: @fold_packusdw_128(
+; CHECK-NEXT:    ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 -32768, i16 -1>
+; Undef first operand gives four undef lanes; the rest show unsigned saturation to 16 bits (-1 -> 0, 65537 -> 0xFFFF printed as -1).
+  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> <i32 0, i32 -1, i32 32768, i32 65537>)
+  ret <8 x i16> %1
+}
+
+define <16 x i8> @fold_packsswb_128() {
+; CHECK-LABEL: @fold_packsswb_128(
+; CHECK-NEXT:    ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
+; Zero first operand gives eight zero bytes; undef second operand gives eight undef bytes.
+  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> undef)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @fold_packuswb_128() {
+; CHECK-LABEL: @fold_packuswb_128(
+; CHECK-NEXT:    ret <16 x i8> <i8 0, i8 1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 15, i8 0, i8 127, i8 0, i8 1, i8 0, i8 1, i8 0, i8 0>
+; NOTE(review): i16 65535, 32768 and -65535 lie outside the signed i16 range; the expected bytes match them being read as -1, -32768 and 1 - confirm intended.
+  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 1, i16 -1, i16 255, i16 65535, i16 -32768, i16 -127, i16 15>, <8 x i16> <i16 -15, i16 127, i16 32768, i16 -65535, i16 -255, i16 1, i16 -1, i16 0>)
+  ret <16 x i8> %1
+}
+
+define <16 x i16> @fold_packssdw_256() {
+; CHECK-LABEL: @fold_packssdw_256(
+; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 256, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
+; Undef second operand: each half of the expected result is four signed-saturated values from the first operand followed by four undef.
+  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <8 x i32> undef)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @fold_packusdw_256() {
+; CHECK-LABEL: @fold_packusdw_256(
+; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 256, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
+; Each half of the expected result is four values from each operand, unsigned-saturated to 16 bits (negatives -> 0, 65535/65536 -> 0xFFFF printed as -1).
+  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> <i32 0, i32 -256, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
+  ret <16 x i16> %1
+}
+
+define <32 x i8> @fold_packsswb_256() {
+; CHECK-LABEL: @fold_packsswb_256(
+; CHECK-NEXT:    ret <32 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+; Each half of the expected result is eight undef bytes (undef first operand) followed by eight zero bytes (zero second operand).
+  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> zeroinitializer)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @fold_packuswb_256() {
+; CHECK-LABEL: @fold_packuswb_256(
+; CHECK-NEXT:    ret <32 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
+; NOTE(review): i16 65536 does not fit in 16 bits; the expected i8 0 in that lane matches it being read as 0 rather than saturated - confirm intended.
+  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> zeroinitializer, <16 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 256, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
+  ret <32 x i8> %1
+}
+
+define <32 x i16> @fold_packssdw_512() {
+; CHECK-LABEL: @fold_packssdw_512(
+; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
+; Undef second operand: each quarter of the expected result is four signed-saturated values from the first operand followed by four undef.
+  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <16 x i32> undef)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @fold_packusdw_512() {
+; CHECK-LABEL: @fold_packusdw_512(
+; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767, i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
+; Each quarter of the expected result is four values from each operand, unsigned-saturated to 16 bits (negatives -> 0, 65535/65536 -> 0xFFFF printed as -1).
+  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> <i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767, i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
+  ret <32 x i16> %1
+}
+
+define <64 x i8> @fold_packsswb_512() {
+; CHECK-LABEL: @fold_packsswb_512(
+; CHECK-NEXT:    ret <64 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+; Each 16-byte quarter of the expected result is eight undef bytes (undef first operand) followed by eight zero bytes (zero second operand).
+  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> zeroinitializer)
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @fold_packuswb_512() {
+; CHECK-LABEL: @fold_packuswb_512(
+; CHECK-NEXT:    ret <64 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
+; NOTE(review): i16 65536 does not fit in 16 bits; the expected i8 0 in those lanes matches it being read as 0 rather than saturated - confirm intended.
+  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> zeroinitializer, <32 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
+  ret <64 x i8> %1
+}
+
+;
+; Demanded Elts
+;
+
+define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @elts_packssdw_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+; Only result elements 1 and 7 are read after the pack; the expected output packs %a0 directly with an undef second operand.
+  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 undef, i32 undef>
+  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
+  %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 7, i32 7, i32 7, i32 7>
+  ret <8 x i16> %4
+}
+
+define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @elts_packusdw_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+; The final shuffle leaves result elements 0 and 7 undef; the expected output drops both insertelements and the shuffle, leaving the bare pack of %a0 and %a1.
+  %1 = insertelement <4 x i32> %a0, i32 0, i32 0
+  %2 = insertelement <4 x i32> %a1, i32 0, i32 3
+  %3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
+  ret <8 x i16> %4
+}
+
+define <16 x i8> @elts_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: @elts_packsswb_128(
+; CHECK-NEXT:    ret <16 x i8> zeroinitializer
+; Zero is inserted at element 0 of each input and the final shuffle splats result elements 0 and 8; the expected result is all-zero.
+  %1 = insertelement <8 x i16> %a0, i16 0, i32 0
+  %2 = insertelement <8 x i16> %a1, i16 0, i32 0
+  %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
+  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i8> %4
+}
+
+define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: @elts_packuswb_128(
+; CHECK-NEXT:    ret <16 x i8> undef
+; Inputs are built from undef (%a0/%a1 are unused) and the final shuffle reads only result elements 1 and 15; the expected result is undef.
+  %1 = insertelement <8 x i16> undef, i16 0, i32 0
+  %2 = insertelement <8 x i16> undef, i16 0, i32 0
+  %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
+  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  ret <16 x i8> %4
+}
+
+define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_packssdw_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef)
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+; %1 only swaps elements 0 and 1 of %a0, and result lanes 0 and 1 are undef in the final mask. Expected output: one pack of the unshuffled %a0 with an undef second operand; all three shuffles are removed.
+  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef>
+  %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
+  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15>
+  ret <16 x i16> %4
+}
+
+define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_packusdw_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
+; %1 is an identity shuffle of %a0. Expected output: the first pack operand becomes undef, while the reversal of %a1 and the final shuffle are kept.
+  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
+  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i16> %4
+}
+
+define <32 x i8> @elts_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: @elts_packsswb_256(
+; CHECK-NEXT:    ret <32 x i8> zeroinitializer
+; The final shuffle splats result lanes 0 and 24 only; the expected output is an all-zero constant.
+  %1 = insertelement <16 x i16> %a0, i16 0, i32 0 ; zero element 0 of the first operand
+  %2 = insertelement <16 x i16> %a1, i16 0, i32 8 ; zero element 8 of the second operand
+  %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
+  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+  ret <32 x i8> %4
+}
+
+define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: @elts_packuswb_256(
+; CHECK-NEXT:    ret <32 x i8> undef
+; The final shuffle splats result lane 0, while the only defined element of %1 is the zero at index 1; the expected output is undef.
+  %1 = insertelement <16 x i16> undef, i16 0, i32 1
+  %2 = insertelement <16 x i16> undef, i16 0, i32 0
+  %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
+  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> zeroinitializer
+  ret <32 x i8> %4
+}
+
+define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @elts_packssdw_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef)
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+; %1 only swaps element pairs (0,1) and (8,9) of %a0. Expected output: one pack of the unshuffled %a0 with an undef second operand; all three shuffles are removed.
+  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef, i32 undef, i32 10, i32 9, i32 undef, i32 undef, i32 14, i32 13, i32 undef>
+  %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
+  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 18, i32 19, i32 20, i32 undef, i32 undef, i32 23, i32 24, i32 undef, i32 undef, i32 27, i32 28, i32 undef, i32 undef, i32 31>
+  ret <32 x i16> %4
+}
+
+define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @elts_packusdw_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
+; %1 is an identity shuffle of %a0. Expected output: the first pack operand becomes undef, while the per-half reversal of %a1 and the final shuffle are kept.
+  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+  %3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
+  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <32 x i16> %4
+}
+
+define <64 x i8> @elts_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
+; CHECK-LABEL: @elts_packsswb_512(
+; CHECK-NEXT:    ret <64 x i8> zeroinitializer
+; The final shuffle splats result lanes 0, 24, 32 and 56 only; the expected output is an all-zero constant.
+  %1 = insertelement <32 x i16> %a0, i16 0, i32 0
+  %2 = insertelement <32 x i16> %a1, i16 0, i32 8
+  %3 = insertelement <32 x i16> %1, i16 0, i32 16 ; first operand: zeros at elements 0 and 16
+  %4 = insertelement <32 x i16> %2, i16 0, i32 24 ; second operand: zeros at elements 8 and 24
+  %5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
+  %6 = shufflevector <64 x i8> %5, <64 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
+  ret <64 x i8> %6
+}
+
+define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
+; CHECK-LABEL: @elts_packuswb_512(
+; CHECK-NEXT:    ret <64 x i8> undef
+; The final shuffle splats result lane 0, while the only defined element of %1 is the zero at index 1; the expected output is undef.
+  %1 = insertelement <32 x i16> undef, i16 0, i32 1
+  %2 = insertelement <32 x i16> undef, i16 0, i32 0
+  %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
+  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
+  ret <64 x i8> %4
+}
+
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
+declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) nounwind readnone
+declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) nounwind readnone
+declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) nounwind readnone
+declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) nounwind readnone
diff --git a/test/Transforms/InstCombine/x86-pshufb.ll b/test/Transforms/InstCombine/x86-pshufb.ll
index b37884ddd58a7..f181ef57fe20b 100644
--- a/test/Transforms/InstCombine/x86-pshufb.ll
+++ b/test/Transforms/InstCombine/x86-pshufb.ll
@@ -468,6 +468,48 @@ define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) {
   ret <64 x i8> %1
 }
 
+; Demanded elts tests.
+
+define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {
+; CHECK-LABEL: @demanded_elts_insertion(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %BaseMask)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
+; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+; %M0 and %M15 are inserted at mask bytes 0 and 15, the two lanes the final shuffle leaves undef; the expected output passes %BaseMask to pshufb unchanged.
+  %1 = insertelement <16 x i8> %BaseMask, i8 %M0, i32 0
+  %2 = insertelement <16 x i8> %1, i8 %M15, i32 15
+  %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %2)
+  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
+  ret <16 x i8> %4
+}
+
+define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) {
+; CHECK-LABEL: @demanded_elts_insertion_avx2(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> [[TMP1]])
+; CHECK-NEXT:    ret <32 x i8> [[TMP2]]
+; The final shuffle is an identity except for undef lanes 0 and 22. Expected output: the %M22 insert and the shuffle are dropped; the %M0 insert is kept.
+  %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
+  %2 = insertelement <32 x i8> %1, i8 %M22, i32 22
+  %3 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %2)
+  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 undef, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %4
+}
+
+define <64 x i8> @demanded_elts_insertion_avx512(<64 x i8> %InVec, <64 x i8> %BaseMask, i8 %M0, i8 %M30) {
+; CHECK-LABEL: @demanded_elts_insertion_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <64 x i8> undef, i8 %M0, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> undef, <64 x i32> zeroinitializer
+; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
+; The final shuffle splats result lane 0. Expected output: %M0 is inserted into an undef mask, so %BaseMask and %M30 are no longer used.
+  %1 = insertelement <64 x i8> %BaseMask, i8 %M0, i32 0
+  %2 = insertelement <64 x i8> %1, i8 %M30, i32 30
+  %3 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %2)
+  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
+  ret <64 x i8> %4
+}
+
 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
 declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
 declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
diff --git a/test/Transforms/InstCombine/x86-vpermil.ll b/test/Transforms/InstCombine/x86-vpermil.ll
index fad10d7ad5c53..f68eb36c4b587 100644
--- a/test/Transforms/InstCombine/x86-vpermil.ll
+++ b/test/Transforms/InstCombine/x86-vpermil.ll
@@ -221,6 +221,74 @@ define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) {
   ret <8 x double> %a
 }
 
+; Simplify demanded elts
+
+define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps(
+; CHECK-NEXT:    ret <4 x float> %a0
+; Only selector element 3 is variable, and result lane 3 is undef in the final shuffle; the expected output returns %a0 unchanged.
+  %1 = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %a1, i32 3
+  %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %1)
+  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  ret <4 x float> %3
+}
+
+define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 6, i32 undef, i32 7>
+; CHECK-NEXT:    ret <8 x float> [[TMP1]]
+; Only odd result lanes are demanded, and the odd elements of %1 come from the constant vector (shuffle indices 8-11); the expected output is one constant-mask shuffle of %a0.
+  %1 = shufflevector <8 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 2, i32 1, i32 0>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1)
+  %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7>
+  ret <8 x float> %3
+}
+
+define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %a1)
+; CHECK-NEXT:    ret <16 x float> [[TMP1]]
+; Result lane 0 is undef in the final shuffle; the expected output drops the %a2 insert at selector element 0 and passes %a1 directly.
+  %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0
+  %2 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %1)
+  %3 = shufflevector <16 x float> %2, <16 x float> undef, <16 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x float> %3
+}
+
+define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd(
+; CHECK-NEXT:    ret <2 x double> %a0
+; Only selector element 1 is variable, and result lane 1 is undef in the final shuffle; the expected output returns %a0 unchanged.
+  %1 = insertelement <2 x i64> <i64 0, i64 2>, i64 %a1, i32 1
+  %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %1)
+  %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
+  ret <2 x double> %3
+}
+
+define <4 x double> @elts_test_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 undef>
+; CHECK-NEXT:    ret <4 x double> [[TMP1]]
+; %1 is <2, 0, 2, %a1[0]>, and lane 3 is undef in the final shuffle; the expected output is one constant-mask shuffle of %a0.
+  %1 = shufflevector <4 x i64> <i64 0, i64 2, i64 0, i64 2>, <4 x i64> %a1, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  %2 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %1)
+  %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  ret <4 x double> %3
+}
+
+define <8 x double> @elts_test_vpermilvar_pd_512(<8 x double> %a0, <8 x i64> %a1, i64 %a2) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i64> undef, i64 %a2, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    ret <8 x double> [[TMP3]]
+; The final shuffle splats result lane 0. Expected output: %a2 is inserted into an undef selector, so %a1 is no longer used.
+  %1 = insertelement <8 x i64> %a1, i64 %a2, i32 0
+  %2 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %1)
+  %3 = shufflevector <8 x double> %2, <8 x double> undef, <8 x i32> zeroinitializer
+  ret <8 x double> %3
+}
+
 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)
 declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
diff --git a/test/Transforms/InstCombine/xor.ll b/test/Transforms/InstCombine/xor.ll
index cd137776bbfd1..570155b162325 100644
--- a/test/Transforms/InstCombine/xor.ll
+++ b/test/Transforms/InstCombine/xor.ll
@@ -321,7 +321,7 @@ define i32 @test25(i32 %g, i32 %h) {
 
 define i32 @test26(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test26(
-; CHECK-NEXT:    [[T4:%.*]] = and i32 %a, %b
+; CHECK-NEXT:    [[T4:%.*]] = and i32 %b, %a
 ; CHECK-NEXT:    ret i32 [[T4]]
 ;
   %b2 = xor i32 %b, -1
@@ -352,3 +352,187 @@ define i32 @test28(i32 %indvar) {
   %t214 = xor i32 %t7, -2147483648
   ret i32 %t214
 }
+
+define i32 @test29(i1 %C) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], i32 915, i32 113
+; CHECK-NEXT:    ret i32 [[V]]
+; The xor with a constant is expected to fold into both select arms: 1000 ^ 123 = 915 and 10 ^ 123 = 113.
+  %A = select i1 %C, i32 1000, i32 10
+  %V = xor i32 %A, 123
+  ret i32 %V
+}
+
+define <2 x i32> @test29vec(i1 %C) {
+; CHECK-LABEL: @test29vec(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 915, i32 915>, <2 x i32> <i32 113, i32 113>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+; Splat-vector form: the xor is expected to fold into both select arms (1000 ^ 123 = 915, 10 ^ 123 = 113 per element).
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+  %V = xor <2 x i32> %A, <i32 123, i32 123>
+  ret <2 x i32> %V
+}
+
+define <2 x i32> @test29vec2(i1 %C) {
+; CHECK-LABEL: @test29vec2(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 915, i32 2185>, <2 x i32> <i32 113, i32 339>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+; Non-splat vector form: element 1 folds as 2500 ^ 333 = 2185 and 30 ^ 333 = 339.
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+  %V = xor <2 x i32> %A, <i32 123, i32 333>
+  ret <2 x i32> %V
+}
+
+define i32 @test30(i1 %which) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 915, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ]
+; CHECK-NEXT:    ret i32 [[A]]
+; The xor with a constant is expected to fold into the phi's incoming constants (1000 ^ 123 = 915, 10 ^ 123 = 113).
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+  %value = xor i32 %A, 123
+  ret i32 %value
+}
+
+define <2 x i32> @test30vec(i1 %which) {
+; CHECK-LABEL: @test30vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 915, i32 915>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+; Splat-vector form: the xor is expected to fold into the phi's incoming constant vectors.
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+  %value = xor <2 x i32> %A, <i32 123, i32 123>
+  ret <2 x i32> %value
+}
+
+define <2 x i32> @test30vec2(i1 %which) {
+; CHECK-LABEL: @test30vec2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 915, i32 2185>, [[ENTRY:%.*]] ], [ <i32 113, i32 339>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+; Non-splat vector form: element 1 folds as 2500 ^ 333 = 2185 and 30 ^ 333 = 339.
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+  %value = xor <2 x i32> %A, <i32 123, i32 333>
+  ret <2 x i32> %value
+}
+
+define i32 @test31(i32 %A, i32 %B) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT:    [[XOR:%.*]] = and i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+; B ^ (A | B) -> ~B & A
+  %or = or i32 %A, %B
+  %xor = xor i32 %B, %or
+  ret i32 %xor
+}
+
+define i32 @test32(i32 %A, i32 %B) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT:    [[XOR:%.*]] = and i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+; B ^ (B | A) -> ~B & A
+  %or = or i32 %B, %A
+  %xor = xor i32 %B, %or
+  ret i32 %xor
+}
+
+define i32 @test33(i32 %A, i32 %B) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT:    [[XOR:%.*]] = and i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+; (A | B) ^ B -> ~B & A
+  %or = or i32 %A, %B
+  %xor = xor i32 %or, %B
+  ret i32 %xor
+}
+
+define i32 @test34(i32 %A, i32 %B) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT:    [[XOR:%.*]] = and i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+; (B | A) ^ B -> ~B & A
+  %or = or i32 %B, %A
+  %xor = xor i32 %or, %B
+  ret i32 %xor
+}
+
+define i32 @test35(i32 %A, i32 %B) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT:    [[XOR:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+; B ^ (A & B) -> ~A & B
+  %and = and i32 %A, %B
+  %xor = xor i32 %B, %and
+  ret i32 %xor
+}
+
+define i32 @test36(i32 %A, i32 %B) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT:    [[XOR:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+; B ^ (B & A) -> ~A & B
+  %and = and i32 %B, %A
+  %xor = xor i32 %B, %and
+  ret i32 %xor
+}
+
+define i32 @test37(i32 %A, i32 %B) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT:    [[XOR:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+; (A & B) ^ B -> ~A & B
+  %and = and i32 %A, %B
+  %xor = xor i32 %and, %B
+  ret i32 %xor
+}
+
+define i32 @test38(i32 %A, i32 %B) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT:    [[XOR:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+; (B & A) ^ B -> ~A & B
+  %and = and i32 %B, %A
+  %xor = xor i32 %and, %B
+  ret i32 %xor
+}
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index f3591ed9c8a9b..79e62723f143f 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -110,7 +110,7 @@ define i32 @test6(i32 %x) {
 define i32 @test7(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[B_NOT:%.*]] = xor i32 %b, -1
-; CHECK-NEXT:    [[XOR:%.*]] = or i32 %a, [[B_NOT]]
+; CHECK-NEXT:    [[XOR:%.*]] = or i32 [[B_NOT]], %a
 ; CHECK-NEXT:    ret i32 [[XOR]]
 ;
   %or = or i32 %a, %b
@@ -123,7 +123,7 @@ define i32 @test7(i32 %a, i32 %b) {
 define i32 @test8(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test8(
 ; CHECK-NEXT:    [[B_NOT:%.*]] = xor i32 %b, -1
-; CHECK-NEXT:    [[XOR:%.*]] = or i32 %a, [[B_NOT]]
+; CHECK-NEXT:    [[XOR:%.*]] = or i32 [[B_NOT]], %a
 ; CHECK-NEXT:    ret i32 [[XOR]]
 ;
   %neg = xor i32 %a, -1
@@ -144,6 +144,18 @@ define i32 @test9(i32 %b, i32 %c) {
   ret i32 %xor2
 }
 
+; (A & B) ^ (B ^ A) -> (A | B)
+define i32 @test9b(i32 %b, i32 %c) {
+; CHECK-LABEL: @test9b(
+; CHECK-NEXT:    [[XOR2:%.*]] = or i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR2]]
+; Commuted variant: the inner xor takes its operands in the opposite order to the and; the expected output is a single or.
+  %and = and i32 %b, %c
+  %xor = xor i32 %c, %b
+  %xor2 = xor i32 %and, %xor
+  ret i32 %xor2
+}
+
 ; (A ^ B) ^ (A & B) -> (A | B)
 define i32 @test10(i32 %b, i32 %c) {
 ; CHECK-LABEL: @test10(
@@ -156,6 +168,18 @@ define i32 @test10(i32 %b, i32 %c) {
   ret i32 %xor2
 }
 
+; (A ^ B) ^ (B & A) -> (A | B)
+define i32 @test10b(i32 %b, i32 %c) {
+; CHECK-LABEL: @test10b(
+; CHECK-NEXT:    [[XOR2:%.*]] = or i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR2]]
+; Commuted variant: the and takes its operands in the opposite order to the inner xor; the expected output is a single or.
+  %xor = xor i32 %b, %c
+  %and = and i32 %c, %b
+  %xor2 = xor i32 %xor, %and
+  ret i32 %xor2
+}
+
 define i32 @test11(i32 %A, i32 %B) {
 ; CHECK-LABEL: @test11(
 ; CHECK-NEXT:    ret i32 0
diff --git a/test/Transforms/InstCombine/zero-point-zero-add.ll b/test/Transforms/InstCombine/zero-point-zero-add.ll
index e466e8ad7429b..a23db75525e93 100644
--- a/test/Transforms/InstCombine/zero-point-zero-add.ll
+++ b/test/Transforms/InstCombine/zero-point-zero-add.ll
@@ -15,7 +15,7 @@ define double @test(double %X) {
 
 define double @test1(double %X) {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[Y:%.*]] = call double @fabs(double %X)
+; CHECK-NEXT:    [[Y:%.*]] = call double @llvm.fabs.f64(double %X)
 ; CHECK-NEXT:    ret double [[Y]]
 ;
   %Y = call double @fabs(double %X)
diff --git a/test/Transforms/InstCombine/zext-or-icmp.ll b/test/Transforms/InstCombine/zext-or-icmp.ll
index 610e9a754f0d5..afbe36da3e37b 100644
--- a/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -19,3 +19,33 @@ define i8 @zext_or_icmp_icmp(i8 %a, i8 %b) {
 ; CHECK-NEXT:    ret i8 %zext
 }
 
+; Here, widening the or from i1 to i32 and removing one of the icmps would
+; widen an undef value (created by the out-of-range shift), increasing the
+; range of valid values for the return, so we can't do it.
+define i32 @dont_widen_undef() {
+entry:
+  br label %block2
+
+block1: ; no branch in this function targets %block1
+  br label %block2
+
+block2:
+  %m.011 = phi i32 [ 33, %entry ], [ 0, %block1 ] ; 33 arrives on the %entry edge
+  %cmp.i = icmp ugt i32 %m.011, 1
+  %m.1.op = lshr i32 1, %m.011 ; a shift amount of 33 is out of range for i32
+  %sext.mask = and i32 %m.1.op, 65535
+  %cmp115 = icmp ne i32 %sext.mask, 0
+  %cmp1 = or i1 %cmp.i, %cmp115
+  %conv2 = zext i1 %cmp1 to i32
+  ret i32 %conv2
+
+; CHECK-LABEL: dont_widen_undef(
+; CHECK:         %m.011 = phi i32 [ 33, %entry ], [ 0, %block1 ]
+; CHECK-NEXT:    %cmp.i = icmp ugt i32 %m.011, 1
+; CHECK-NEXT:    %m.1.op = lshr i32 1, %m.011
+; CHECK-NEXT:    %sext.mask = and i32 %m.1.op, 65535
+; CHECK-NEXT:    %cmp115 = icmp ne i32 %sext.mask, 0
+; CHECK-NEXT:    %cmp1 = or i1 %cmp.i, %cmp115
+; CHECK-NEXT:    %conv2 = zext i1 %cmp1 to i32
+; CHECK-NEXT:    ret i32 %conv2
+}
diff --git a/test/Transforms/InstCombine/zext-phi.ll b/test/Transforms/InstCombine/zext-phi.ll
new file mode 100644
index 0000000000000..5e352415c747c
--- /dev/null
+++ b/test/Transforms/InstCombine/zext-phi.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n8:16:32:64"
+
+; Although i1 is not in the datalayout, we should treat it
+; as a legal type because it is a fundamental type in IR.
+; This means we should shrink the phi (sink the zexts).
+
+define i64 @sink_i1_casts(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @sink_i1_casts(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %if, label %end
+; CHECK:       if:
+; CHECK-NEXT:    br label %end
+; CHECK:       end:
+; CHECK-NEXT:    [[PHI_IN:%.*]] = phi i1 [ %cond1, %entry ], [ %cond2, %if ]
+; CHECK-NEXT:    [[PHI:%.*]] = zext i1 [[PHI_IN]] to i64
+; CHECK-NEXT:    ret i64 [[PHI]]
+; Expected output: the phi is narrowed to i1 over %cond1/%cond2, followed by a single zext to i64 in place of the two per-block zexts.
+entry:
+  %z1 = zext i1 %cond1 to i64
+  br i1 %cond1, label %if, label %end
+
+if:
+  %z2 = zext i1 %cond2 to i64
+  br label %end
+
+end:
+  %phi = phi i64 [ %z1, %entry ], [ %z2, %if ]
+  ret i64 %phi
+}
+
diff --git a/test/Transforms/InstCombine/zext.ll b/test/Transforms/InstCombine/zext.ll
index 740509809d1c4..887d839cb8c79 100644
--- a/test/Transforms/InstCombine/zext.ll
+++ b/test/Transforms/InstCombine/zext.ll
@@ -35,7 +35,7 @@ define <2 x i64> @test3(<2 x i64> %A) {
 
 define <2 x i64> @test4(<2 x i64> %A) {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i64> %A, <i64 4294967295, i64 4294967295>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i64> %A, <i64 63, i64 63>
 ; CHECK-NEXT:    [[XOR:%.*]] = and <2 x i64> [[TMP1]], <i64 23, i64 42>
 ; CHECK-NEXT:    ret <2 x i64> [[XOR]]
 ;
author	Dimitry Andric <dim@FreeBSD.org>	2017-04-16 16:01:22 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-04-16 16:01:22 +0000
commit	71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch)
tree	5343938942df402b49ec7300a1c25a2d4ccd5821 /test/Transforms/InstCombine
parent	31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff)