55 files changed, 1620 insertions, 630 deletions
diff --git a/test/Analysis/DemandedBits/basic.ll b/test/Analysis/DemandedBits/basic.ll
index 5b8652396b3a..6f44465315e6 100644
--- a/test/Analysis/DemandedBits/basic.ll
+++ b/test/Analysis/DemandedBits/basic.ll
@@ -1,9 +1,9 @@
 ; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s
 ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s
  
-; CHECK-DAG: DemandedBits: 0xFF for   %1 = add nsw i32 %a, 5
-; CHECK-DAG: DemandedBits: 0xFF for   %3 = trunc i32 %2 to i8
-; CHECK-DAG: DemandedBits: 0xFF for   %2 = mul nsw i32 %1, %b
+; CHECK-DAG: DemandedBits: 0xff for   %1 = add nsw i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xff for   %3 = trunc i32 %2 to i8
+; CHECK-DAG: DemandedBits: 0xff for   %2 = mul nsw i32 %1, %b
 define i8 @test_mul(i32 %a, i32 %b) {
   %1 = add nsw i32 %a, 5
   %2 = mul nsw i32 %1, %b
diff --git a/test/Analysis/DemandedBits/intrinsics.ll b/test/Analysis/DemandedBits/intrinsics.ll
index 5a6d17284a72..48f6d4624422 100644
--- a/test/Analysis/DemandedBits/intrinsics.ll
+++ b/test/Analysis/DemandedBits/intrinsics.ll
@@ -1,9 +1,9 @@
 ; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s
 ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s
 
-; CHECK-DAG: DemandedBits: 0xFF000000 for   %1 = or i32 %x, 1
-; CHECK-DAG: DemandedBits: 0xFF for   %2 = call i32 @llvm.bitreverse.i32(i32 %1)
-; CHECK-DAG: DemandedBits: 0xFF for   %3 = trunc i32 %2 to i8
+; CHECK-DAG: DemandedBits: 0xff000000 for   %1 = or i32 %x, 1
+; CHECK-DAG: DemandedBits: 0xff for   %2 = call i32 @llvm.bitreverse.i32(i32 %1)
+; CHECK-DAG: DemandedBits: 0xff for   %3 = trunc i32 %2 to i8
 define i8 @test_bswap(i32 %x) {
   %1 = or i32 %x, 1
   %2 = call i32 @llvm.bswap.i32(i32 %1)
@@ -12,9 +12,9 @@ define i8 @test_bswap(i32 %x) {
 }
 declare i32 @llvm.bswap.i32(i32)
 
-; CHECK-DAG: DemandedBits: 0xFF000000 for   %1 = or i32 %x, 1
-; CHECK-DAG: DemandedBits: 0xFF for   %2 = call i32 @llvm.bswap.i32(i32 %1)
-; CHECK-DAG: DemandedBits: 0xFF for   %3 = trunc i32 %2 to i8
+; CHECK-DAG: DemandedBits: 0xff000000 for   %1 = or i32 %x, 1
+; CHECK-DAG: DemandedBits: 0xff for   %2 = call i32 @llvm.bswap.i32(i32 %1)
+; CHECK-DAG: DemandedBits: 0xff for   %3 = trunc i32 %2 to i8
 define i8 @test_bitreverse(i32 %x) {
   %1 = or i32 %x, 1
   %2 = call i32 @llvm.bitreverse.i32(i32 %1)
diff --git a/test/Analysis/Lint/noalias-byval.ll b/test/Analysis/Lint/noalias-byval.ll
new file mode 100644
index 000000000000..5b36c6d15df3
--- /dev/null
+++ b/test/Analysis/Lint/noalias-byval.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -lint -disable-output 2>&1 | FileCheck %s
+
+%s = type { i8 }
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) #0
+
+declare void @f1(%s* noalias nocapture sret, %s* nocapture readnone)
+
+define void @f2() {
+entry:
+  %c = alloca %s
+  %tmp = alloca %s
+  %0 = bitcast %s* %c to i8*
+  %1 = bitcast %s* %tmp to i8*
+  call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
+  call void @f1(%s* sret %c, %s* %c)
+  ret void
+}
+
+; Lint should complain about us passing %c to both arguments since one of them
+; is noalias.
+; CHECK: Unusual: noalias argument aliases another argument
+; CHECK-NEXT: call void @f1(%s* sret %c, %s* %c)
+
+declare void @f3(%s* noalias nocapture sret, %s* byval nocapture readnone)
+
+define void @f4() {
+entry:
+  %c = alloca %s
+  %tmp = alloca %s
+  %0 = bitcast %s* %c to i8*
+  %1 = bitcast %s* %tmp to i8*
+  call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
+  call void @f3(%s* sret %c, %s* byval %c)
+  ret void
+}
+
+; Lint should not complain about passing %c to both arguments even if one is
+; noalias, since the other one is byval, effectively copying the data to the
+; stack instead of passing the pointer itself.
+; CHECK-NOT: Unusual: noalias argument aliases another argument
+; CHECK-NOT: call void @f3(%s* sret %c, %s* byval %c)
+
+attributes #0 = { argmemonly nounwind }
diff --git a/test/Analysis/ScalarEvolution/truncate.ll b/test/Analysis/ScalarEvolution/truncate.ll
new file mode 100644
index 000000000000..e9bd39d7a268
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/truncate.ll
@@ -0,0 +1,72 @@
+; RUN: opt < %s -analyze -scalar-evolution
+; RUN: opt < %s -passes='print<scalar-evolution>'
+; Regression test for an assertion failure in ScalarEvolution::getTruncateExpr.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @snork(i8* %arg, i8 %arg1, i64 %arg2) {
+bb:
+  br label %bb12
+
+bb3:                                              ; preds = %bb34
+  br i1 true, label %bb4, label %bb12
+
+bb4:                                              ; preds = %bb3
+  br label %bb6
+
+bb5:                                              ; preds = %bb6
+  ret void
+
+bb6:                                              ; preds = %bb6, %bb4
+  %tmp = phi i64 [ %tmp28, %bb4 ], [ %tmp10, %bb6 ]
+  %tmp7 = phi i32 [ 3, %bb4 ], [ %tmp11, %bb6 ]
+  %tmp8 = trunc i64 %tmp to i32
+  %tmp9 = sdiv i32 %tmp8, %tmp7
+  %tmp10 = add i64 %tmp, -1
+  %tmp11 = add i32 %tmp9, %tmp7
+  br i1 true, label %bb5, label %bb6
+
+bb12:                                             ; preds = %bb3, %bb
+  br label %bb13
+
+bb13:                                             ; preds = %bb34, %bb12
+  %tmp14 = phi i64 [ %arg2, %bb12 ], [ %tmp28, %bb34 ]
+  %tmp15 = phi i8 [ %arg1, %bb12 ], [ %tmp26, %bb34 ]
+  %tmp16 = phi i32 [ 1, %bb12 ], [ %tmp35, %bb34 ]
+  %tmp17 = add i8 %tmp15, -1
+  %tmp18 = sext i8 %tmp17 to i64
+  %tmp19 = sub i64 1, %tmp14
+  %tmp20 = add i64 %tmp19, %tmp18
+  %tmp21 = trunc i64 %tmp20 to i32
+  %tmp22 = icmp eq i32 %tmp21, 0
+  br i1 %tmp22, label %bb32, label %bb23
+
+bb23:                                             ; preds = %bb13
+  br i1 true, label %bb25, label %bb24
+
+bb24:                                             ; preds = %bb23
+  br label %bb25
+
+bb25:                                             ; preds = %bb24, %bb23
+  %tmp26 = add i8 %tmp15, -2
+  %tmp27 = sext i8 %tmp26 to i64
+  %tmp28 = sub i64 %tmp27, %tmp20
+  %tmp29 = trunc i64 %tmp28 to i32
+  %tmp30 = icmp eq i32 %tmp29, 0
+  br i1 %tmp30, label %bb31, label %bb34
+
+bb31:                                             ; preds = %bb25
+  br label %bb33
+
+bb32:                                             ; preds = %bb13
+  br label %bb33
+
+bb33:                                             ; preds = %bb32, %bb31
+  unreachable
+
+bb34:                                             ; preds = %bb25
+  %tmp35 = add nuw nsw i32 %tmp16, 2
+  %tmp36 = icmp ugt i32 %tmp16, 52
+  br i1 %tmp36, label %bb3, label %bb13
+}
diff --git a/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir b/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
index 630b34028162..c9ff2cd0d514 100644
--- a/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
+++ b/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
@@ -1,7 +1,7 @@
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefix=UNPROFITABLE %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
 #
 name:            f1_2s
 registers:
@@ -80,3 +80,82 @@ body:             |
 # PROFITABLE-LABEL: name: f1_2d
 # PROFITABLE: %5:fpr128 = FNEGv2f64 %2
 # PROFITABLE-NEXT: FMLAv2f64 killed %5, %0, %1
+---
+name:            f1_both_fmul_2s
+registers:
+  - { id: 0, class: fpr64 }
+  - { id: 1, class: fpr64 }
+  - { id: 2, class: fpr64 }
+  - { id: 3, class: fpr64 }
+  - { id: 4, class: fpr64 }
+  - { id: 5, class: fpr64 }
+  - { id: 6, class: fpr64 }
+body:             |
+  bb.0.entry:
+    %3:fpr64 = COPY %q3
+    %2:fpr64 = COPY %q2
+    %1:fpr64 = COPY %q1
+    %0:fpr64 = COPY %q0
+    %4:fpr64 = FMULv2f32 %0, %1
+    %5:fpr64 = FMULv2f32 %2, %3
+    %6:fpr64 = FSUBv2f32 killed %4, %5
+    %q0 = COPY %6
+    RET_ReallyLR implicit %q0
+
+...
+# ALL-LABEL: name: f1_both_fmul_2s
+# ALL: %4:fpr64 = FMULv2f32 %0, %1
+# ALL-NEXT: FMLSv2f32 killed %4, %2, %3
+---
+name:            f1_both_fmul_4s
+registers:
+  - { id: 0, class: fpr128 }
+  - { id: 1, class: fpr128 }
+  - { id: 2, class: fpr128 }
+  - { id: 3, class: fpr128 }
+  - { id: 4, class: fpr128 }
+  - { id: 5, class: fpr128 }
+  - { id: 6, class: fpr128 }
+body:             |
+  bb.0.entry:
+    %3:fpr128 = COPY %q3
+    %2:fpr128 = COPY %q2
+    %1:fpr128 = COPY %q1
+    %0:fpr128 = COPY %q0
+    %4:fpr128 = FMULv4f32 %0, %1
+    %5:fpr128 = FMULv4f32 %2, %3
+    %6:fpr128 = FSUBv4f32 killed %4, %5
+    %q0 = COPY %6
+    RET_ReallyLR implicit %q0
+
+...
+# ALL-LABEL: name: f1_both_fmul_4s
+# ALL: %4:fpr128 = FMULv4f32 %0, %1
+# ALL-NEXT: FMLSv4f32 killed %4, %2, %3
+---
+name:            f1_both_fmul_2d
+registers:
+  - { id: 0, class: fpr128 }
+  - { id: 1, class: fpr128 }
+  - { id: 2, class: fpr128 }
+  - { id: 3, class: fpr128 }
+  - { id: 4, class: fpr128 }
+  - { id: 5, class: fpr128 }
+  - { id: 6, class: fpr128 }
+body:             |
+  bb.0.entry:
+    %3:fpr128 = COPY %q3
+    %2:fpr128 = COPY %q2
+    %1:fpr128 = COPY %q1
+    %0:fpr128 = COPY %q0
+    %4:fpr128 = FMULv2f64 %0, %1
+    %5:fpr128 = FMULv2f64 %2, %3
+    %6:fpr128 = FSUBv2f64 killed %4, %5
+    %q0 = COPY %6
+    RET_ReallyLR implicit %q0
+
+...
+# ALL-LABEL: name: f1_both_fmul_2d
+# ALL: %4:fpr128 = FMULv2f64 %0, %1
+# ALL-NEXT: FMLSv2f64 killed %4, %2, %3
+
diff --git a/test/CodeGen/AArch64/combine-and-like.ll b/test/CodeGen/AArch64/combine-and-like.ll
new file mode 100644
index 000000000000..15770c2e02ff
--- /dev/null
+++ b/test/CodeGen/AArch64/combine-and-like.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+define i32 @f(i32 %a0) {
+; CHECK-LABEL: f:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+  %1 = lshr i32 %a0, 2147483647
+  %2 = add i32 %1, 2147483647
+  %3 = and i32 %2, %1
+  ret i32 %3
+}
diff --git a/test/CodeGen/X86/avx512-schedule.ll b/test/CodeGen/X86/avx512-schedule.ll
index 306b95f0f3ae..af99b86ca5d1 100755
--- a/test/CodeGen/X86/avx512-schedule.ll
+++ b/test/CodeGen/X86/avx512-schedule.ll
@@ -129,7 +129,7 @@ entry:
 define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
 ; GENERIC-LABEL: imulq512:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmullq %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: imulq512:
@@ -143,7 +143,7 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
 define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
 ; GENERIC-LABEL: imulq256:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmullq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: imulq256:
@@ -157,7 +157,7 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
 define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
 ; GENERIC-LABEL: imulq128:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmullq %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: imulq128:
@@ -550,7 +550,7 @@ define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
 ; GENERIC-LABEL: vpmulld_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpmulld_test:
diff --git a/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir b/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir
index 965014162073..bbefc4f920a1 100644
--- a/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir
+++ b/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir
@@ -57,7 +57,7 @@
 # 	return w;
 # }
 #
-# CHECK:       129:       eb 13   jmp     19 <ifElse+0x7E>
+# CHECK:       129:       eb 13   jmp     19 <ifElse+0x7e>
 # CHECK:       12e:       eb a0   jmp     -96 <ifElse+0x10>
 # CHECK:       132:       eb 9c   jmp     -100 <ifElse+0x10>
 # CHECK:       137:       eb 97   jmp     -105 <ifElse+0x10>
diff --git a/test/CodeGen/X86/combine-pmuldq.ll b/test/CodeGen/X86/combine-pmuldq.ll
index 53ab87a386b3..ebfe0d56358e 100644
--- a/test/CodeGen/X86/combine-pmuldq.ll
+++ b/test/CodeGen/X86/combine-pmuldq.ll
@@ -1,6 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=AVX --check-prefix=AVX512DQVL
 
 ; TODO - shuffle+sext are superfluous
 define <2 x i64> @combine_shuffle_sext_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
@@ -66,13 +69,29 @@ define <2 x i64> @combine_shuffle_zero_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-NEXT:    pmuludq %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: combine_shuffle_zero_pmuludq:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; AVX-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX2-LABEL: combine_shuffle_zero_pmuludq:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512VL-LABEL: combine_shuffle_zero_pmuludq:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; AVX512VL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq:
+; AVX512DQVL:       # %bb.0:
+; AVX512DQVL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX512DQVL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; AVX512DQVL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT:    retq
   %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   %2 = shufflevector <4 x i32> %a1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   %3 = bitcast <4 x i32> %1 to <2 x i64>
@@ -94,13 +113,29 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1)
 ; SSE-NEXT:    pmuludq %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: combine_shuffle_zero_pmuludq_256:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
-; AVX-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
-; AVX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    retq
+; AVX2-LABEL: combine_shuffle_zero_pmuludq_256:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
+; AVX2-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512VL-LABEL: combine_shuffle_zero_pmuludq_256:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
+; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
+; AVX512VL-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq_256:
+; AVX512DQVL:       # %bb.0:
+; AVX512DQVL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
+; AVX512DQVL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
+; AVX512DQVL-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT:    retq
   %1 = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
   %2 = shufflevector <8 x i32> %a1, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
   %3 = bitcast <8 x i32> %1 to <4 x i64>
@@ -108,3 +143,46 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1)
   %5 = mul <4 x i64> %3, %4
   ret <4 x i64> %5
 }
+
+define <8 x i64> @combine_zext_pmuludq_256(<8 x i32> %a) {
+; SSE-LABEL: combine_zext_pmuludq_256:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; SSE-NEXT:    pmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE-NEXT:    pmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero
+; SSE-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
+; SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [715827883,715827883]
+; SSE-NEXT:    pmuludq %xmm1, %xmm0
+; SSE-NEXT:    pmuludq %xmm1, %xmm2
+; SSE-NEXT:    pmuludq %xmm1, %xmm4
+; SSE-NEXT:    pmuludq %xmm1, %xmm3
+; SSE-NEXT:    movdqa %xmm4, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: combine_zext_pmuludq_256:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [715827883,715827883,715827883,715827883]
+; AVX2-NEXT:    vpmuludq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmuludq %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512VL-LABEL: combine_zext_pmuludq_256:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; AVX512VL-NEXT:    vpmuludq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512DQVL-LABEL: combine_zext_pmuludq_256:
+; AVX512DQVL:       # %bb.0:
+; AVX512DQVL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; AVX512DQVL-NEXT:    vpmuludq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512DQVL-NEXT:    retq
+  %1 = zext <8 x i32> %a to <8 x i64>
+  %2 = mul nuw nsw <8 x i64> %1, <i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883>
+  ret <8 x i64> %2
+}
diff --git a/test/CodeGen/X86/fdiv-combine.ll b/test/CodeGen/X86/fdiv-combine.ll
index 912110e75d27..62e86e3ad2cc 100644
--- a/test/CodeGen/X86/fdiv-combine.ll
+++ b/test/CodeGen/X86/fdiv-combine.ll
@@ -95,6 +95,41 @@ define double @div3_arcp(double %x, double %y, double %z) {
   ret double %ret
 }
 
+define float @div_select_constant_fold(i1 zeroext %arg) {
+; CHECK-LABEL: div_select_constant_fold:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jne .LBB6_1
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB6_1:
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq
+  %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
+  %B2 = fdiv float %tmp, 1.000000e+00
+  ret float %B2
+}
+
+define float @div_select_constant_fold_zero(i1 zeroext %arg) {
+; CHECK-LABEL: div_select_constant_fold_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jne .LBB7_1
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    jmp .LBB7_3
+; CHECK-NEXT:  .LBB7_1:
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:  .LBB7_3:
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    divss %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
+  %B2 = fdiv float %tmp, 0.000000e+00
+  ret float %B2
+}
+
 define void @PR24141() {
 ; CHECK-LABEL: PR24141:
 ; CHECK:	callq
diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll
index e09ad3e4e0b8..6431847064f0 100644
--- a/test/CodeGen/X86/gather-addresses.ll
+++ b/test/CodeGen/X86/gather-addresses.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN
 ; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN
 ; RUN: llc -mtriple=i686-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN32
@@ -7,34 +8,59 @@
 ; use an efficient mov/shift sequence rather than shuffling each individual
 ; element out of the index vector.
 
-; CHECK-LABEL: foo:
-; LIN: movdqa	(%rsi), %xmm0
-; LIN: pand 	(%rdx), %xmm0
-; LIN: pextrq	$1, %xmm0, %r[[REG4:.+]]
-; LIN: movq 	%xmm0, %r[[REG2:.+]]
-; LIN: movslq	%e[[REG2]], %r[[REG1:.+]]
-; LIN: sarq    $32, %r[[REG2]]
-; LIN: movslq	%e[[REG4]], %r[[REG3:.+]]
-; LIN: sarq    $32, %r[[REG4]]
-; LIN: movsd    (%rdi,%r[[REG3]],8), %xmm1
-; LIN: movhpd   (%rdi,%r[[REG4]],8), %xmm1 
-; LIN: movq     %rdi, %xmm1 
-; LIN: movq     %r[[REG3]], %xmm0
-
-; WIN: movdqa	(%rdx), %xmm0
-; WIN: pand 	(%r8), %xmm0
-; WIN: pextrq	$1, %xmm0, %r[[REG4:.+]]
-; WIN: movq 	%xmm0, %r[[REG2:.+]]
-; WIN: movslq	%e[[REG2]], %r[[REG1:.+]]
-; WIN: sarq    $32, %r[[REG2]]
-; WIN: movslq	%e[[REG4]], %r[[REG3:.+]]
-; WIN: sarq    $32, %r[[REG4]]
-; WIN: movsd    (%rcx,%r[[REG3]],8), %xmm1
-; WIN: movhpd   (%rcx,%r[[REG4]],8), %xmm1
-; WIN: movdqa   (%r[[REG2]]), %xmm0
-; WIN: movq     %r[[REG2]], %xmm1
-
 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
+; LIN-LABEL: foo:
+; LIN:       # %bb.0:
+; LIN-NEXT:    movdqa (%rsi), %xmm0
+; LIN-NEXT:    pand (%rdx), %xmm0
+; LIN-NEXT:    pextrq $1, %xmm0, %rax
+; LIN-NEXT:    movq %xmm0, %rcx
+; LIN-NEXT:    movslq %ecx, %rdx
+; LIN-NEXT:    sarq $32, %rcx
+; LIN-NEXT:    movslq %eax, %rsi
+; LIN-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; LIN-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; LIN-NEXT:    sarq $32, %rax
+; LIN-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; LIN-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; LIN-NEXT:    retq
+;
+; WIN-LABEL: foo:
+; WIN:       # %bb.0:
+; WIN-NEXT:    movdqa (%rdx), %xmm0
+; WIN-NEXT:    pand (%r8), %xmm0
+; WIN-NEXT:    pextrq $1, %xmm0, %rax
+; WIN-NEXT:    movq %xmm0, %rdx
+; WIN-NEXT:    movslq %edx, %r8
+; WIN-NEXT:    sarq $32, %rdx
+; WIN-NEXT:    movslq %eax, %r9
+; WIN-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; WIN-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; WIN-NEXT:    sarq $32, %rax
+; WIN-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; WIN-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; WIN-NEXT:    retq
+;
+; LIN32-LABEL: foo:
+; LIN32:       # %bb.0:
+; LIN32-NEXT:    pushl %edi
+; LIN32-NEXT:    pushl %esi
+; LIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; LIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; LIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; LIN32-NEXT:    movdqa (%edx), %xmm0
+; LIN32-NEXT:    pand (%ecx), %xmm0
+; LIN32-NEXT:    pextrd $1, %xmm0, %ecx
+; LIN32-NEXT:    pextrd $2, %xmm0, %edx
+; LIN32-NEXT:    pextrd $3, %xmm0, %esi
+; LIN32-NEXT:    movd %xmm0, %edi
+; LIN32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; LIN32-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; LIN32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; LIN32-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; LIN32-NEXT:    popl %esi
+; LIN32-NEXT:    popl %edi
+; LIN32-NEXT:    retl
   %a = load <4 x i32>, <4 x i32>* %i
   %b = load <4 x i32>, <4 x i32>* %h
   %j = and <4 x i32> %a, %b
@@ -60,13 +86,81 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
 ; Check that the sequence previously used above, which bounces the vector off the
 ; cache works for x86-32. Note that in this case it will not be used for index
 ; calculation, since indexes are 32-bit, not 64.
-; CHECK-LABEL: old:
-; LIN32: movaps	%xmm0, (%esp)
-; LIN32-DAG: {{(mov|and)}}l	(%esp),
-; LIN32-DAG: {{(mov|and)}}l	4(%esp),
-; LIN32-DAG: {{(mov|and)}}l	8(%esp),
-; LIN32-DAG: {{(mov|and)}}l	12(%esp),
 define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind {
+; LIN-LABEL: old:
+; LIN:       # %bb.0:
+; LIN-NEXT:    movdqa (%rsi), %xmm0
+; LIN-NEXT:    pand (%rdx), %xmm0
+; LIN-NEXT:    pextrq $1, %xmm0, %rax
+; LIN-NEXT:    movq %rax, %rdx
+; LIN-NEXT:    shrq $32, %rdx
+; LIN-NEXT:    movq %xmm0, %rsi
+; LIN-NEXT:    movq %rsi, %rdi
+; LIN-NEXT:    shrq $32, %rdi
+; LIN-NEXT:    andl %ecx, %esi
+; LIN-NEXT:    andl %ecx, %eax
+; LIN-NEXT:    andq %rcx, %rdi
+; LIN-NEXT:    andq %rcx, %rdx
+; LIN-NEXT:    movq %rdi, %xmm1
+; LIN-NEXT:    movq %rsi, %xmm0
+; LIN-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; LIN-NEXT:    movq %rdx, %xmm2
+; LIN-NEXT:    movq %rax, %xmm1
+; LIN-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; LIN-NEXT:    retq
+;
+; WIN-LABEL: old:
+; WIN:       # %bb.0:
+; WIN-NEXT:    movdqa (%rdx), %xmm0
+; WIN-NEXT:    pand (%r8), %xmm0
+; WIN-NEXT:    pextrq $1, %xmm0, %r8
+; WIN-NEXT:    movq %r8, %rcx
+; WIN-NEXT:    shrq $32, %rcx
+; WIN-NEXT:    movq %xmm0, %rax
+; WIN-NEXT:    movq %rax, %rdx
+; WIN-NEXT:    shrq $32, %rdx
+; WIN-NEXT:    andl %r9d, %eax
+; WIN-NEXT:    andl %r9d, %r8d
+; WIN-NEXT:    andq %r9, %rdx
+; WIN-NEXT:    andq %r9, %rcx
+; WIN-NEXT:    movq %rdx, %xmm1
+; WIN-NEXT:    movq %rax, %xmm0
+; WIN-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; WIN-NEXT:    movq %rcx, %xmm2
+; WIN-NEXT:    movq %r8, %xmm1
+; WIN-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; WIN-NEXT:    retq
+;
+; LIN32-LABEL: old:
+; LIN32:       # %bb.0:
+; LIN32-NEXT:    pushl %ebp
+; LIN32-NEXT:    movl %esp, %ebp
+; LIN32-NEXT:    pushl %esi
+; LIN32-NEXT:    andl $-16, %esp
+; LIN32-NEXT:    subl $32, %esp
+; LIN32-NEXT:    movl 20(%ebp), %eax
+; LIN32-NEXT:    movl 16(%ebp), %ecx
+; LIN32-NEXT:    movl 12(%ebp), %edx
+; LIN32-NEXT:    movaps (%edx), %xmm0
+; LIN32-NEXT:    andps (%ecx), %xmm0
+; LIN32-NEXT:    movaps %xmm0, (%esp)
+; LIN32-NEXT:    movl (%esp), %ecx
+; LIN32-NEXT:    andl %eax, %ecx
+; LIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; LIN32-NEXT:    andl %eax, %edx
+; LIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; LIN32-NEXT:    andl %eax, %esi
+; LIN32-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; LIN32-NEXT:    movd %edx, %xmm1
+; LIN32-NEXT:    movd %ecx, %xmm0
+; LIN32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; LIN32-NEXT:    movd %eax, %xmm2
+; LIN32-NEXT:    movd %esi, %xmm1
+; LIN32-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; LIN32-NEXT:    leal -4(%ebp), %esp
+; LIN32-NEXT:    popl %esi
+; LIN32-NEXT:    popl %ebp
+; LIN32-NEXT:    retl
   %a = load <4 x i32>, <4 x i32>* %i
   %b = load <4 x i32>, <4 x i32>* %h
   %j = and <4 x i32> %a, %b
@@ -77,7 +171,7 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind
   %q0 = zext i32 %d0 to i64
   %q1 = zext i32 %d1 to i64
   %q2 = zext i32 %d2 to i64
-  %q3 = zext i32 %d3 to i64  
+  %q3 = zext i32 %d3 to i64
   %r0 = and i64 %q0, %f
   %r1 = and i64 %q1, %f
   %r2 = and i64 %q2, %f
diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll
index d318dde34434..d3521ca9f1e3 100644
--- a/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/test/CodeGen/X86/masked_gather_scatter.ll
@@ -497,7 +497,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; SKX_SMALL-NEXT:    vpbroadcastq %rdi, %zmm2
 ; SKX_SMALL-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; SKX_SMALL-NEXT:    vpmovsxdq %ymm1, %zmm1
-; SKX_SMALL-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT:    vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
 ; SKX_SMALL-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; SKX_SMALL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
 ; SKX_SMALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
@@ -510,7 +510,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; SKX_LARGE-NEXT:    vpbroadcastq %rdi, %zmm2
 ; SKX_LARGE-NEXT:    vpmovsxdq %ymm1, %zmm1
 ; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
-; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT:    vpmuldq (%rax){1to8}, %zmm1, %zmm1
 ; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
 ; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm0, %zmm0
 ; SKX_LARGE-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
@@ -582,7 +582,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; SKX_SMALL-NEXT:    vpbroadcastq %rdi, %zmm2
 ; SKX_SMALL-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; SKX_SMALL-NEXT:    vpmovsxdq %ymm1, %zmm1
-; SKX_SMALL-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT:    vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
 ; SKX_SMALL-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; SKX_SMALL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
 ; SKX_SMALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
@@ -595,7 +595,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; SKX_LARGE-NEXT:    vpbroadcastq %rdi, %zmm2
 ; SKX_LARGE-NEXT:    vpmovsxdq %ymm1, %zmm1
 ; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
-; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT:    vpmuldq (%rax){1to8}, %zmm1, %zmm1
 ; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
 ; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm0, %zmm0
 ; SKX_LARGE-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
diff --git a/test/CodeGen/X86/setcc-combine.ll b/test/CodeGen/X86/setcc-combine.ll
index a4a8e67d742c..56cff4ab6f2f 100644
--- a/test/CodeGen/X86/setcc-combine.ll
+++ b/test/CodeGen/X86/setcc-combine.ll
@@ -183,3 +183,27 @@ define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) {
   ret i32 %t1
 }
 
+; (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
+; Don't apply this combine to i1 - the constant 2 is out of range for that type
+define void @test_i1_uge(i1 *%A2) {
+; CHECK-LABEL: test_i1_uge:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movb (%rdi), %al
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    xorb $1, %cl
+; CHECK-NEXT:    andb %cl, %al
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    andb $1, %cl
+; CHECK-NEXT:    movb %cl, (%rdi,%rax)
+; CHECK-NEXT:    retq
+  %L5 = load i1, i1* %A2
+  %C3 = icmp ne i1 %L5, true
+  %C8 = icmp eq i1 %L5, false
+  %C9 = icmp ugt i1 %C3, %C8
+  %G3 = getelementptr i1, i1* %A2, i1 %C9
+  store i1 %C3, i1* %G3
+  ret void
+}
+
diff --git a/test/CodeGen/X86/shrink_vmul.ll b/test/CodeGen/X86/shrink_vmul.ll
index a516c709517d..ced3a40e4a46 100644
--- a/test/CodeGen/X86/shrink_vmul.ll
+++ b/test/CodeGen/X86/shrink_vmul.ll
@@ -112,13 +112,14 @@ define void @mul_4xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE-NEXT:    movl c, %esi
 ; X86-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT:    pxor %xmm2, %xmm2
-; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X86-SSE-NEXT:    pmullw %xmm0, %xmm1
-; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT:    movdqu %xmm1, (%esi,%ecx,4)
+; X86-SSE-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X86-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X86-SSE-NEXT:    pmaddwd %xmm0, %xmm2
+; X86-SSE-NEXT:    movdqu %xmm2, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
 ; X86-SSE-NEXT:    retl
 ;
@@ -142,13 +143,14 @@ define void @mul_4xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-SSE:       # %bb.0: # %entry
 ; X64-SSE-NEXT:    movq {{.*}}(%rip), %rax
 ; X64-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-SSE-NEXT:    pxor %xmm2, %xmm2
-; X64-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X64-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X64-SSE-NEXT:    pmullw %xmm0, %xmm1
-; X64-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X64-SSE-NEXT:    movdqu %xmm1, (%rax,%rdx,4)
+; X64-SSE-NEXT:    pxor %xmm1, %xmm1
+; X64-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X64-SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X64-SSE-NEXT:    pmaddwd %xmm0, %xmm2
+; X64-SSE-NEXT:    movdqu %xmm2, (%rax,%rdx,4)
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: mul_4xi8:
@@ -2215,13 +2217,7 @@ define void @PR34947() {
 ; X86-SSE-NEXT:    xorl %edx, %edx
 ; X86-SSE-NEXT:    divl (%eax)
 ; X86-SSE-NEXT:    movd %edx, %xmm0
-; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
-; X86-SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X86-SSE-NEXT:    pmuludq %xmm2, %xmm1
-; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X86-SSE-NEXT:    pmuludq %xmm2, %xmm3
-; X86-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
-; X86-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE-NEXT:    pmaddwd {{\.LCPI.*}}, %xmm1
 ; X86-SSE-NEXT:    movl $8199, %eax # imm = 0x2007
 ; X86-SSE-NEXT:    movd %eax, %xmm2
 ; X86-SSE-NEXT:    pmuludq %xmm0, %xmm2
@@ -2415,13 +2411,7 @@ define void @PR34947() {
 ; X64-SSE-NEXT:    xorl %edx, %edx
 ; X64-SSE-NEXT:    divl (%rax)
 ; X64-SSE-NEXT:    movd %edx, %xmm0
-; X64-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X64-SSE-NEXT:    pmuludq %xmm2, %xmm1
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X64-SSE-NEXT:    pmuludq %xmm2, %xmm3
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
-; X64-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-SSE-NEXT:    pmaddwd {{.*}}(%rip), %xmm1
 ; X64-SSE-NEXT:    movl $8199, %eax # imm = 0x2007
 ; X64-SSE-NEXT:    movd %eax, %xmm2
 ; X64-SSE-NEXT:    pmuludq %xmm0, %xmm2
diff --git a/test/CodeGen/X86/slow-pmulld.ll b/test/CodeGen/X86/slow-pmulld.ll
index 4d73b11349f5..325e6ee4085a 100644
--- a/test/CodeGen/X86/slow-pmulld.ll
+++ b/test/CodeGen/X86/slow-pmulld.ll
@@ -10,22 +10,14 @@
 define <4 x i32> @foo(<4 x i8> %A) {
 ; CHECK32-LABEL: foo:
 ; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[4],zero,xmm0[8],zero,xmm0[12],zero,xmm0[u,u,u,u,u,u,u,u]
-; CHECK32-NEXT:    movdqa {{.*#+}} xmm1 = <18778,18778,18778,18778,u,u,u,u>
-; CHECK32-NEXT:    movdqa %xmm0, %xmm2
-; CHECK32-NEXT:    pmullw %xmm1, %xmm0
-; CHECK32-NEXT:    pmulhw %xmm1, %xmm2
-; CHECK32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; CHECK32-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; CHECK32-NEXT:    pmaddwd {{\.LCPI.*}}, %xmm0
 ; CHECK32-NEXT:    retl
 ;
 ; CHECK64-LABEL: foo:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[4],zero,xmm0[8],zero,xmm0[12],zero,xmm0[u,u,u,u,u,u,u,u]
-; CHECK64-NEXT:    movdqa {{.*#+}} xmm1 = <18778,18778,18778,18778,u,u,u,u>
-; CHECK64-NEXT:    movdqa %xmm0, %xmm2
-; CHECK64-NEXT:    pmullw %xmm1, %xmm0
-; CHECK64-NEXT:    pmulhw %xmm1, %xmm2
-; CHECK64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; CHECK64-NEXT:    pand {{.*}}(%rip), %xmm0
+; CHECK64-NEXT:    pmaddwd {{.*}}(%rip), %xmm0
 ; CHECK64-NEXT:    retq
 ;
 ; SSE4-32-LABEL: foo:
diff --git a/test/CodeGen/X86/sse2-schedule.ll b/test/CodeGen/X86/sse2-schedule.ll
index ad2edfe0959e..a789b861b7aa 100644
--- a/test/CodeGen/X86/sse2-schedule.ll
+++ b/test/CodeGen/X86/sse2-schedule.ll
@@ -5624,16 +5624,8 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
 ;
 ; ATOM-LABEL: test_pmaddwd:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pmaddwd %xmm1, %xmm0
-; ATOM-NEXT:    pmaddwd (%rdi), %xmm0
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:5.00]
+; ATOM-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [5:5.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pmaddwd:
@@ -6241,16 +6233,8 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
 ;
 ; ATOM-LABEL: test_pmuludq:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pmuludq %xmm1, %xmm0
-; ATOM-NEXT:    pmuludq (%rdi), %xmm0
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:5.00]
+; ATOM-NEXT:    pmuludq (%rdi), %xmm0 # sched: [5:5.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pmuludq:
@@ -6394,12 +6378,8 @@ define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
 ;
 ; ATOM-LABEL: test_psadbw:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    psadbw %xmm1, %xmm0
-; ATOM-NEXT:    psadbw (%rdi), %xmm0
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    psadbw %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    psadbw (%rdi), %xmm0 # sched: [1:1.00]
 ; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    nop # sched: [1:0.50]
diff --git a/test/MC/ARM/branch-disassemble.s b/test/MC/ARM/branch-disassemble.s
index 4df40e05e8c9..5380a1d0b9e4 100644
--- a/test/MC/ARM/branch-disassemble.s
+++ b/test/MC/ARM/branch-disassemble.s
@@ -7,8 +7,8 @@
 @ RUN:   | FileCheck %s -check-prefix CHECK-THUMB
 
 b.w .Lbranch
-@ CHECK-ARM: b #4 <$a.0+0xC>
-@ CHECK-THUMB: b.w #8 <$t.0+0xC>
+@ CHECK-ARM: b #4 <$a.0+0xc>
+@ CHECK-THUMB: b.w #8 <$t.0+0xc>
 adds r0, r1, #42
 adds r1, r2, #42
 .Lbranch:
diff --git a/test/MC/ELF/comdat-declaration-errors.s b/test/MC/ELF/comdat-declaration-errors.s
new file mode 100644
index 000000000000..fade8cfe41dd
--- /dev/null
+++ b/test/MC/ELF/comdat-declaration-errors.s
@@ -0,0 +1,14 @@
+// RUN: not llvm-mc -triple x86_64-pc-linux-gnu %s \
+// RUN:   -filetype=obj -o %t.o 2>&1 | FileCheck %s
+
+// Check that we error out on incorrect COMDAT declarations
+// and do not just silently ignore them.
+
+// CHECK:      error: invalid group name
+// CHECK-NEXT: .section .foo,"G",@progbits,-abc,comdat
+
+// CHECK:      error: invalid linkage
+// CHECK-NEXT: .section .bar,"G",@progbits,abc,-comdat
+
+.section .foo,"G",@progbits,-abc,comdat
+.section .bar,"G",@progbits,abc,-comdat
diff --git a/test/MC/X86/PREFETCH-32.s b/test/MC/X86/PREFETCH-32.s
new file mode 100644
index 000000000000..caec44ea386c
--- /dev/null
+++ b/test/MC/X86/PREFETCH-32.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: prefetch -485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x84,0x82,0x10,0xe3,0x0f,0xe3]        
+prefetch -485498096(%edx,%eax,4) 
+
+// CHECK: prefetch 485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x84,0x82,0xf0,0x1c,0xf0,0x1c]        
+prefetch 485498096(%edx,%eax,4) 
+
+// CHECK: prefetch 485498096(%edx) 
+// CHECK: encoding: [0x0f,0x0d,0x82,0xf0,0x1c,0xf0,0x1c]        
+prefetch 485498096(%edx) 
+
+// CHECK: prefetch 485498096 
+// CHECK: encoding: [0x0f,0x0d,0x05,0xf0,0x1c,0xf0,0x1c]        
+prefetch 485498096 
+
+// CHECK: prefetch 64(%edx,%eax) 
+// CHECK: encoding: [0x0f,0x0d,0x44,0x02,0x40]        
+prefetch 64(%edx,%eax) 
+
+// CHECK: prefetch (%edx) 
+// CHECK: encoding: [0x0f,0x0d,0x02]        
+prefetch (%edx) 
+
+// CHECK: prefetchnta -485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x18,0x84,0x82,0x10,0xe3,0x0f,0xe3]        
+prefetchnta -485498096(%edx,%eax,4) 
+
+// CHECK: prefetchnta 485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x18,0x84,0x82,0xf0,0x1c,0xf0,0x1c]        
+prefetchnta 485498096(%edx,%eax,4) 
+
+// CHECK: prefetchnta 485498096(%edx) 
+// CHECK: encoding: [0x0f,0x18,0x82,0xf0,0x1c,0xf0,0x1c]        
+prefetchnta 485498096(%edx) 
+
+// CHECK: prefetchnta 485498096 
+// CHECK: encoding: [0x0f,0x18,0x05,0xf0,0x1c,0xf0,0x1c]        
+prefetchnta 485498096 
+
+// CHECK: prefetchnta 64(%edx,%eax) 
+// CHECK: encoding: [0x0f,0x18,0x44,0x02,0x40]        
+prefetchnta 64(%edx,%eax) 
+
+// CHECK: prefetchnta (%edx) 
+// CHECK: encoding: [0x0f,0x18,0x02]        
+prefetchnta (%edx) 
+
+// CHECK: prefetcht0 -485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x18,0x8c,0x82,0x10,0xe3,0x0f,0xe3]        
+prefetcht0 -485498096(%edx,%eax,4) 
+
+// CHECK: prefetcht0 485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x18,0x8c,0x82,0xf0,0x1c,0xf0,0x1c]        
+prefetcht0 485498096(%edx,%eax,4) 
+
+// CHECK: prefetcht0 485498096(%edx) 
+// CHECK: encoding: [0x0f,0x18,0x8a,0xf0,0x1c,0xf0,0x1c]        
+prefetcht0 485498096(%edx) 
+
+// CHECK: prefetcht0 485498096 
+// CHECK: encoding: [0x0f,0x18,0x0d,0xf0,0x1c,0xf0,0x1c]        
+prefetcht0 485498096 
+
+// CHECK: prefetcht0 64(%edx,%eax) 
+// CHECK: encoding: [0x0f,0x18,0x4c,0x02,0x40]        
+prefetcht0 64(%edx,%eax) 
+
+// CHECK: prefetcht0 (%edx) 
+// CHECK: encoding: [0x0f,0x18,0x0a]        
+prefetcht0 (%edx) 
+
+// CHECK: prefetcht1 -485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x18,0x94,0x82,0x10,0xe3,0x0f,0xe3]        
+prefetcht1 -485498096(%edx,%eax,4) 
+
+// CHECK: prefetcht1 485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x18,0x94,0x82,0xf0,0x1c,0xf0,0x1c]        
+prefetcht1 485498096(%edx,%eax,4) 
+
+// CHECK: prefetcht1 485498096(%edx) 
+// CHECK: encoding: [0x0f,0x18,0x92,0xf0,0x1c,0xf0,0x1c]        
+prefetcht1 485498096(%edx) 
+
+// CHECK: prefetcht1 485498096 
+// CHECK: encoding: [0x0f,0x18,0x15,0xf0,0x1c,0xf0,0x1c]        
+prefetcht1 485498096 
+
+// CHECK: prefetcht1 64(%edx,%eax) 
+// CHECK: encoding: [0x0f,0x18,0x54,0x02,0x40]        
+prefetcht1 64(%edx,%eax) 
+
+// CHECK: prefetcht1 (%edx) 
+// CHECK: encoding: [0x0f,0x18,0x12]        
+prefetcht1 (%edx) 
+
+// CHECK: prefetcht2 -485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x18,0x9c,0x82,0x10,0xe3,0x0f,0xe3]        
+prefetcht2 -485498096(%edx,%eax,4) 
+
+// CHECK: prefetcht2 485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x18,0x9c,0x82,0xf0,0x1c,0xf0,0x1c]        
+prefetcht2 485498096(%edx,%eax,4) 
+
+// CHECK: prefetcht2 485498096(%edx) 
+// CHECK: encoding: [0x0f,0x18,0x9a,0xf0,0x1c,0xf0,0x1c]        
+prefetcht2 485498096(%edx) 
+
+// CHECK: prefetcht2 485498096 
+// CHECK: encoding: [0x0f,0x18,0x1d,0xf0,0x1c,0xf0,0x1c]        
+prefetcht2 485498096 
+
+// CHECK: prefetcht2 64(%edx,%eax) 
+// CHECK: encoding: [0x0f,0x18,0x5c,0x02,0x40]        
+prefetcht2 64(%edx,%eax) 
+
+// CHECK: prefetcht2 (%edx) 
+// CHECK: encoding: [0x0f,0x18,0x1a]        
+prefetcht2 (%edx) 
+
+// CHECK: prefetchw -485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x8c,0x82,0x10,0xe3,0x0f,0xe3]        
+prefetchw -485498096(%edx,%eax,4) 
+
+// CHECK: prefetchw 485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x8c,0x82,0xf0,0x1c,0xf0,0x1c]        
+prefetchw 485498096(%edx,%eax,4) 
+
+// CHECK: prefetchw 485498096(%edx) 
+// CHECK: encoding: [0x0f,0x0d,0x8a,0xf0,0x1c,0xf0,0x1c]        
+prefetchw 485498096(%edx) 
+
+// CHECK: prefetchw 485498096 
+// CHECK: encoding: [0x0f,0x0d,0x0d,0xf0,0x1c,0xf0,0x1c]        
+prefetchw 485498096 
+
+// CHECK: prefetchw 64(%edx,%eax) 
+// CHECK: encoding: [0x0f,0x0d,0x4c,0x02,0x40]        
+prefetchw 64(%edx,%eax) 
+
+// CHECK: prefetchw (%edx) 
+// CHECK: encoding: [0x0f,0x0d,0x0a]        
+prefetchw (%edx) 
+
+// CHECK: prefetchwt1 -485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x94,0x82,0x10,0xe3,0x0f,0xe3]
+prefetchwt1 -485498096(%edx,%eax,4) 
+
+// CHECK: prefetchwt1 485498096(%edx,%eax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x94,0x82,0xf0,0x1c,0xf0,0x1c]
+prefetchwt1 485498096(%edx,%eax,4) 
+
+// CHECK: prefetchwt1 485498096(%edx) 
+// CHECK: encoding: [0x0f,0x0d,0x92,0xf0,0x1c,0xf0,0x1c]
+prefetchwt1 485498096(%edx) 
+
+// CHECK: prefetchwt1 485498096 
+// CHECK: encoding: [0x0f,0x0d,0x15,0xf0,0x1c,0xf0,0x1c]
+prefetchwt1 485498096 
+
+// CHECK: prefetchwt1 64(%edx,%eax) 
+// CHECK: encoding: [0x0f,0x0d,0x54,0x02,0x40]
+prefetchwt1 64(%edx,%eax) 
+
+// CHECK: prefetchwt1 (%edx) 
+// CHECK: encoding: [0x0f,0x0d,0x12]
+prefetchwt1 (%edx) 
diff --git a/test/MC/X86/PREFETCH-64.s b/test/MC/X86/PREFETCH-64.s
new file mode 100644
index 000000000000..0c4a126a2a20
--- /dev/null
+++ b/test/MC/X86/PREFETCH-64.s
@@ -0,0 +1,170 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: prefetch 485498096 
+// CHECK: encoding: [0x0f,0x0d,0x04,0x25,0xf0,0x1c,0xf0,0x1c]        
+prefetch 485498096 
+
+// CHECK: prefetch 64(%rdx) 
+// CHECK: encoding: [0x0f,0x0d,0x42,0x40]        
+prefetch 64(%rdx) 
+
+// CHECK: prefetch 64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x44,0x82,0x40]        
+prefetch 64(%rdx,%rax,4) 
+
+// CHECK: prefetch -64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x44,0x82,0xc0]        
+prefetch -64(%rdx,%rax,4) 
+
+// CHECK: prefetch 64(%rdx,%rax) 
+// CHECK: encoding: [0x0f,0x0d,0x44,0x02,0x40]        
+prefetch 64(%rdx,%rax) 
+
+// CHECK: prefetchnta 485498096 
+// CHECK: encoding: [0x0f,0x18,0x04,0x25,0xf0,0x1c,0xf0,0x1c]        
+prefetchnta 485498096 
+
+// CHECK: prefetchnta 64(%rdx) 
+// CHECK: encoding: [0x0f,0x18,0x42,0x40]        
+prefetchnta 64(%rdx) 
+
+// CHECK: prefetchnta 64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x18,0x44,0x82,0x40]        
+prefetchnta 64(%rdx,%rax,4) 
+
+// CHECK: prefetchnta -64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x18,0x44,0x82,0xc0]        
+prefetchnta -64(%rdx,%rax,4) 
+
+// CHECK: prefetchnta 64(%rdx,%rax) 
+// CHECK: encoding: [0x0f,0x18,0x44,0x02,0x40]        
+prefetchnta 64(%rdx,%rax) 
+
+// CHECK: prefetchnta (%rdx) 
+// CHECK: encoding: [0x0f,0x18,0x02]        
+prefetchnta (%rdx) 
+
+// CHECK: prefetch (%rdx) 
+// CHECK: encoding: [0x0f,0x0d,0x02]        
+prefetch (%rdx) 
+
+// CHECK: prefetcht0 485498096 
+// CHECK: encoding: [0x0f,0x18,0x0c,0x25,0xf0,0x1c,0xf0,0x1c]        
+prefetcht0 485498096 
+
+// CHECK: prefetcht0 64(%rdx) 
+// CHECK: encoding: [0x0f,0x18,0x4a,0x40]        
+prefetcht0 64(%rdx) 
+
+// CHECK: prefetcht0 64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x18,0x4c,0x82,0x40]        
+prefetcht0 64(%rdx,%rax,4) 
+
+// CHECK: prefetcht0 -64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x18,0x4c,0x82,0xc0]        
+prefetcht0 -64(%rdx,%rax,4) 
+
+// CHECK: prefetcht0 64(%rdx,%rax) 
+// CHECK: encoding: [0x0f,0x18,0x4c,0x02,0x40]        
+prefetcht0 64(%rdx,%rax) 
+
+// CHECK: prefetcht0 (%rdx) 
+// CHECK: encoding: [0x0f,0x18,0x0a]        
+prefetcht0 (%rdx) 
+
+// CHECK: prefetcht1 485498096 
+// CHECK: encoding: [0x0f,0x18,0x14,0x25,0xf0,0x1c,0xf0,0x1c]        
+prefetcht1 485498096 
+
+// CHECK: prefetcht1 64(%rdx) 
+// CHECK: encoding: [0x0f,0x18,0x52,0x40]        
+prefetcht1 64(%rdx) 
+
+// CHECK: prefetcht1 64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x18,0x54,0x82,0x40]        
+prefetcht1 64(%rdx,%rax,4) 
+
+// CHECK: prefetcht1 -64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x18,0x54,0x82,0xc0]        
+prefetcht1 -64(%rdx,%rax,4) 
+
+// CHECK: prefetcht1 64(%rdx,%rax) 
+// CHECK: encoding: [0x0f,0x18,0x54,0x02,0x40]        
+prefetcht1 64(%rdx,%rax) 
+
+// CHECK: prefetcht1 (%rdx) 
+// CHECK: encoding: [0x0f,0x18,0x12]        
+prefetcht1 (%rdx) 
+
+// CHECK: prefetcht2 485498096 
+// CHECK: encoding: [0x0f,0x18,0x1c,0x25,0xf0,0x1c,0xf0,0x1c]        
+prefetcht2 485498096 
+
+// CHECK: prefetcht2 64(%rdx) 
+// CHECK: encoding: [0x0f,0x18,0x5a,0x40]        
+prefetcht2 64(%rdx) 
+
+// CHECK: prefetcht2 64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x18,0x5c,0x82,0x40]        
+prefetcht2 64(%rdx,%rax,4) 
+
+// CHECK: prefetcht2 -64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x18,0x5c,0x82,0xc0]        
+prefetcht2 -64(%rdx,%rax,4) 
+
+// CHECK: prefetcht2 64(%rdx,%rax) 
+// CHECK: encoding: [0x0f,0x18,0x5c,0x02,0x40]        
+prefetcht2 64(%rdx,%rax) 
+
+// CHECK: prefetcht2 (%rdx) 
+// CHECK: encoding: [0x0f,0x18,0x1a]        
+prefetcht2 (%rdx) 
+
+// CHECK: prefetchw 485498096 
+// CHECK: encoding: [0x0f,0x0d,0x0c,0x25,0xf0,0x1c,0xf0,0x1c]        
+prefetchw 485498096 
+
+// CHECK: prefetchw 64(%rdx) 
+// CHECK: encoding: [0x0f,0x0d,0x4a,0x40]        
+prefetchw 64(%rdx) 
+
+// CHECK: prefetchw 64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x4c,0x82,0x40]        
+prefetchw 64(%rdx,%rax,4) 
+
+// CHECK: prefetchw -64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x4c,0x82,0xc0]        
+prefetchw -64(%rdx,%rax,4) 
+
+// CHECK: prefetchw 64(%rdx,%rax) 
+// CHECK: encoding: [0x0f,0x0d,0x4c,0x02,0x40]        
+prefetchw 64(%rdx,%rax) 
+
+// CHECK: prefetchw (%rdx) 
+// CHECK: encoding: [0x0f,0x0d,0x0a]        
+prefetchw (%rdx) 
+
+// CHECK: prefetchwt1 485498096 
+// CHECK: encoding: [0x0f,0x0d,0x14,0x25,0xf0,0x1c,0xf0,0x1c]
+prefetchwt1 485498096 
+
+// CHECK: prefetchwt1 64(%rdx) 
+// CHECK: encoding: [0x0f,0x0d,0x52,0x40]
+prefetchwt1 64(%rdx) 
+
+// CHECK: prefetchwt1 64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x54,0x82,0x40]
+prefetchwt1 64(%rdx,%rax,4) 
+
+// CHECK: prefetchwt1 -64(%rdx,%rax,4) 
+// CHECK: encoding: [0x0f,0x0d,0x54,0x82,0xc0]
+prefetchwt1 -64(%rdx,%rax,4) 
+
+// CHECK: prefetchwt1 64(%rdx,%rax) 
+// CHECK: encoding: [0x0f,0x0d,0x54,0x02,0x40]        
+prefetchwt1 64(%rdx,%rax) 
+
+// CHECK: prefetchwt1 (%rdx) 
+// CHECK: encoding: [0x0f,0x0d,0x12]        
+prefetchwt1 (%rdx) 
+
diff --git a/test/MC/X86/RDPMC-32.s b/test/MC/X86/RDPMC-32.s
new file mode 100644
index 000000000000..5168af3a62c1
--- /dev/null
+++ b/test/MC/X86/RDPMC-32.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: rdpmc 
+// CHECK: encoding: [0x0f,0x33]          
+rdpmc 
+
diff --git a/test/MC/X86/RDPMC-64.s b/test/MC/X86/RDPMC-64.s
new file mode 100644
index 000000000000..56fa3d9fa828
--- /dev/null
+++ b/test/MC/X86/RDPMC-64.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: rdpmc 
+// CHECK: encoding: [0x0f,0x33]          
+rdpmc 
+
diff --git a/test/MC/X86/RDRAND-32.s b/test/MC/X86/RDRAND-32.s
new file mode 100644
index 000000000000..5461ca74eabb
--- /dev/null
+++ b/test/MC/X86/RDRAND-32.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: rdrandl %eax 
+// CHECK: encoding: [0x0f,0xc7,0xf0]         
+rdrandl %eax 
+
diff --git a/test/MC/X86/RDRAND-64.s b/test/MC/X86/RDRAND-64.s
new file mode 100644
index 000000000000..3482c0ecd5c3
--- /dev/null
+++ b/test/MC/X86/RDRAND-64.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: rdrandl %r13d 
+// CHECK: encoding: [0x41,0x0f,0xc7,0xf5]         
+rdrandl %r13d 
+
+// CHECK: rdrandq %r13 
+// CHECK: encoding: [0x49,0x0f,0xc7,0xf5]         
+rdrandq %r13 
+
+// CHECK: rdrandw %r13w 
+// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xf5]         
+rdrandw %r13w 
+
diff --git a/test/MC/X86/RDSEED-32.s b/test/MC/X86/RDSEED-32.s
new file mode 100644
index 000000000000..87be0d502a66
--- /dev/null
+++ b/test/MC/X86/RDSEED-32.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: rdseedl %eax 
+// CHECK: encoding: [0x0f,0xc7,0xf8]         
+rdseedl %eax 
+
diff --git a/test/MC/X86/RDSEED-64.s b/test/MC/X86/RDSEED-64.s
new file mode 100644
index 000000000000..0d710ceaa5bb
--- /dev/null
+++ b/test/MC/X86/RDSEED-64.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: rdseedl %r13d 
+// CHECK: encoding: [0x41,0x0f,0xc7,0xfd]         
+rdseedl %r13d 
+
+// CHECK: rdseedq %r13 
+// CHECK: encoding: [0x49,0x0f,0xc7,0xfd]         
+rdseedq %r13 
+
+// CHECK: rdseedw %r13w 
+// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xfd]         
+rdseedw %r13w 
+
diff --git a/test/MC/X86/RDTSCP-32.s b/test/MC/X86/RDTSCP-32.s
new file mode 100644
index 000000000000..48232edf7d52
--- /dev/null
+++ b/test/MC/X86/RDTSCP-32.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: rdtscp 
+// CHECK: encoding: [0x0f,0x01,0xf9]          
+rdtscp 
+
diff --git a/test/MC/X86/RDTSCP-64.s b/test/MC/X86/RDTSCP-64.s
new file mode 100644
index 000000000000..045fd49a2738
--- /dev/null
+++ b/test/MC/X86/RDTSCP-64.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: rdtscp 
+// CHECK: encoding: [0x0f,0x01,0xf9]          
+rdtscp 
+
diff --git a/test/MC/X86/RDWRFSGS-64.s b/test/MC/X86/RDWRFSGS-64.s
new file mode 100644
index 000000000000..47314caf867d
--- /dev/null
+++ b/test/MC/X86/RDWRFSGS-64.s
@@ -0,0 +1,34 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: rdfsbasel %r13d 
+// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xc5]         
+rdfsbasel %r13d 
+
+// CHECK: rdfsbaseq %r13 
+// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xc5]         
+rdfsbaseq %r13 
+
+// CHECK: rdgsbasel %r13d 
+// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xcd]         
+rdgsbasel %r13d 
+
+// CHECK: rdgsbaseq %r13 
+// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xcd]         
+rdgsbaseq %r13 
+
+// CHECK: wrfsbasel %r13d 
+// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xd5]         
+wrfsbasel %r13d 
+
+// CHECK: wrfsbaseq %r13 
+// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xd5]         
+wrfsbaseq %r13 
+
+// CHECK: wrgsbasel %r13d 
+// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xdd]         
+wrgsbasel %r13d 
+
+// CHECK: wrgsbaseq %r13 
+// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xdd]         
+wrgsbaseq %r13 
+
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index b1e89cde9796..23846d921a8c 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -99,6 +99,10 @@
 // CHECK: shll $2, %eax
         sall $2, %eax
 
+// CHECK: rep movsb
+rep     # comment
+movsb
+
 // CHECK: rep
 // CHECK: insb
         rep;insb
diff --git a/test/MC/X86/x86_64-asm-match.s b/test/MC/X86/x86_64-asm-match.s
new file mode 100644
index 000000000000..3208e4f4e0f0
--- /dev/null
+++ b/test/MC/X86/x86_64-asm-match.s
@@ -0,0 +1,52 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown  -debug-only=asm-matcher  %s 2>&1 | FileCheck %s
+// REQUIRES: asserts
+
+// CHECK: AsmMatcher: found 4 encodings with mnemonic 'pshufb'
+// CHECK:Trying to match opcode MMX_PSHUFBrr64
+// CHECK:  Matching formal operand class MCK_VR64 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode
+// CHECK:Trying to match opcode PSHUFBrr
+// CHECK:  Matching formal operand class MCK_FR32 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode
+// CHECK:Trying to match opcode PSHUFBrm
+// CHECK:  Matching formal operand class MCK_Mem128 against actual operand at index 1 (): match success using generic matcher
+// CHECK:  Matching formal operand class MCK_FR32 against actual operand at index 2 (): match success using generic matcher
+// CHECK:  Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range Opcode result: complete match, selecting this opcode
+// CHECK:AsmMatcher: found 2 encodings with mnemonic 'sha1rnds4'
+// CHECK:Trying to match opcode SHA1RNDS4rri
+// CHECK:  Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher
+// CHECK:  Matching formal operand class MCK_FR32 against actual operand at index 2 (): match success using generic matcher
+// CHECK:  Matching formal operand class MCK_FR32 against actual operand at index 3 (): match success using generic matcher
+// CHECK:  Matching formal operand class InvalidMatchClass against actual operand at index 4: actual operand index out of range Opcode result: complete match, selecting this opcode
+// CHECK:AsmMatcher: found 4 encodings with mnemonic 'pinsrw'
+// CHECK:Trying to match opcode MMX_PINSRWirri
+// CHECK:  Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher
+// CHECK:  Matching formal operand class MCK_GR32orGR64 against actual operand at index 2 (): match success using generic matcher
+// CHECK:  Matching formal operand class MCK_VR64 against actual operand at index 3 (): Opcode result: multiple operand mismatches, ignoring this opcode
+// CHECK:Trying to match opcode PINSRWrri
+// CHECK:  Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher
+// CHECK:  Matching formal operand class MCK_GR32orGR64 against actual operand at index 2 (): match success using generic matcher
+// CHECK:  Matching formal operand class MCK_FR32 against actual operand at index 3 (): match success using generic matcher
+// CHECK:  Matching formal operand class InvalidMatchClass against actual operand at index 4: actual operand index out of range Opcode result: complete match, selecting this opcode
+// CHECK:AsmMatcher: found 2 encodings with mnemonic 'crc32l'
+// CHECK:Trying to match opcode CRC32r32r32
+// CHECK:  Matching formal operand class MCK_GR32 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode
+// CHECK:Trying to match opcode CRC32r32m32
+// CHECK:  Matching formal operand class MCK_Mem32 against actual operand at index 1 (): match success using generic matcher
+// CHECK:  Matching formal operand class MCK_GR32 against actual operand at index 2 (): match success using generic matcher
+// CHECK:  Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range Opcode result: complete match, selecting this opcode
+// CHECK:AsmMatcher: found 4 encodings with mnemonic 'punpcklbw'
+// CHECK:Trying to match opcode MMX_PUNPCKLBWirr
+// CHECK:  Matching formal operand class MCK_VR64 against actual operand at index 1 (): match success using generic matcher
+// CHECK:  Matching formal operand class MCK_VR64 against actual operand at index 2 (): Opcode result: multiple operand mismatches, ignoring this opcode
+// CHECK:Trying to match opcode MMX_PUNPCKLBWirm
+// CHECK:  Matching formal operand class MCK_VR64 against actual operand at index 1 (): match success using generic matcher
+// CHECK:  Matching formal operand class MCK_Mem64 against actual operand at index 2 (): match success using generic matcher
+// CHECK:  Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range Opcode result: complete match, selecting this opcode
+
+
+pshufb    CPI1_0(%rip), %xmm1
+sha1rnds4 $1, %xmm1, %xmm2
+pinsrw    $3, %ecx, %xmm5
+crc32l    %gs:0xdeadbeef(%rbx,%rcx,8),%ecx
+
+.intel_syntax
+punpcklbw mm0, qword ptr [rsp]
diff --git a/test/SafepointIRVerifier/from-same-relocation-in-phi-nodes.ll b/test/SafepointIRVerifier/from-same-relocation-in-phi-nodes.ll
new file mode 100644
index 000000000000..4df19b2d7262
--- /dev/null
+++ b/test/SafepointIRVerifier/from-same-relocation-in-phi-nodes.ll
@@ -0,0 +1,26 @@
+; XFAIL: *
+; RUN: opt -safepoint-ir-verifier-print-only -verify-safepoint-ir -S %s 2>&1 | FileCheck %s
+
+; In %merge, %val.unrelocated, %ptr and %arg should all be unrelocated.
+; FIXME: If this test fails, it is a false-positive alarm; the IR is correct.
+define void @test.unrelocated-phi.ok(i8 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: Verifying gc pointers in function: test.unrelocated-phi.ok
+ bci_0:
+  %ptr = getelementptr i8, i8 addrspace(1)* %arg, i64 4
+  br i1 undef, label %left, label %right
+
+ left:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+  br label %merge
+
+ right:
+  br label %merge
+
+ merge:
+; CHECK: No illegal uses found by SafepointIRVerifier in: test.unrelocated-phi.ok
+  %val.unrelocated = phi i8 addrspace(1)* [ %arg, %left ], [ %ptr, %right ]
+  %c = icmp eq i8 addrspace(1)* %val.unrelocated, %arg
+  ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/SafepointIRVerifier/unrecorded-live-at-sp.ll b/test/SafepointIRVerifier/unrecorded-live-at-sp.ll
index e3f21c3e7133..5cd4aa741454 100644
--- a/test/SafepointIRVerifier/unrecorded-live-at-sp.ll
+++ b/test/SafepointIRVerifier/unrecorded-live-at-sp.ll
@@ -1,8 +1,9 @@
 ; RUN: opt %s -safepoint-ir-verifier-print-only -verify-safepoint-ir -S 2>&1 | FileCheck %s
 
 ; CHECK:      Illegal use of unrelocated value found!
-; CHECK-NEXT: Def:   %base_phi3 = phi %jObject addrspace(1)* [ %obj609.relocated, %not_zero146 ], [ %base_phi2, %bci_37-aload ], !is_base_value !0
-; CHECK-NEXT: Use:   %base_phi2 = phi %jObject addrspace(1)* [ %base_phi3, %not_zero179 ], [ %cast5, %bci_0 ], !is_base_value !0
+; CHECK-NEXT: Def:   %base_phi4 = phi %jObject addrspace(1)* addrspace(1)* [ %addr98.relocated, %not_zero146 ], [ %cast6, %bci_37-aload ], !is_base_value !0
+; CHECK-NEXT: Use:   %safepoint_token = tail call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, %jObject addrspace(1)* %base_phi1, %jObject addrspace(1)* addrspace(1)* %base_phi4, %jObject addrspace(1)* addrspace(1)* %relocated4, %jObject addrspace(1)* %relocated7)
+
 
 %jObject = type { [8 x i8] }
 
diff --git a/test/SafepointIRVerifier/uses-in-phi-nodes.ll b/test/SafepointIRVerifier/uses-in-phi-nodes.ll
index d06eb6e0d9a7..bbf98577230d 100644
--- a/test/SafepointIRVerifier/uses-in-phi-nodes.ll
+++ b/test/SafepointIRVerifier/uses-in-phi-nodes.ll
@@ -14,9 +14,9 @@ define i8 addrspace(1)* @test.not.ok.0(i8 addrspace(1)* %arg) gc "statepoint-exa
 
  merge:
 ; CHECK: Illegal use of unrelocated value found!
-; CHECK-NEXT: Def: i8 addrspace(1)* %arg
-; CHECK-NEXT: Use:   %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ]
-  %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right]
+; CHECK-NEXT: Def:   %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ]
+; CHECK-NEXT: Use:   ret i8 addrspace(1)* %val
+  %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ]
   ret i8 addrspace(1)* %val
 }
 
@@ -34,9 +34,9 @@ define i8 addrspace(1)* @test.not.ok.1(i8 addrspace(1)* %arg) gc "statepoint-exa
 
  merge:
 ; CHECK: Illegal use of unrelocated value found!
-; CHECK-NEXT: Def: i8 addrspace(1)* %arg
-; CHECK-NEXT: Use:   %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ]
-  %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right]
+; CHECK-NEXT: Def:   %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ]
+; CHECK-NEXT: Use:   ret i8 addrspace(1)* %val
+  %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ]
   ret i8 addrspace(1)* %val
 }
 
@@ -74,5 +74,99 @@ define i8 addrspace(1)* @test.ok.1(i8 addrspace(1)* %arg) gc "statepoint-example
   ret i8 addrspace(1)* %val
 }
 
+; Comparing a poisoned pointer with null should be allowed.
+define void @test.poisoned.cmp.ok(i8 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.ok
+ bci_0:
+  br i1 undef, label %left, label %right
+
+ left:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
+  %arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg
+  br label %merge
+
+ right:
+  %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
+  br label %merge
+
+ merge:
+; CHECK: No illegal uses found by SafepointIRVerifier in: test.poisoned.cmp.ok
+  %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ]
+  %c = icmp eq i8 addrspace(1)* %val.poisoned, null
+  ret void
+}
+
+; It is illegal to compare a poisoned pointer with a relocated pointer.
+define void @test.poisoned.cmp.fail.0(i8 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.fail.0
+ bci_0:
+  br i1 undef, label %left, label %right
+
+ left:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
+  %arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg
+  br label %merge
+
+ right:
+  %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
+  %arg.relocated2 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token2, i32 7, i32 7) ; arg, arg
+  br label %merge
+
+ merge:
+; CHECK: Illegal use of unrelocated value found!
+; CHECK-NEXT: Def:   %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ]
+; CHECK-NEXT: Use:   %c = icmp eq i8 addrspace(1)* %val.poisoned, %val
+  %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ]
+  %val = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg.relocated2, %right ]
+  %c = icmp eq i8 addrspace(1)* %val.poisoned, %val
+  ret void
+}
+
+; It is illegal to compare a poisoned pointer with an unrelocated pointer.
+define void @test.poisoned.cmp.fail.1(i8 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.fail.1
+ bci_0:
+  br i1 undef, label %left, label %right
+
+ left:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
+  %arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg
+  br label %merge
+
+ right:
+  %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
+  %arg.relocated2 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token2, i32 7, i32 7) ; arg, arg
+  br label %merge
+
+ merge:
+; CHECK: Illegal use of unrelocated value found!
+; CHECK-NEXT: Def:   %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ]
+; CHECK-NEXT: Use:   %c = icmp eq i8 addrspace(1)* %val.poisoned, %arg
+  %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ]
+  %c = icmp eq i8 addrspace(1)* %val.poisoned, %arg
+  ret void
+}
+
+; Comparing an unrelocated phi with an unrelocated value should be allowed.
+define void @test.unrelocated-phi.cmp.ok(i8 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: Verifying gc pointers in function: test.unrelocated-phi.cmp.ok
+ bci_0:
+  br i1 undef, label %left, label %right
+
+ left:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+  br label %merge
+
+ right:
+  br label %merge
+
+ merge:
+; CHECK: No illegal uses found by SafepointIRVerifier in: test.unrelocated-phi.cmp.ok
+  %val.unrelocated = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ]
+  %c = icmp eq i8 addrspace(1)* %val.unrelocated, %arg
+  ret void
+}
+
 declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
 declare void @not_statepoint()
diff --git a/test/ThinLTO/X86/Inputs/noinline.ll b/test/ThinLTO/X86/Inputs/noinline.ll
new file mode 100644
index 000000000000..73db2912cabc
--- /dev/null
+++ b/test/ThinLTO/X86/Inputs/noinline.ll
@@ -0,0 +1,8 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define i32 @foo(i32) local_unnamed_addr #0 {
+  ret i32 10
+}
+
+attributes #0 = { noinline }
diff --git a/test/ThinLTO/X86/noinline.ll b/test/ThinLTO/X86/noinline.ll
new file mode 100644
index 000000000000..27f59ab90967
--- /dev/null
+++ b/test/ThinLTO/X86/noinline.ll
@@ -0,0 +1,26 @@
+; This test checks that ThinLTO doesn't try to import a noinline function,
+; because such an import would cause promotion of its callee.
+; RUN: opt -module-summary %s -o %t1.bc
+; RUN: opt -module-summary %p/Inputs/noinline.ll -o %t2.bc
+; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t3.o \
+; RUN:   -save-temps       \
+; RUN:   -r=%t1.bc,main,px \
+; RUN:   -r=%t1.bc,foo,    \
+; RUN:   -r=%t2.bc,foo,p
+
+; RUN: llvm-dis %t3.o.1.3.import.bc -o - | FileCheck %s
+
+; CHECK-NOT: define available_externally i32 @foo
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @main(i32, i8** nocapture readnone) local_unnamed_addr #0 {
+  %3 = tail call i32 @foo(i32 %0) #0
+  ret i32 %3
+}
+
+declare i32 @foo(i32) local_unnamed_addr
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll b/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll
index fde0692d00a2..b05b27f533bb 100644
--- a/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll
+++ b/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll
@@ -122,6 +122,19 @@ entry:
 }
 
 
+define i8 @sub_compare_folding_swapPD256_undef(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @sub_compare_folding_swapPD256_undef(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> fsub (<4 x double> undef, <4 x double> undef), <4 x double> zeroinitializer, i32 5, i8 -1)
+; CHECK-NEXT:    ret i8 [[TMP]]
+;
+entry:
+  %sub.i1 = fsub ninf <4 x double> undef, undef
+  %tmp = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5, i8 -1)
+  ret i8 %tmp
+}
+
+
 define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){
 ; CHECK-LABEL: @sub_compare_folding_swapPD512(
 ; CHECK-NEXT:  entry:
diff --git a/test/Transforms/InstCombine/extractelement.ll b/test/Transforms/InstCombine/extractelement.ll
new file mode 100644
index 000000000000..66fbd25947dc
--- /dev/null
+++ b/test/Transforms/InstCombine/extractelement.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @extractelement_out_of_range(<2 x i32> %x) {
+; CHECK-LABEL: @extractelement_out_of_range(
+; CHECK-NEXT:    [[E1:%.*]] = extractelement <2 x i32> [[X:%.*]], i8 16
+; CHECK-NEXT:    ret i32 [[E1]]
+;
+  %E1 = extractelement <2 x i32> %x, i8 16
+  ret i32 %E1
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index c6f88fb9cf05..e0698f8b3b77 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -267,12 +267,17 @@ define void @powi(double %V, double *%P) {
 
   %C = tail call double @llvm.powi.f64(double %V, i32 1) nounwind
   store volatile double %C, double* %P
+
+  %D = tail call double @llvm.powi.f64(double %V, i32 2) nounwind
+  store volatile double %D, double* %P
   ret void
 ; CHECK-LABEL: @powi(
 ; CHECK: %A = fdiv double 1.0{{.*}}, %V
 ; CHECK: store volatile double %A,
 ; CHECK: store volatile double 1.0
 ; CHECK: store volatile double %V
+; CHECK: %D = fmul double %V, %V
+; CHECK: store volatile double %D
 }
 
 define i32 @cttz(i32 %a) {
diff --git a/test/Transforms/InstCombine/minmax-fold.ll b/test/Transforms/InstCombine/minmax-fold.ll
index 6004a55f0f8e..933aac7e23f2 100644
--- a/test/Transforms/InstCombine/minmax-fold.ll
+++ b/test/Transforms/InstCombine/minmax-fold.ll
@@ -744,3 +744,158 @@ define <2 x i8> @min_through_cast_vec2(<2 x i32> %x) {
   %res = select <2 x i1> %cmp, <2 x i8> %x_trunc, <2 x i8> <i8 255, i8 255>
   ret <2 x i8> %res
 }
+
+; Remove a min/max op in a sequence with a common operand.
+; PR35717: https://bugs.llvm.org/show_bug.cgi?id=35717
+
+; min(min(a, b), min(b, c)) --> min(min(a, b), c)
+
+define i32 @common_factor_smin(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @common_factor_smin(
+; CHECK-NEXT:    [[CMP_AB:%.*]] = icmp slt i32 %a, %b
+; CHECK-NEXT:    [[MIN_AB:%.*]] = select i1 [[CMP_AB]], i32 %a, i32 %b
+; CHECK-NEXT:    [[CMP_BC:%.*]] = icmp slt i32 %b, %c
+; CHECK-NEXT:    [[MIN_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
+; CHECK-NEXT:    [[CMP_AB_BC:%.*]] = icmp slt i32 [[MIN_AB]], [[MIN_BC]]
+; CHECK-NEXT:    [[MIN_ABC:%.*]] = select i1 [[CMP_AB_BC]], i32 [[MIN_AB]], i32 [[MIN_BC]]
+; CHECK-NEXT:    ret i32 [[MIN_ABC]]
+;
+  %cmp_ab = icmp slt i32 %a, %b
+  %min_ab = select i1 %cmp_ab, i32 %a, i32 %b
+  %cmp_bc = icmp slt i32 %b, %c
+  %min_bc = select i1 %cmp_bc, i32 %b, i32 %c
+  %cmp_ab_bc = icmp slt i32 %min_ab, %min_bc
+  %min_abc = select i1 %cmp_ab_bc, i32 %min_ab, i32 %min_bc
+  ret i32 %min_abc
+}
+
+; max(max(a, b), max(c, b)) --> max(max(a, b), c)
+
+define <2 x i32> @common_factor_smax(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: @common_factor_smax(
+; CHECK-NEXT:    [[CMP_AB:%.*]] = icmp sgt <2 x i32> %a, %b
+; CHECK-NEXT:    [[MAX_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> %a, <2 x i32> %b
+; CHECK-NEXT:    [[CMP_CB:%.*]] = icmp sgt <2 x i32> %c, %b
+; CHECK-NEXT:    [[MAX_CB:%.*]] = select <2 x i1> [[CMP_CB]], <2 x i32> %c, <2 x i32> %b
+; CHECK-NEXT:    [[CMP_AB_CB:%.*]] = icmp sgt <2 x i32> [[MAX_AB]], [[MAX_CB]]
+; CHECK-NEXT:    [[MAX_ABC:%.*]] = select <2 x i1> [[CMP_AB_CB]], <2 x i32> [[MAX_AB]], <2 x i32> [[MAX_CB]]
+; CHECK-NEXT:    ret <2 x i32> [[MAX_ABC]]
+;
+  %cmp_ab = icmp sgt <2 x i32> %a, %b
+  %max_ab = select <2 x i1> %cmp_ab, <2 x i32> %a, <2 x i32> %b
+  %cmp_cb = icmp sgt <2 x i32> %c, %b
+  %max_cb = select <2 x i1> %cmp_cb, <2 x i32> %c, <2 x i32> %b
+  %cmp_ab_cb = icmp sgt <2 x i32> %max_ab, %max_cb
+  %max_abc = select <2 x i1> %cmp_ab_cb, <2 x i32> %max_ab, <2 x i32> %max_cb
+  ret <2 x i32> %max_abc
+}
+
+; min(min(b, c), min(a, b)) --> min(min(b, c), a)
+
+define <2 x i32> @common_factor_umin(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: @common_factor_umin(
+; CHECK-NEXT:    [[CMP_BC:%.*]] = icmp ult <2 x i32> %b, %c
+; CHECK-NEXT:    [[MIN_BC:%.*]] = select <2 x i1> [[CMP_BC]], <2 x i32> %b, <2 x i32> %c
+; CHECK-NEXT:    [[CMP_AB:%.*]] = icmp ult <2 x i32> %a, %b
+; CHECK-NEXT:    [[MIN_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> %a, <2 x i32> %b
+; CHECK-NEXT:    [[CMP_BC_AB:%.*]] = icmp ult <2 x i32> [[MIN_BC]], [[MIN_AB]]
+; CHECK-NEXT:    [[MIN_ABC:%.*]] = select <2 x i1> [[CMP_BC_AB]], <2 x i32> [[MIN_BC]], <2 x i32> [[MIN_AB]]
+; CHECK-NEXT:    ret <2 x i32> [[MIN_ABC]]
+;
+  %cmp_bc = icmp ult <2 x i32> %b, %c
+  %min_bc = select <2 x i1> %cmp_bc, <2 x i32> %b, <2 x i32> %c
+  %cmp_ab = icmp ult <2 x i32> %a, %b
+  %min_ab = select <2 x i1> %cmp_ab, <2 x i32> %a, <2 x i32> %b
+  %cmp_bc_ab = icmp ult <2 x i32> %min_bc, %min_ab
+  %min_abc = select <2 x i1> %cmp_bc_ab, <2 x i32> %min_bc, <2 x i32> %min_ab
+  ret <2 x i32> %min_abc
+}
+
+; max(max(b, c), max(b, a)) --> max(max(b, c), a)
+
+define i32 @common_factor_umax(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @common_factor_umax(
+; CHECK-NEXT:    [[CMP_BC:%.*]] = icmp ugt i32 %b, %c
+; CHECK-NEXT:    [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
+; CHECK-NEXT:    [[CMP_BA:%.*]] = icmp ugt i32 %b, %a
+; CHECK-NEXT:    [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a
+; CHECK-NEXT:    [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
+; CHECK-NEXT:    [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
+; CHECK-NEXT:    ret i32 [[MAX_ABC]]
+;
+  %cmp_bc = icmp ugt i32 %b, %c
+  %max_bc = select i1 %cmp_bc, i32 %b, i32 %c
+  %cmp_ba = icmp ugt i32 %b, %a
+  %max_ba = select i1 %cmp_ba, i32 %b, i32 %a
+  %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
+  %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
+  ret i32 %max_abc
+}
+
+declare void @extra_use(i32)
+
+define i32 @common_factor_umax_extra_use_lhs(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @common_factor_umax_extra_use_lhs(
+; CHECK-NEXT:    [[CMP_BC:%.*]] = icmp ugt i32 %b, %c
+; CHECK-NEXT:    [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
+; CHECK-NEXT:    [[CMP_BA:%.*]] = icmp ugt i32 %b, %a
+; CHECK-NEXT:    [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a
+; CHECK-NEXT:    [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
+; CHECK-NEXT:    [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
+; CHECK-NEXT:    call void @extra_use(i32 [[MAX_BC]])
+; CHECK-NEXT:    ret i32 [[MAX_ABC]]
+;
+  %cmp_bc = icmp ugt i32 %b, %c
+  %max_bc = select i1 %cmp_bc, i32 %b, i32 %c
+  %cmp_ba = icmp ugt i32 %b, %a
+  %max_ba = select i1 %cmp_ba, i32 %b, i32 %a
+  %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
+  %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
+  call void @extra_use(i32 %max_bc)
+  ret i32 %max_abc
+}
+
+define i32 @common_factor_umax_extra_use_rhs(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @common_factor_umax_extra_use_rhs(
+; CHECK-NEXT:    [[CMP_BC:%.*]] = icmp ugt i32 %b, %c
+; CHECK-NEXT:    [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
+; CHECK-NEXT:    [[CMP_BA:%.*]] = icmp ugt i32 %b, %a
+; CHECK-NEXT:    [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a
+; CHECK-NEXT:    [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
+; CHECK-NEXT:    [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
+; CHECK-NEXT:    call void @extra_use(i32 [[MAX_BA]])
+; CHECK-NEXT:    ret i32 [[MAX_ABC]]
+;
+  %cmp_bc = icmp ugt i32 %b, %c
+  %max_bc = select i1 %cmp_bc, i32 %b, i32 %c
+  %cmp_ba = icmp ugt i32 %b, %a
+  %max_ba = select i1 %cmp_ba, i32 %b, i32 %a
+  %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
+  %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
+  call void @extra_use(i32 %max_ba)
+  ret i32 %max_abc
+}
+
+define i32 @common_factor_umax_extra_use_both(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @common_factor_umax_extra_use_both(
+; CHECK-NEXT:    [[CMP_BC:%.*]] = icmp ugt i32 %b, %c
+; CHECK-NEXT:    [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
+; CHECK-NEXT:    [[CMP_BA:%.*]] = icmp ugt i32 %b, %a
+; CHECK-NEXT:    [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a
+; CHECK-NEXT:    [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
+; CHECK-NEXT:    [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
+; CHECK-NEXT:    call void @extra_use(i32 [[MAX_BC]])
+; CHECK-NEXT:    call void @extra_use(i32 [[MAX_BA]])
+; CHECK-NEXT:    ret i32 [[MAX_ABC]]
+;
+  %cmp_bc = icmp ugt i32 %b, %c
+  %max_bc = select i1 %cmp_bc, i32 %b, i32 %c
+  %cmp_ba = icmp ugt i32 %b, %a
+  %max_ba = select i1 %cmp_ba, i32 %b, i32 %a
+  %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
+  %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
+  call void @extra_use(i32 %max_bc)
+  call void @extra_use(i32 %max_ba)
+  ret i32 %max_abc
+}
+
diff --git a/test/Transforms/InstCombine/minmax-fp.ll b/test/Transforms/InstCombine/minmax-fp.ll
index 0851a5d435b8..b94bce2dbb80 100644
--- a/test/Transforms/InstCombine/minmax-fp.ll
+++ b/test/Transforms/InstCombine/minmax-fp.ll
@@ -155,13 +155,13 @@ define i8 @t13(float %a) {
   ret i8 %3
 }
 
-; <= comparison, where %a could be -0.0. Not safe.
+; %a could be -0.0, but it doesn't matter because the conversion to int is the same for 0.0 or -0.0.
 define i8 @t14(float %a) {
 ; CHECK-LABEL: @t14(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ule float %a, 0.000000e+00
-; CHECK-NEXT:    [[TMP2:%.*]] = fptosi float %a to i8
-; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 0
-; CHECK-NEXT:    ret i8 [[TMP3]]
+; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp oge float %a, 0.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[DOTINV]], float 0.000000e+00, float %a
+; CHECK-NEXT:    [[TMP2:%.*]] = fptosi float [[TMP1]] to i8
+; CHECK-NEXT:    ret i8 [[TMP2]]
 ;
   %1 = fcmp ule float %a, 0.0
   %2 = fptosi float %a to i8
@@ -169,6 +169,19 @@ define i8 @t14(float %a) {
   ret i8 %3
 }
 
+define i8 @t14_commute(float %a) {
+; CHECK-LABEL: @t14_commute(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt float %a, 0.000000e+00
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float %a, float 0.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = fptosi float [[TMP2]] to i8
+; CHECK-NEXT:    ret i8 [[TMP3]]
+;
+  %1 = fcmp ule float %a, 0.0
+  %2 = fptosi float %a to i8
+  %3 = select i1 %1, i8 0, i8 %2
+  ret i8 %3
+}
+
 define i8 @t15(float %a) {
 ; CHECK-LABEL: @t15(
 ; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp nsz oge float %a, 0.000000e+00
diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll
index 4e90b337b09c..f71a0fb4c621 100644
--- a/test/Transforms/InstCombine/rem.ll
+++ b/test/Transforms/InstCombine/rem.ll
@@ -593,3 +593,17 @@ define <2 x i32> @test23(<2 x i32> %A) {
   %mul = srem <2 x i32> %and, <i32 2147483647, i32 2147483647>
   ret <2 x i32> %mul
 }
+
+; FP division-by-zero is not UB.
+
+define double @PR34870(i1 %cond, double %x, double %y) {
+; CHECK-LABEL: @PR34870(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 %cond, double %y, double 0.000000e+00
+; CHECK-NEXT:    [[FMOD:%.*]] = frem double %x, [[SEL]]
+; CHECK-NEXT:    ret double [[FMOD]]
+;
+  %sel = select i1 %cond, double %y, double 0.0
+  %fmod = frem double %x, %sel
+  ret double %fmod
+}
+
diff --git a/test/Transforms/InstSimplify/extract-element.ll b/test/Transforms/InstSimplify/extract-element.ll
new file mode 100644
index 000000000000..8ee75a603cd1
--- /dev/null
+++ b/test/Transforms/InstSimplify/extract-element.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; Weird Types
+
+define i129 @vec_extract_negidx(<3 x i129> %a) {
+; CHECK-LABEL: @vec_extract_negidx(
+; CHECK-NEXT:    [[E1:%.*]] = extractelement <3 x i129> [[A:%.*]], i129 -1
+; CHECK-NEXT:    ret i129 [[E1]]
+;
+  %E1 = extractelement <3 x i129> %a, i129 -1
+  ret i129 %E1
+}
diff --git a/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll b/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll
new file mode 100644
index 000000000000..28db1c834062
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll
@@ -0,0 +1,128 @@
+; RUN: opt -loop-unroll -unroll-runtime -unroll-runtime-epilog -S %s | FileCheck %s
+
+; Test that the epilogue is tagged with the same debug information as the original loop body rather than the original loop exit.
+
+; CHECK: for.body.i:
+; CHECK:   br i1 {{.*}}, label %lee1.exit.loopexit.unr-lcssa.loopexit, label %for.body.i, !dbg ![[LOOP_LOC:[0-9]+]]
+; CHECK: lee1.exit.loopexit.unr-lcssa.loopexit:
+; CHECK:   br label %lee1.exit.loopexit.unr-lcssa, !dbg ![[LOOP_LOC]]
+; CHECK: lee1.exit.loopexit.unr-lcssa:
+; CHECK:   %lcmp.mod = icmp ne i32 %xtraiter, 0, !dbg ![[LOOP_LOC]]
+; CHECK:   br i1 %lcmp.mod, label %for.body.i.epil.preheader, label %lee1.exit.loopexit, !dbg ![[LOOP_LOC]]
+; CHECK: for.body.i.epil.preheader:
+; CHECK:   br label %for.body.i.epil, !dbg ![[LOOP_LOC]]
+; CHECK: lee1.exit.loopexit:
+; CHECK:   br label %lee1.exit, !dbg ![[EXIT_LOC:[0-9]+]]
+
+; CHECK-DAG: ![[LOOP_LOC]] = !DILocation(line: 5, column: 3, scope: !{{.*}}, inlinedAt: !{{.*}})
+; CHECK-DAG: ![[EXIT_LOC]] = !DILocation(line: 11, column: 12, scope: !{{.*}}, inlinedAt: !{{.*}})
+
+; Function Attrs: nounwind readnone
+define i32 @goo(i32 %a, i32 %b) local_unnamed_addr #0 !dbg !8 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !15), !dbg !16
+  tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !14, metadata !15), !dbg !17
+  tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !18, metadata !15), !dbg !26
+  tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !21, metadata !15), !dbg !28
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !15), !dbg !29
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !23, metadata !15), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !23, metadata !15), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !15), !dbg !29
+  %cmp7.i = icmp eq i32 %b, 0, !dbg !31
+  br i1 %cmp7.i, label %lee1.exit, label %for.body.i.preheader, !dbg !33
+
+for.body.i.preheader:                             ; preds = %entry
+  br label %for.body.i, !dbg !34
+
+for.body.i:                                       ; preds = %for.body.i.preheader, %for.body.i
+  %i.09.i = phi i32 [ %inc.i, %for.body.i ], [ 0, %for.body.i.preheader ]
+  %t.08.i = phi i32 [ %add1.i, %for.body.i ], [ 0, %for.body.i.preheader ]
+  %div.i = sdiv i32 %t.08.i, 2, !dbg !34
+  %add.i = add i32 %t.08.i, %a, !dbg !35
+  %add1.i = add i32 %add.i, %div.i, !dbg !36
+  tail call void @llvm.dbg.value(metadata i32 %add1.i, i64 0, metadata !22, metadata !15), !dbg !29
+  %inc.i = add nuw i32 %i.09.i, 1, !dbg !37
+  tail call void @llvm.dbg.value(metadata i32 %inc.i, i64 0, metadata !23, metadata !15), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 %inc.i, i64 0, metadata !23, metadata !15), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 %add1.i, i64 0, metadata !22, metadata !15), !dbg !29
+  %exitcond.i = icmp eq i32 %inc.i, %b, !dbg !31
+  br i1 %exitcond.i, label %lee1.exit.loopexit, label %for.body.i, !dbg !33, !llvm.loop !38
+
+lee1.exit.loopexit:                               ; preds = %for.body.i
+  %add1.i.lcssa = phi i32 [ %add1.i, %for.body.i ]
+  br label %lee1.exit, !dbg !41
+
+lee1.exit:                                        ; preds = %lee1.exit.loopexit, %entry
+  %t.0.lcssa.i = phi i32 [ 0, %entry ], [ %add1.i.lcssa, %lee1.exit.loopexit ]
+  tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !44, metadata !15), !dbg !47
+  tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !45, metadata !15), !dbg !48
+  %add.i4 = add nsw i32 %b, %a, !dbg !41
+  %sub.i = sub nsw i32 %a, %b, !dbg !49
+  %mul.i = mul nsw i32 %add.i4, %sub.i, !dbg !50
+  %add = add nsw i32 %t.0.lcssa.i, %mul.i, !dbg !51
+  ret i32 %add, !dbg !52
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+neon,+strict-align,+vfp3,-crypto,-d16,-fp-armv8,-fp-only-sp,-fp16,-vfp4" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Snapdragon LLVM ARM Compiler 4.0.5 (based on llvm.org 4.0+)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "t.c", directory: "/prj/llvm-arm/scratch1/zhaoshiz/bugs/debug-symbol")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 1, !"min_enum_size", i32 4}
+!7 = !{!"Snapdragon LLVM ARM Compiler 4.0.5 (based on llvm.org 4.0+)"}
+!8 = distinct !DISubprogram(name: "goo", scope: !1, file: !1, line: 23, type: !9, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !12)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11, !11, !11}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !{!13, !14}
+!13 = !DILocalVariable(name: "a", arg: 1, scope: !8, file: !1, line: 23, type: !11)
+!14 = !DILocalVariable(name: "b", arg: 2, scope: !8, file: !1, line: 23, type: !11)
+!15 = !DIExpression()
+!16 = !DILocation(line: 23, column: 14, scope: !8)
+!17 = !DILocation(line: 23, column: 21, scope: !8)
+!18 = !DILocalVariable(name: "a", arg: 1, scope: !19, file: !1, line: 3, type: !11)
+!19 = distinct !DISubprogram(name: "lee1", scope: !1, file: !1, line: 3, type: !9, isLocal: true, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !20)
+!20 = !{!18, !21, !22, !23}
+!21 = !DILocalVariable(name: "b", arg: 2, scope: !19, file: !1, line: 3, type: !11)
+!22 = !DILocalVariable(name: "t", scope: !19, file: !1, line: 4, type: !11)
+!23 = !DILocalVariable(name: "i", scope: !24, file: !1, line: 5, type: !25)
+!24 = distinct !DILexicalBlock(scope: !19, file: !1, line: 5, column: 3)
+!25 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!26 = !DILocation(line: 3, column: 22, scope: !19, inlinedAt: !27)
+!27 = distinct !DILocation(line: 24, column: 27, scope: !8)
+!28 = !DILocation(line: 3, column: 29, scope: !19, inlinedAt: !27)
+!29 = !DILocation(line: 4, column: 7, scope: !19, inlinedAt: !27)
+!30 = !DILocation(line: 5, column: 17, scope: !24, inlinedAt: !27)
+!31 = !DILocation(line: 5, column: 23, scope: !32, inlinedAt: !27)
+!32 = distinct !DILexicalBlock(scope: !24, file: !1, line: 5, column: 3)
+!33 = !DILocation(line: 5, column: 3, scope: !24, inlinedAt: !27)
+!34 = !DILocation(line: 6, column: 13, scope: !32, inlinedAt: !27)
+!35 = !DILocation(line: 6, column: 11, scope: !32, inlinedAt: !27)
+!36 = !DILocation(line: 6, column: 7, scope: !32, inlinedAt: !27)
+!37 = !DILocation(line: 5, column: 28, scope: !32, inlinedAt: !27)
+!38 = distinct !{!38, !39, !40}
+!39 = !DILocation(line: 5, column: 3, scope: !24)
+!40 = !DILocation(line: 6, column: 14, scope: !24)
+!41 = !DILocation(line: 11, column: 12, scope: !42, inlinedAt: !46)
+!42 = distinct !DISubprogram(name: "lee2", scope: !1, file: !1, line: 10, type: !9, isLocal: true, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !43)
+!43 = !{!44, !45}
+!44 = !DILocalVariable(name: "a", arg: 1, scope: !42, file: !1, line: 10, type: !11)
+!45 = !DILocalVariable(name: "b", arg: 2, scope: !42, file: !1, line: 10, type: !11)
+!46 = distinct !DILocation(line: 24, column: 40, scope: !8)
+!47 = !DILocation(line: 10, column: 22, scope: !42, inlinedAt: !46)
+!48 = !DILocation(line: 10, column: 29, scope: !42, inlinedAt: !46)
+!49 = !DILocation(line: 11, column: 20, scope: !42, inlinedAt: !46)
+!50 = !DILocation(line: 11, column: 16, scope: !42, inlinedAt: !46)
+!51 = !DILocation(line: 24, column: 38, scope: !8)
+!52 = !DILocation(line: 24, column: 3, scope: !8)
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
index d180980c95b9..32463373ca99 100644
--- a/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -13,9 +13,11 @@
 ; EPILOG:   br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]]
 ; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body
 ; EPILOG: for.body.epil.preheader:
-; EPILOG:   br label %for.body.epil, !dbg [[EXIT_LOC:![0-9]+]]
+; EPILOG:   br label %for.body.epil, !dbg [[BODY_LOC]]
 ; EPILOG: for.body.epil:
-; EPILOG:   br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC:![0-9]+]]
+; EPILOG:   br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC]]
+; EPILOG: for.end.loopexit:
+; EPILOG:   br label %for.end, !dbg [[EXIT_LOC:![0-9]+]]
 
 ; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
 ; EPILOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
diff --git a/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll b/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll
deleted file mode 100644
index e3d1f6dd2b17..000000000000
--- a/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: opt < %s -memcpyopt -S | FileCheck %s
-; Test memcpy-memcpy dependencies across invoke edges.
-
-; Test that memcpyopt works across the non-unwind edge of an invoke.
-
-define hidden void @test_normal(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-entry:
-  %temp = alloca i8, i32 64
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
-  invoke void @invoke_me()
-          to label %try.cont unwind label %lpad
-
-lpad:
-  landingpad { i8*, i32 }
-          catch i8* null
-  ret void
-
-try.cont:
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false)
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false)
-  ret void
-}
-
-; Test that memcpyopt works across the unwind edge of an invoke.
-
-define hidden void @test_unwind(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-entry:
-  %temp = alloca i8, i32 64
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
-  invoke void @invoke_me()
-          to label %try.cont unwind label %lpad
-
-lpad:
-  landingpad { i8*, i32 }
-          catch i8* null
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false)
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false)
-  ret void
-
-try.cont:
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-declare i32 @__gxx_personality_v0(...)
-declare void @invoke_me() readnone
diff --git a/test/Transforms/MemCpyOpt/merge-into-memset.ll b/test/Transforms/MemCpyOpt/merge-into-memset.ll
deleted file mode 100644
index fc31038a4e6d..000000000000
--- a/test/Transforms/MemCpyOpt/merge-into-memset.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; RUN: opt < %s -memcpyopt -S | FileCheck %s
-; Update cached non-local dependence information when merging stores into memset.
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; Don't delete the memcpy in %if.then, even though it depends on an instruction
-; which will be deleted.
-
-; CHECK-LABEL: @foo
-define void @foo(i1 %c, i8* %d, i8* %e, i8* %f) {
-entry:
-  %tmp = alloca [50 x i8], align 8
-  %tmp4 = bitcast [50 x i8]* %tmp to i8*
-  %tmp1 = getelementptr inbounds i8, i8* %tmp4, i64 1
-  call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5
-  store i8 0, i8* %tmp4, align 8, !dbg !5
-; CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %tmp1, i8* nonnull %d, i64 10, i32 1, i1 false)
-  br i1 %c, label %if.then, label %exit
-
-if.then:
-; CHECK: if.then:
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false)
-  br label %exit
-
-exit:
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
-declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1)
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
-!1 = !DIFile(filename: "t.rs", directory: "/tmp")
-!2 = !{}
-!3 = !{i32 2, !"Dwarf Version", i32 4}
-!4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = !DILocation(line: 8, column: 5, scope: !6)
-!6 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !7, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
-!7 = !DISubroutineType(types: !8)
-!8 = !{null}
diff --git a/test/Transforms/MemCpyOpt/mixed-sizes.ll b/test/Transforms/MemCpyOpt/mixed-sizes.ll
deleted file mode 100644
index 9091fe7f56c0..000000000000
--- a/test/Transforms/MemCpyOpt/mixed-sizes.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: opt < %s -memcpyopt -S | FileCheck %s
-; Handle memcpy-memcpy dependencies of differing sizes correctly.
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; Don't delete the second memcpy, even though there's an earlier
-; memcpy with a larger size from the same address.
-
-; CHECK-LABEL: @foo
-define i32 @foo(i1 %z) {
-entry:
-  %a = alloca [10 x i32]
-  %s = alloca [10 x i32]
-  %0 = bitcast [10 x i32]* %a to i8*
-  %1 = bitcast [10 x i32]* %s to i8*
-  call void @llvm.memset.p0i8.i64(i8* nonnull %1, i8 0, i64 40, i32 16, i1 false)
-  %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %a, i64 0, i64 0
-  store i32 1, i32* %arrayidx
-  %scevgep = getelementptr [10 x i32], [10 x i32]* %s, i64 0, i64 1
-  %scevgep7 = bitcast i32* %scevgep to i8*
-  br i1 %z, label %for.body3.lr.ph, label %for.inc7.1
-
-for.body3.lr.ph:                                  ; preds = %entry
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 17179869180, i32 4, i1 false)
-  br label %for.inc7.1
-
-for.inc7.1:
-; CHECK: for.inc7.1:
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false)
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false)
-  %2 = load i32, i32* %arrayidx
-  ret i32 %2
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
-declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1)
diff --git a/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll b/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll
deleted file mode 100644
index 5b0510211d9f..000000000000
--- a/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll
+++ /dev/null
@@ -1,114 +0,0 @@
-; RUN: opt < %s -memcpyopt -S | FileCheck %s
-; Make sure memcpy-memcpy dependence is optimized across
-; basic blocks (conditional branches and invokes).
-
-%struct.s = type { i32, i32 }
-
-@s_foo = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4
-@s_baz = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4
-@i = external constant i8*
-
-declare void @qux()
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-declare void @__cxa_throw(i8*, i8*, i8*)
-declare i32 @__gxx_personality_v0(...)
-declare i8* @__cxa_begin_catch(i8*)
-
-; A simple partial redundancy. Test that the second memcpy is optimized
-; to copy directly from the original source rather than from the temporary.
-
-; CHECK-LABEL: @wobble
-define void @wobble(i8* noalias %dst, i8* %src, i1 %some_condition) {
-bb:
-  %temp = alloca i8, i32 64
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
-  br i1 %some_condition, label %more, label %out
-
-out:
-  call void @qux()
-  unreachable
-
-more:
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false)
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false)
-  ret void
-}
-
-; A CFG triangle with a partial redundancy targeting an alloca. Test that the
-; memcpy inside the triangle is optimized to copy directly from the original
-; source rather than from the temporary.
-
-; CHECK-LABEL: @foo
-define i32 @foo(i1 %t3) {
-bb:
-  %s = alloca %struct.s, align 4
-  %t = alloca %struct.s, align 4
-  %s1 = bitcast %struct.s* %s to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false)
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false)
-  br i1 %t3, label %bb4, label %bb7
-
-bb4:                                              ; preds = %bb
-  %t5 = bitcast %struct.s* %t to i8*
-  %s6 = bitcast %struct.s* %s to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* %s6, i64 8, i32 4, i1 false)
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false)
-  br label %bb7
-
-bb7:                                              ; preds = %bb4, %bb
-  %t8 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 0
-  %t9 = load i32, i32* %t8, align 4
-  %t10 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 1
-  %t11 = load i32, i32* %t10, align 4
-  %t12 = add i32 %t9, %t11
-  ret i32 %t12
-}
-
-; A CFG diamond with an invoke on one side, and a partially redundant memcpy
-; into an alloca on the other. Test that the memcpy inside the diamond is
-; optimized to copy ; directly from the original source rather than from the
-; temporary. This more complex test represents a relatively common usage
-; pattern.
-
-; CHECK-LABEL: @baz
-define i32 @baz(i1 %t5) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-bb:
-  %s = alloca %struct.s, align 4
-  %t = alloca %struct.s, align 4
-  %s3 = bitcast %struct.s* %s to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false)
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false)
-  br i1 %t5, label %bb6, label %bb22
-
-bb6:                                              ; preds = %bb
-  invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null)
-          to label %bb25 unwind label %bb9
-
-bb9:                                              ; preds = %bb6
-  %t10 = landingpad { i8*, i32 }
-          catch i8* null
-  br label %bb13
-
-bb13:                                             ; preds = %bb9
-  %t15 = call i8* @__cxa_begin_catch(i8* null)
-  br label %bb23
-
-bb22:                                             ; preds = %bb
-  %t23 = bitcast %struct.s* %t to i8*
-  %s24 = bitcast %struct.s* %s to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* %s24, i64 8, i32 4, i1 false)
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false)
-  br label %bb23
-
-bb23:                                             ; preds = %bb22, %bb13
-  %t17 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 0
-  %t18 = load i32, i32* %t17, align 4
-  %t19 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 1
-  %t20 = load i32, i32* %t19, align 4
-  %t21 = add nsw i32 %t18, %t20
-  ret i32 %t21
-
-bb25:                                             ; preds = %bb6
-  unreachable
-}
diff --git a/test/Transforms/RewriteStatepointsForGC/check_traversal_order.ll b/test/Transforms/RewriteStatepointsForGC/check_traversal_order.ll
new file mode 100644
index 000000000000..57e35ccad638
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/check_traversal_order.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @f()
+declare void @g(i8 addrspace(1)*, i8 addrspace(1)*)
+declare i32 @personality_function()
+
+; Make sure that we do not fail assertion because we process call of @g before
+; we process the call of @f.
+
+define void @test_01(i8 addrspace(1)* %p, i1 %cond) gc "statepoint-example" personality i32 ()* @personality_function {
+
+; CHECK-LABEL: @test_01(
+
+entry:
+  %tmp0 = insertelement <2 x i8 addrspace(1)*> undef, i8 addrspace(1)* %p, i32 0
+  %tmp1 = insertelement <2 x i8 addrspace(1)*> %tmp0, i8 addrspace(1)* %p, i32 1
+  %tmp2 = extractelement <2 x i8 addrspace(1)*> %tmp1, i32 1
+  %tmp3 = extractelement <2 x i8 addrspace(1)*> %tmp1, i32 0
+  br label %loop
+
+loop:
+  br i1 %cond, label %cond_block, label %exit
+
+cond_block:
+  br i1 %cond, label %backedge, label %exit
+
+exit:
+  %tmp4 = phi i8 addrspace(1)* [ %tmp2, %loop ], [ %tmp2, %cond_block ]
+  call void @g(i8 addrspace(1)* %tmp3, i8 addrspace(1)* %tmp4)
+  ret void
+
+backedge:
+  call void @f()
+  br label %loop
+}
diff --git a/test/Transforms/SimplifyCFG/X86/if-conversion.ll b/test/Transforms/SimplifyCFG/X86/if-conversion.ll
deleted file mode 100644
index 28702572d480..000000000000
--- a/test/Transforms/SimplifyCFG/X86/if-conversion.ll
+++ /dev/null
@@ -1,231 +0,0 @@
-; RUN: opt < %s -simplifycfg -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -S | FileCheck %s
-; Avoid if-conversion if there is a long dependence chain.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-; The first several cases test FindLongDependenceChain returns true, so
-; if-conversion is blocked.
-
-define i64 @test1(i64** %pp, i64* %p) {
-entry:
-  %0 = load i64*, i64** %pp, align 8
-  %1 = load i64, i64* %0, align 8
-  %cmp = icmp slt i64 %1, 0
-  %pint = ptrtoint i64* %p to i64
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = or i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %ptr = inttoptr i64 %p3 to i64*
-  %val = load i64, i64* %ptr, align 8
-  ret i64 %val
-
-; CHECK-NOT: select
-}
-
-define i64 @test2(i64** %pp, i64* %p) {
-entry:
-  %0 = load i64*, i64** %pp, align 8
-  %1 = load i64, i64* %0, align 8
-  %cmp = icmp slt i64 %1, 0
-  %pint = ptrtoint i64* %p to i64
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = add i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %ptr = inttoptr i64 %p3 to i64*
-  %val = load i64, i64* %ptr, align 8
-  ret i64 %val
-
-; CHECK-LABEL: @test2
-; CHECK-NOT: select
-}
-
-; The following cases test FindLongDependenceChain returns false, so
-; if-conversion will proceed.
-
-; Non trivial LatencyAdjustment.
-define i64 @test3(i64** %pp, i64* %p) {
-entry:
-  %0 = load i64*, i64** %pp, align 8
-  %1 = load i64, i64* %0, align 8
-  %cmp = icmp slt i64 %1, 0
-  %pint = ptrtoint i64* %p to i64
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = or i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %p4 = add i64 %p3, %1
-  %ptr = inttoptr i64 %p4 to i64*
-  %val = load i64, i64* %ptr, align 8
-  ret i64 %val
-
-; CHECK-LABEL: @test3
-; CHECK: select
-}
-
-; Short dependence chain.
-define i64 @test4(i64* %pp, i64* %p) {
-entry:
-  %0 = load i64, i64* %pp, align 8
-  %cmp = icmp slt i64 %0, 0
-  %pint = ptrtoint i64* %p to i64
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = or i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %ptr = inttoptr i64 %p3 to i64*
-  %val = load i64, i64* %ptr, align 8
-  ret i64 %val
-
-; CHECK-LABEL: @test4
-; CHECK: select
-}
-
-; High IPC.
-define i64 @test5(i64** %pp, i64* %p) {
-entry:
-  %0 = load i64*, i64** %pp, align 8
-  %1 = load i64, i64* %0, align 8
-  %cmp = icmp slt i64 %1, 0
-  %pint = ptrtoint i64* %p to i64
-  %2 = add i64 %pint, 2
-  %3 = add i64 %pint, 3
-  %4 = or i64 %pint, 16
-  %5 = and i64 %pint, 255
-
-  %6 = or i64 %2, 9
-  %7 = and i64 %3, 255
-  %8 = add i64 %4, 4
-  %9 = add i64 %5, 5
-
-  %10 = add i64 %6, 2
-  %11 = add i64 %7, 3
-  %12 = add i64 %8, 4
-  %13 = add i64 %9, 5
-
-  %14 = add i64 %10, 6
-  %15 = add i64 %11, 7
-  %16 = add i64 %12, 8
-  %17 = add i64 %13, 9
-
-  %18 = add i64 %14, 10
-  %19 = add i64 %15, 11
-  %20 = add i64 %16, 12
-  %21 = add i64 %17, 13
-
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = or i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %ptr = inttoptr i64 %p3 to i64*
-  %val = load i64, i64* %ptr, align 8
-
-  ret i64 %val
-
-; CHECK-LABEL: @test5
-; CHECK: select
-}
-
-; Large BB size.
-define i64 @test6(i64** %pp, i64* %p) {
-entry:
-  %0 = load i64*, i64** %pp, align 8
-  %1 = load i64, i64* %0, align 8
-  %cmp = icmp slt i64 %1, 0
-  %pint = ptrtoint i64* %p to i64
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = or i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %ptr = inttoptr i64 %p3 to i64*
-  %val = load i64, i64* %ptr, align 8
-  %2 = add i64 %pint, 2
-  %3 = add i64 %pint, 3
-  %4 = add i64 %2, 4
-  %5 = add i64 %3, 5
-  %6 = add i64 %4, 6
-  %7 = add i64 %5, 7
-  %8 = add i64 %6, 6
-  %9 = add i64 %7, 7
-  %10 = add i64 %8, 6
-  %11 = add i64 %9, 7
-  %12 = add i64 %10, 6
-  %13 = add i64 %11, 7
-  %14 = add i64 %12, 6
-  %15 = add i64 %13, 7
-  %16 = add i64 %14, 6
-  %17 = add i64 %15, 7
-  %18 = add i64 %16, 6
-  %19 = add i64 %17, 7
-  %20 = add i64 %18, 6
-  %21 = add i64 %19, 7
-  %22 = add i64 %20, 6
-  %23 = add i64 %21, 7
-  %24 = add i64 %22, 6
-  %25 = add i64 %23, 7
-  %26 = add i64 %24, 6
-  %27 = add i64 %25, 7
-  %28 = add i64 %26, 6
-  %29 = add i64 %27, 7
-  %30 = add i64 %28, 6
-  %31 = add i64 %29, 7
-  %32 = add i64 %30, 8
-  %33 = add i64 %31, 9
-  %34 = add i64 %32, %33
-  %35 = and i64 %34, 255
-  %res = add i64 %val, %35
-
-  ret i64 %res
-
-; CHECK-LABEL: @test6
-; CHECK: select
-}
diff --git a/test/tools/llvm-cov/cov-comdat.test b/test/tools/llvm-cov/cov-comdat.test
index 9d2271636994..e8018d58be62 100644
--- a/test/tools/llvm-cov/cov-comdat.test
+++ b/test/tools/llvm-cov/cov-comdat.test
@@ -9,7 +9,7 @@ REQUIRES: shell
 
 // RUN: llvm-cov show %S/Inputs/binary-formats.v1.linux64l -instr-profile %S/Inputs/elf_binary_comdat.profdata -path-equivalence=/tmp,%S/Inputs %S/Inputs/instrprof-comdat.h -dump 2> %t.err | FileCheck --check-prefix=HEADER %S/Inputs/instrprof-comdat.h
 // RUN: FileCheck --check-prefix=ERROR -input-file %t.err %s
-// ERROR: hash-mismatch: No profile record found for 'main' with hash = 0xA
+// ERROR: hash-mismatch: No profile record found for 'main' with hash = 0xa
 
 // RUN: llvm-cov show %S/Inputs/binary-formats.v2.linux64l -instr-profile %S/Inputs/elf_binary_comdat.profdata -path-equivalence=/root/llvm/test/tools,%S/.. %S/Inputs/instrprof-comdat.h | FileCheck --check-prefix=HEADER %S/Inputs/instrprof-comdat.h
 // RUN: llvm-cov show %S/Inputs/binary-formats.v2.linux32l -instr-profile %S/Inputs/elf_binary_comdat.profdata -path-equivalence=/root/llvm/R/../test/tools,%S/.. %S/Inputs/instrprof-comdat.h | FileCheck --check-prefix=HEADER %S/Inputs/instrprof-comdat.h
diff --git a/test/tools/llvm-objdump/X86/hex-displacement.test b/test/tools/llvm-objdump/X86/hex-displacement.test
index dd2332e572f0..541cca53869b 100644
--- a/test/tools/llvm-objdump/X86/hex-displacement.test
+++ b/test/tools/llvm-objdump/X86/hex-displacement.test
@@ -3,4 +3,4 @@
 
 # RUN: llvm-objdump -d %p/Inputs/hello.exe.macho-i386 | FileCheck %s
 
-# CHECK: 1f47:   e8 00 00 00 00  calll   0 <_main+0xC>
+# CHECK: 1f47:   e8 00 00 00 00  calll   0 <_main+0xc>
diff --git a/test/tools/llvm-readobj/Inputs/needed-libs.obj.coff-am64 b/test/tools/llvm-readobj/Inputs/needed-libs.obj.coff-am64
new file mode 100644
index 000000000000..4cfc6e25396f
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/needed-libs.obj.coff-am64
diff --git a/test/tools/llvm-readobj/coff-needed-libs.test b/test/tools/llvm-readobj/coff-needed-libs.test
new file mode 100644
index 000000000000..deb6bc299eb5
--- /dev/null
+++ b/test/tools/llvm-readobj/coff-needed-libs.test
@@ -0,0 +1,5 @@
+RUN: llvm-readobj -needed-libs %p/Inputs/needed-libs.obj.coff-am64 | FileCheck %s
+
+CHECK:      NeededLibraries [
+CHECK-NEXT:   KERNEL32.dll
+CHECK-NEXT: ]