18 files changed, 421 insertions, 50 deletions
diff --git a/test/Bitcode/compatibility-3.6.ll b/test/Bitcode/compatibility-3.6.ll
index 6c47a853e24a..e9313dfba870 100644
--- a/test/Bitcode/compatibility-3.6.ll
+++ b/test/Bitcode/compatibility-3.6.ll
@@ -612,9 +612,7 @@ define void @fastmathflags(float %op1, float %op2) {
   %f.arcp = fadd arcp float %op1, %op2
   ; CHECK: %f.arcp = fadd arcp float %op1, %op2
   %f.fast = fadd fast float %op1, %op2
-  ; 'fast' used to be its own bit, but this changed in Oct 2017.
-  ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
-  ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2
+  ; CHECK: %f.fast = fadd fast float %op1, %op2
   ret void
 }
 
diff --git a/test/Bitcode/compatibility-3.7.ll b/test/Bitcode/compatibility-3.7.ll
index 55844e5c4986..82fc99055357 100644
--- a/test/Bitcode/compatibility-3.7.ll
+++ b/test/Bitcode/compatibility-3.7.ll
@@ -656,9 +656,7 @@ define void @fastmathflags(float %op1, float %op2) {
   %f.arcp = fadd arcp float %op1, %op2
   ; CHECK: %f.arcp = fadd arcp float %op1, %op2
   %f.fast = fadd fast float %op1, %op2
-  ; 'fast' used to be its own bit, but this changed in Oct 2017.
-  ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
-  ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2
+  ; CHECK: %f.fast = fadd fast float %op1, %op2
   ret void
 }
 
diff --git a/test/Bitcode/compatibility-3.8.ll b/test/Bitcode/compatibility-3.8.ll
index a7fa20f2bc08..2e70a380d10e 100644
--- a/test/Bitcode/compatibility-3.8.ll
+++ b/test/Bitcode/compatibility-3.8.ll
@@ -687,9 +687,7 @@ define void @fastmathflags(float %op1, float %op2) {
   %f.arcp = fadd arcp float %op1, %op2
   ; CHECK: %f.arcp = fadd arcp float %op1, %op2
   %f.fast = fadd fast float %op1, %op2
-  ; 'fast' used to be its own bit, but this changed in Oct 2017.
-  ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
-  ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2
+  ; CHECK: %f.fast = fadd fast float %op1, %op2
   ret void
 }
 
@@ -702,9 +700,7 @@ declare <4 x double> @fmf3()
 ; CHECK-LABEL: fastMathFlagsForCalls(
 define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) {
   %call.fast = call fast float @fmf1()
-  ; 'fast' used to be its own bit, but this changed in Oct 2017.
-  ; The binary test file does not have the newer 'contract' and 'aml' bits set, so this is not fully 'fast'.
-  ; CHECK: %call.fast = call reassoc nnan ninf nsz arcp float @fmf1()
+  ; CHECK: %call.fast = call fast float @fmf1()
 
   ; Throw in some other attributes to make sure those stay in the right places.
 
diff --git a/test/Bitcode/compatibility-3.9.ll b/test/Bitcode/compatibility-3.9.ll
index c456fefe9d40..7c84daa7d3c4 100644
--- a/test/Bitcode/compatibility-3.9.ll
+++ b/test/Bitcode/compatibility-3.9.ll
@@ -758,9 +758,7 @@ define void @fastmathflags(float %op1, float %op2) {
   %f.arcp = fadd arcp float %op1, %op2
   ; CHECK: %f.arcp = fadd arcp float %op1, %op2
   %f.fast = fadd fast float %op1, %op2
-  ; 'fast' used to be its own bit, but this changed in Oct 2017.
-  ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
-  ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2
+  ; CHECK: %f.fast = fadd fast float %op1, %op2
   ret void
 }
 
@@ -773,9 +771,7 @@ declare <4 x double> @fmf3()
 ; CHECK-LABEL: fastMathFlagsForCalls(
 define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) {
   %call.fast = call fast float @fmf1()
-  ; 'fast' used to be its own bit, but this changed in Oct 2017.
-  ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
-  ; CHECK: %call.fast = call reassoc nnan ninf nsz arcp float @fmf1()
+  ; CHECK: %call.fast = call fast float @fmf1()
 
   ; Throw in some other attributes to make sure those stay in the right places.
 
diff --git a/test/Bitcode/compatibility-4.0.ll b/test/Bitcode/compatibility-4.0.ll
index 68446a7d5b0a..9e34d48c95f7 100644
--- a/test/Bitcode/compatibility-4.0.ll
+++ b/test/Bitcode/compatibility-4.0.ll
@@ -757,10 +757,8 @@ define void @fastmathflags(float %op1, float %op2) {
   ; CHECK: %f.nsz = fadd nsz float %op1, %op2
   %f.arcp = fadd arcp float %op1, %op2
   ; CHECK: %f.arcp = fadd arcp float %op1, %op2
-  ; 'fast' used to be its own bit, but this changed in Oct 2017.
-  ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
   %f.fast = fadd fast float %op1, %op2
-  ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2
+  ; CHECK: %f.fast = fadd fast float %op1, %op2
   ret void
 }
 
@@ -773,9 +771,7 @@ declare <4 x double> @fmf3()
 ; CHECK-LABEL: fastMathFlagsForCalls(
 define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) {
   %call.fast = call fast float @fmf1()
-  ; 'fast' used to be its own bit, but this changed in Oct 2017.
-  ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
-  ; CHECK: %call.fast = call reassoc nnan ninf nsz arcp float @fmf1()
+  ; CHECK: %call.fast = call fast float @fmf1()
 
   ; Throw in some other attributes to make sure those stay in the right places.
 
diff --git a/test/Bitcode/compatibility-5.0.ll b/test/Bitcode/compatibility-5.0.ll
index cdadc032d87b..a4b3fca82b7b 100644
--- a/test/Bitcode/compatibility-5.0.ll
+++ b/test/Bitcode/compatibility-5.0.ll
@@ -765,9 +765,7 @@ define void @fastmathflags(float %op1, float %op2) {
   %f.contract = fadd contract float %op1, %op2
   ; CHECK: %f.contract = fadd contract float %op1, %op2
   %f.fast = fadd fast float %op1, %op2
-  ; 'fast' used to be its own bit, but this changed in Oct 2017.
-  ; The binary test file does not have the newer 'afn' bit set, so this is not fully 'fast'.
-  ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp contract float %op1, %op2
+  ; CHECK: %f.fast = fadd fast float %op1, %op2
   ret void
 }
 
@@ -780,9 +778,7 @@ declare <4 x double> @fmf3()
 ; CHECK-LABEL: fastMathFlagsForCalls(
 define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) {
   %call.fast = call fast float @fmf1()
-  ; 'fast' used to be its own bit, but this changed in Oct 2017.
-  ; The binary test file does not have the newer 'afn' bit set, so this is not fully 'fast'.
-  ; CHECK: %call.fast = call reassoc nnan ninf nsz arcp contract float @fmf1()
+  ; CHECK: %call.fast = call fast float @fmf1()
 
   ; Throw in some other attributes to make sure those stay in the right places.
 
diff --git a/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir b/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir
new file mode 100644
index 000000000000..fd1998037d38
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir
@@ -0,0 +1,131 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -o - -global-isel -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s
+
+# PR36345
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-arm-none-eabi"
+
+  ; Function Attrs: noinline nounwind optnone
+  define void @fp16_to_gpr([2 x half], [2 x half]* %addr) {
+    ret void
+  }
+
+  define void @gpr_to_fp16() {
+    ret void
+  }
+
+  define void @gpr_to_fp16_physreg() {
+    ret void
+  }
+...
+---
+name:            fp16_to_gpr
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: fpr }
+  - { id: 2, class: fpr }
+  - { id: 3, class: gpr }
+  - { id: 4, class: gpr }
+  - { id: 5, class: gpr }
+  - { id: 6, class: gpr }
+  - { id: 7, class: gpr }
+  - { id: 8, class: gpr }
+  - { id: 9, class: gpr }
+  - { id: 10, class: gpr }
+  - { id: 11, class: gpr }
+  - { id: 12, class: gpr }
+body:             |
+  bb.1 (%ir-block.1):
+    liveins: %h0, %h1, %x0
+
+    ; CHECK-LABEL: name: fp16_to_gpr
+    ; CHECK: liveins: %h0, %h1, %x0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY %h0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY %h1
+    ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]]
+    ; CHECK: [[BFMWri:%[0-9]+]]:gpr32 = BFMWri [[DEF]], [[COPY2]], 0, 15
+    ; CHECK: [[SUBREG_TO_REG1:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG1]]
+    ; CHECK: [[BFMWri1:%[0-9]+]]:gpr32 = BFMWri [[BFMWri]], [[COPY3]], 16, 15
+    ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[BFMWri1]]
+    ; CHECK: [[COPY5:%[0-9]+]]:gpr64sp = COPY %x0
+    ; CHECK: STRWui [[COPY4]], [[COPY5]], 0 :: (store 4 into %ir.addr, align 2)
+    ; CHECK: RET_ReallyLR
+    %1:fpr(s16) = COPY %h0
+    %2:fpr(s16) = COPY %h1
+    %3:gpr(s32) = G_IMPLICIT_DEF
+    %11:gpr(s16) = COPY %1(s16)
+    %4:gpr(s32) = G_INSERT %3, %11(s16), 0
+    %12:gpr(s16) = COPY %2(s16)
+    %5:gpr(s32) = G_INSERT %4, %12(s16), 16
+    %0:gpr(s32) = COPY %5(s32)
+    %6:gpr(p0) = COPY %x0
+    G_STORE %0(s32), %6(p0) :: (store 4 into %ir.addr, align 2)
+    RET_ReallyLR
+
+...
+
+---
+name:            gpr_to_fp16
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: fpr }
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %w0
+
+    ; CHECK-LABEL: name: gpr_to_fp16
+    ; CHECK: liveins: %w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[COPY1]]
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr16 = COPY [[COPY2]].hsub
+    ; CHECK: [[COPY4:%[0-9]+]]:fpr16 = COPY [[COPY3]]
+    ; CHECK: %h0 = COPY [[COPY4]]
+    ; CHECK: RET_ReallyLR implicit %h0
+    %0:gpr(s32) = COPY %w0
+    %1:gpr(s16) = G_TRUNC %0(s32)
+    %2:fpr(s16) = COPY %1(s16)
+    %h0 = COPY %2(s16)
+    RET_ReallyLR implicit %h0
+
+...
+---
+name:            gpr_to_fp16_physreg
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %w0
+
+    ; CHECK-LABEL: name: gpr_to_fp16_physreg
+    ; CHECK: liveins: %w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[COPY1]]
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr16 = COPY [[COPY2]].hsub
+    ; CHECK: %h0 = COPY [[COPY3]]
+    ; CHECK: RET_ReallyLR implicit %h0
+    %0:gpr(s32) = COPY %w0
+    %1:gpr(s16) = G_TRUNC %0(s32)
+    %h0 = COPY %1(s16)
+    RET_ReallyLR implicit %h0
+
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir b/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir
index 33b483511065..1980048eb456 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir
@@ -1,8 +1,8 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
 
 ---
-# CHECK-LABEL: name: insert_gprs
-name:            insert_gprs
+name:            insert_gprx
 legalized:       true
 regBankSelected: true
 
@@ -10,26 +10,56 @@ body:             |
   bb.0:
     liveins: %x0
 
+    ; CHECK-LABEL: name: insert_gprx
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32
+    ; CHECK: [[BFMXri:%[0-9]+]]:gpr64 = BFMXri [[DEF]], [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: [[SUBREG_TO_REG1:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32
+    ; CHECK: [[BFMXri1:%[0-9]+]]:gpr64 = BFMXri [[DEF]], [[SUBREG_TO_REG1]], 51, 31
+    ; CHECK: %x0 = COPY [[BFMXri]]
+    ; CHECK: %x1 = COPY [[BFMXri1]]
     %0:gpr(s32) = COPY %w0
 
     %1:gpr(s64) = G_IMPLICIT_DEF
 
-    ; CHECK:  body:
-    ; CHECK: [[TMP:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32
-    ; CHECK: %2:gpr64 = BFMXri %1, [[TMP]], 0, 31
     %2:gpr(s64) = G_INSERT %1, %0, 0
 
-    ; CHECK: [[TMP:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32
-    ; CHECK: %3:gpr64 = BFMXri %1, [[TMP]], 51, 31
     %3:gpr(s64) = G_INSERT %1, %0, 13
 
     %x0 = COPY %2
     %x1 = COPY %3
 ...
 
+---
+name:            insert_gprw
+legalized:       true
+regBankSelected: true
+
+body:             |
+  bb.0:
+    liveins: %w0, %w1
+    ; CHECK-LABEL: name: insert_gprw
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+    ; CHECK: [[BFMWri:%[0-9]+]]:gpr32 = BFMWri [[DEF]], [[COPY1]], 0, 15
+    ; CHECK: [[BFMWri1:%[0-9]+]]:gpr32 = BFMWri [[BFMWri]], [[COPY2]], 16, 15
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[BFMWri1]]
+    ; CHECK: %w0 = COPY [[COPY3]]
+    %1:gpr(s32) = COPY %w0
+    %2:gpr(s32) = COPY %w1
+    %3:gpr(s16) = G_TRUNC %1(s32)
+    %4:gpr(s16) = G_TRUNC %1(s32)
+    %5:gpr(s32) = G_IMPLICIT_DEF
+    %6:gpr(s32) = G_INSERT %5, %3(s16), 0
+    %7:gpr(s32) = G_INSERT %6, %4(s16), 16
+    %0:gpr(s32) = COPY %7(s32)
+    %w0 = COPY %0
+...
 
 ---
-# CHECK-LABEL: name: extract_gprs
 name:            extract_gprs
 legalized:       true
 regBankSelected: true
@@ -38,17 +68,49 @@ body:             |
   bb.0:
     liveins: %x0
 
+    ; CHECK-LABEL: name: extract_gprs
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[COPY]], 0, 31
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[UBFMXri]].sub_32
+    ; CHECK: [[UBFMXri1:%[0-9]+]]:gpr64 = UBFMXri [[COPY]], 13, 44
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[UBFMXri1]].sub_32
+    ; CHECK: %w0 = COPY [[COPY1]]
+    ; CHECK: %w1 = COPY [[COPY2]]
     %0:gpr(s64) = COPY %x0
 
-    ; CHECK:  body:
-    ; CHECK: [[TMP:%[0-9]+]]:gpr64 = UBFMXri %0, 0, 31
-    ; CHECK: %1:gpr32 = COPY [[TMP]].sub_32
     %1:gpr(s32) = G_EXTRACT %0, 0
 
-    ; CHECK: [[TMP:%[0-9]+]]:gpr64 = UBFMXri %0, 13, 44
-    ; CHECK: %2:gpr32 = COPY [[TMP]].sub_32
     %2:gpr(s32) = G_EXTRACT %0, 13
 
     %w0 = COPY %1
     %w1 = COPY %2
 ...
+
+---
+name:            extract_gprw
+legalized:       true
+regBankSelected: true
+
+body:             |
+  bb.0:
+    liveins: %w0
+
+    ; CHECK-LABEL: name: extract_gprw
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 0, 15
+    ; CHECK: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 15, 30
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[UBFMWri]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
+    ; CHECK: %h0 = COPY [[COPY2]]
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]]
+    ; CHECK: [[COPY4:%[0-9]+]]:fpr16 = COPY [[COPY3]].hsub
+    ; CHECK: %h1 = COPY [[COPY4]]
+    %0:gpr(s32) = COPY %w0
+
+    %1:gpr(s16) = G_EXTRACT %0, 0
+
+    %2:gpr(s16) = G_EXTRACT %0, 15
+
+    %h0 = COPY %1
+    %h1 = COPY %2
+...
diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll
index 420c7b80b8d3..adf22323ae65 100644
--- a/test/CodeGen/AMDGPU/smrd.ll
+++ b/test/CodeGen/AMDGPU/smrd.ll
@@ -261,8 +261,42 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}smrd_sgpr_descriptor_promoted
+; GCN: v_readfirstlane
+define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), i32) #0 {
+main_body:
+  %descptr = bitcast [0 x i8] addrspace(2)* %0 to <4 x i32> addrspace(2)*, !amdgpu.uniform !0
+  br label %.outer_loop_header
+
+ret_block:                                       ; preds = %.outer, %.label22, %main_body
+  ret void
+
+.outer_loop_header:
+  br label %.inner_loop_header
+
+.inner_loop_header:                                     ; preds = %.inner_loop_body, %.outer_loop_header
+  %loopctr.1 = phi i32 [ 0, %.outer_loop_header ], [ %loopctr.2, %.inner_loop_body ]
+  %loopctr.2 = add i32 %loopctr.1, 1
+  %inner_br1 = icmp slt i32 %loopctr.2, 10
+  br i1 %inner_br1, label %.inner_loop_body, label %ret_block
+
+.inner_loop_body:
+  %descriptor = load <4 x i32>, <4 x i32> addrspace(2)* %descptr, align 16, !invariant.load !0
+  %load1result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 0)
+  %inner_br2 = icmp uge i32 %1, 10
+  br i1 %inner_br2, label %.inner_loop_header, label %.outer_loop_body
+
+.outer_loop_body:
+  %offset = shl i32 %loopctr.2, 6
+  %load2result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 %offset)
+  %outer_br = fcmp ueq float %load2result, 0x0
+  br i1 %outer_br, label %.outer_loop_header, label %ret_block
+}
+
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
+
+!0 = !{}
diff --git a/test/CodeGen/PowerPC/pr36292.ll b/test/CodeGen/PowerPC/pr36292.ll
new file mode 100644
index 000000000000..a171918b9e07
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr36292.ll
@@ -0,0 +1,46 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s  | \
+; RUN:   FileCheck %s --implicit-check-not=mtctr --implicit-check-not=bdnz
+$test = comdat any
+
+; No CTR loop due to frem (since it is always a call).
+define void @test() #0 comdat {
+; CHECK-LABEL: test:
+; CHECK:    ld 29, 0(3)
+; CHECK:    ld 30, 40(1)
+; CHECK:    xxlxor 31, 31, 31
+; CHECK:    cmpld 30, 29
+; CHECK-NEXT:    bge- 0, .LBB0_2
+; CHECK-NEXT:    .p2align 5
+; CHECK-NEXT:  .LBB0_1: # %bounds.ok
+; CHECK:    fmr 1, 31
+; CHECK-NEXT:    lfsx 2, 0, 3
+; CHECK-NEXT:    bl fmodf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi 30, 30, 1
+; CHECK-NEXT:    stfsx 1, 0, 3
+; CHECK-NEXT:    cmpld 30, 29
+; CHECK-NEXT:    blt+ 0, .LBB0_1
+; CHECK-NEXT:  .LBB0_2: # %bounds.fail
+; CHECK-NEXT:    std 30, 40(1)
+  %pos = alloca i64, align 8
+  br label %forcond
+
+forcond:                                          ; preds = %bounds.ok, %0
+  %1 = load i64, i64* %pos
+  %.len1 = load i64, i64* undef
+  %bounds.cmp = icmp ult i64 %1, %.len1
+  br i1 %bounds.cmp, label %bounds.ok, label %bounds.fail
+
+bounds.ok:                                        ; preds = %forcond
+  %2 = load float, float* undef
+  %3 = frem float 0.000000e+00, %2
+  store float %3, float* undef
+  %4 = load i64, i64* %pos
+  %5 = add i64 %4, 1
+  store i64 %5, i64* %pos
+  br label %forcond
+
+bounds.fail:                                      ; preds = %forcond
+  unreachable
+}
+
diff --git a/test/CodeGen/X86/clwb.ll b/test/CodeGen/X86/clwb.ll
index 0bbb14917f7f..e5906c6ce68c 100644
--- a/test/CodeGen/X86/clwb.ll
+++ b/test/CodeGen/X86/clwb.ll
@@ -1,5 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: clwb is available in Skylake Server, not available in the newer
+; NOTE: Cannon Lake arch, but available again in the newer Ice Lake arch.
 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=clwb | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=skx | FileCheck %s
+; RUN: not llc < %s -mtriple=i686-apple-darwin -mcpu=cannonlake 2>&1 | FileCheck %s --check-prefix=CNL
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=icelake | FileCheck %s
+
+; CNL: LLVM ERROR: Cannot select: intrinsic %llvm.x86.clwb
 
 define void @clwb(i8* %p) nounwind {
 ; CHECK-LABEL: clwb:
diff --git a/test/Transforms/InstCombine/pr36362.ll b/test/Transforms/InstCombine/pr36362.ll
new file mode 100644
index 000000000000..412691543a15
--- /dev/null
+++ b/test/Transforms/InstCombine/pr36362.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;RUN: opt -instcombine -S %s | FileCheck %s
+
+; We shouldn't remove the select before the srem
+define i32 @foo(i1 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[A:%.*]], i32 [[B:%.*]], i32 -1
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[C:%.*]], [[SEL1]]
+; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[A]], i32 [[REM]], i32 0
+; CHECK-NEXT:    ret i32 [[SEL2]]
+;
+  %sel1 = select i1 %a, i32 %b, i32 -1
+  %rem = srem i32 %c, %sel1
+  %sel2 = select i1 %a, i32 %rem, i32 0
+  ret i32 %sel2
+}
+
diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll
index b28eea0bc2aa..6d747877c58e 100644
--- a/test/Transforms/LICM/sinking.ll
+++ b/test/Transforms/LICM/sinking.ll
@@ -670,6 +670,67 @@ try.cont:
   ret void
 }
 
+; The sinkable call should be sunk into an exit block split. After splitting
+; the exit block, BlockColor for new blocks should be added properly so
+; that we should be able to access valid ColorVector.
+;
+; CHECK-LABEL:@test21_pr36184
+; CHECK-LABEL: Loop
+; CHECK-NOT: %sinkableCall
+; CHECK-LABEL:Out.split.loop.exit
+; CHECK: %sinkableCall
+define i32 @test21_pr36184(i8* %P) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  br label %loop.ph
+
+loop.ph:
+  br label %Loop
+
+Loop:
+  %sinkableCall = call i32 @strlen( i8* %P ) readonly
+  br i1 undef, label %ContLoop, label %Out
+
+ContLoop:
+  br i1 undef, label %Loop, label %Out
+
+Out:
+  %idx = phi i32 [ %sinkableCall, %Loop ], [0, %ContLoop ]
+  ret i32 %idx
+}
+
+; We do not support splitting a landingpad block if BlockColors is not empty.
+; CHECK-LABEL: @test22
+; CHECK-LABEL: while.body2
+; CHECK-LABEL: %mul
+; CHECK-NOT: lpadBB.split{{.*}}
+define void @test22(i1 %b, i32 %v1, i32 %v2) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  br label %while.cond
+while.cond:
+  br i1 %b, label %try.cont, label %while.body
+
+while.body:
+  invoke void @may_throw()
+          to label %while.body2 unwind label %lpadBB
+
+while.body2:
+  %v = call i32 @getv()
+  %mul = mul i32 %v, %v2
+  invoke void @may_throw2()
+          to label %while.cond unwind label %lpadBB
+lpadBB:
+  %.lcssa1 = phi i32 [ 0, %while.body ], [ %mul, %while.body2 ]
+  landingpad { i8*, i32 }
+               catch i8* null
+  br label %lpadBBSucc1
+
+lpadBBSucc1:
+  ret void
+
+try.cont:
+  ret void
+}
+
 declare void @may_throw()
 declare void @may_throw2()
 declare i32 @__CxxFrameHandler3(...)
diff --git a/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
index d9c9632be047..08d163fe6299 100644
--- a/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
+++ b/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -scev-version-unknown < %s 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/pr35773.ll b/test/Transforms/LoopVectorize/pr35773.ll
index 362ece70b898..308bb393cc4e 100644
--- a/test/Transforms/LoopVectorize/pr35773.ll
+++ b/test/Transforms/LoopVectorize/pr35773.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -scev-version-unknown < %s 2>&1 | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 @a = common local_unnamed_addr global i32 0, align 4
 @b = common local_unnamed_addr global i8 0, align 1
diff --git a/test/Transforms/LoopVectorize/reduction-small-size.ll b/test/Transforms/LoopVectorize/reduction-small-size.ll
index b44beb8ce68f..879f1c3c5ad4 100644
--- a/test/Transforms/LoopVectorize/reduction-small-size.ll
+++ b/test/Transforms/LoopVectorize/reduction-small-size.ll
@@ -14,7 +14,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK-NEXT:    [[TMP17]] = zext <4 x i8> [[TMP16]] to <4 x i32>
 ; CHECK-NEXT:    br i1 {{.*}}, label %middle.block, label %vector.body
 ;
-define void @PR34687(i1 %c, i32 %x, i32 %n) {
+define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
 entry:
   br label %for.body
 
@@ -36,5 +36,38 @@ if.end:
 
 for.end:
   %tmp2 = phi i32 [ %r.next, %if.end ]
-  ret void
+  %tmp3 = trunc i32 %tmp2 to i8
+  ret i8 %tmp3
+}
+
+; CHECK-LABEL: @PR35734(
+; CHECK:       vector.ph:
+; CHECK:         [[TMP3:%.*]] = insertelement <4 x i32> zeroinitializer, i32 %y, i32 0
+; CHECK-NEXT:    br label %vector.body
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP3]], %vector.ph ], [ [[TMP9:%.*]], %vector.body ]
+; CHECK:         [[TMP5:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK:         [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i1>
+; CHECK-NEXT:    [[TMP9]] = sext <4 x i1> [[TMP8]] to <4 x i32>
+; CHECK-NEXT:    br i1 {{.*}}, label %middle.block, label %vector.body
+;
+define i32 @PR35734(i32 %x, i32 %y) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %x, %entry ], [ %i.next, %for.body ]
+  %r = phi i32 [ %y, %entry ], [ %r.next, %for.body ]
+  %tmp0 = and i32 %r, 1
+  %r.next = add i32 %tmp0, -1
+  %i.next = add nsw i32 %i, 1
+  %cond = icmp sgt i32 %i, 77
+  br i1 %cond, label %for.end, label %for.body
+
+for.end:
+  %tmp1 = phi i32 [ %r.next, %for.body ]
+  ret i32 %tmp1
 }
diff --git a/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll b/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
index 4ddc6a652179..f7877245b0d4 100644
--- a/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
+++ b/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
-; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1
+; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -scev-version-unknown < %s | FileCheck %s -check-prefix=VF8
+; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -scev-version-unknown < %s | FileCheck %s -check-prefix=VF1
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
diff --git a/test/tools/llvm-config/system-libs.windows.test b/test/tools/llvm-config/system-libs.windows.test
index 2c6e03afa2d9..09970cf68994 100644
--- a/test/tools/llvm-config/system-libs.windows.test
+++ b/test/tools/llvm-config/system-libs.windows.test
@@ -2,6 +2,6 @@ RUN: llvm-config --link-static --system-libs 2>&1 | FileCheck %s
 REQUIRES: static-libs
 REQUIRES: system-windows
 CHECK-NOT: -l
-CHECK: psapi.lib shell32.lib ole32.lib uuid.lib
+CHECK: psapi.lib shell32.lib ole32.lib uuid.lib advapi32.lib
 CHECK-NOT: error
 CHECK-NOT: warning