summaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/avx512-intrinsics.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/avx512-intrinsics.ll')
-rw-r--r--test/CodeGen/X86/avx512-intrinsics.ll422
1 files changed, 183 insertions, 239 deletions
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index f800d01064ba9..563cad04b8c2d 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -112,6 +112,8 @@ define i16 @unpckbw_test(i16 %a0, i16 %a1) {
}
declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
+; TODO: the two kxnor instructions here a no op and should be elimintaed,
+; probably by FoldConstantArithmetic in SelectionDAG.
define i16 @test_kxnor(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kxnor:
; CHECK: ## BB#0:
@@ -121,6 +123,8 @@ define i16 @test_kxnor(i16 %a0, i16 %a1) {
; CHECK-NEXT: kmovw %eax, %k2
; CHECK-NEXT: kxorw %k0, %k1, %k0
; CHECK-NEXT: kxorw %k0, %k2, %k0
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: kxnorw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
@@ -269,16 +273,15 @@ declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x
define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm2
-; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
@@ -296,16 +299,15 @@ declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <
define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm2, %xmm3
; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm2
-; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
@@ -477,11 +479,11 @@ declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtsd2usi %xmm0, %rax
-; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rcx
-; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rax
+; CHECK-NEXT: vcvtsd2usi %xmm0, %rcx
+; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rax
+; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rdx
; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4)
@@ -496,11 +498,11 @@ declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2si64:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtsd2si %xmm0, %rax
-; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rcx
-; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rax
+; CHECK-NEXT: vcvtsd2si %xmm0, %rcx
+; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rax
+; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rdx
; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4)
@@ -515,11 +517,11 @@ declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2usi64:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtss2usi %xmm0, %rax
-; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rcx
-; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rax
+; CHECK-NEXT: vcvtss2usi %xmm0, %rcx
+; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rax
+; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rdx
; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4)
@@ -534,11 +536,11 @@ declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2si64:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtss2si %xmm0, %rax
-; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rcx
-; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %rax
+; CHECK-NEXT: vcvtss2si %xmm0, %rcx
+; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rax
+; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %rdx
; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4)
@@ -553,11 +555,11 @@ declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2usi32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtsd2usi %xmm0, %eax
-; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %ecx
-; CHECK-NEXT: addl %eax, %ecx
-; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %eax
+; CHECK-NEXT: vcvtsd2usi %xmm0, %ecx
+; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %eax
+; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %edx
; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4)
@@ -572,11 +574,11 @@ declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2si32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtsd2si %xmm0, %eax
-; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %ecx
-; CHECK-NEXT: addl %eax, %ecx
-; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %eax
+; CHECK-NEXT: vcvtsd2si %xmm0, %ecx
+; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %eax
+; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %edx
; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4)
@@ -591,11 +593,11 @@ declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2usi32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtss2usi %xmm0, %eax
-; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %ecx
-; CHECK-NEXT: addl %eax, %ecx
-; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %eax
+; CHECK-NEXT: vcvtss2usi %xmm0, %ecx
+; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %eax
+; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %edx
; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4)
@@ -610,11 +612,11 @@ declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2si32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtss2si %xmm0, %eax
-; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %ecx
-; CHECK-NEXT: addl %eax, %ecx
-; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %eax
+; CHECK-NEXT: vcvtss2si %xmm0, %ecx
+; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %eax
+; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %edx
; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4)
@@ -683,9 +685,8 @@ define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1
; CHECK-NEXT: vcvtps2ph $2, %zmm0, (%rsi)
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm0
; CHECK-NEXT: retq
%res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
%res2 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 %mask)
@@ -2215,7 +2216,6 @@ declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>,
define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rn:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
@@ -2227,7 +2227,6 @@ define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x f
define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
@@ -2239,7 +2238,6 @@ define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x f
define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_ru:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
@@ -2251,7 +2249,6 @@ define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x f
define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rz:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
@@ -2263,7 +2260,6 @@ define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x f
define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_current:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
@@ -2275,7 +2271,6 @@ define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <
define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_add_ss_rn:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -2295,7 +2290,6 @@ define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, float* %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_current_memfold:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
@@ -2312,7 +2306,6 @@ define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, float* %a1
define <4 x float> @test_maskz_add_ss_current_memfold(<4 x float> %a0, float* %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_add_ss_current_memfold:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -2330,7 +2323,6 @@ declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x doubl
define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_rn:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovapd %xmm2, %xmm0
@@ -2342,7 +2334,6 @@ define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2
define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_rd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovapd %xmm2, %xmm0
@@ -2354,7 +2345,6 @@ define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2
define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_ru:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovapd %xmm2, %xmm0
@@ -2366,7 +2356,6 @@ define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2
define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_rz:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovapd %xmm2, %xmm0
@@ -2378,7 +2367,6 @@ define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2
define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_current:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovapd %xmm2, %xmm0
@@ -2390,7 +2378,6 @@ define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1
define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_add_sd_rn:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -2410,7 +2397,6 @@ define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
define <2 x double> @test_mask_add_sd_current_memfold(<2 x double> %a0, double* %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_current_memfold:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vaddsd (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovapd %xmm1, %xmm0
@@ -2425,7 +2411,6 @@ define <2 x double> @test_mask_add_sd_current_memfold(<2 x double> %a0, double*
define <2 x double> @test_maskz_add_sd_current_memfold(<2 x double> %a0, double* %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_add_sd_current_memfold:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vaddsd (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -2441,7 +2426,6 @@ declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>,
define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_ss_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
@@ -2453,7 +2437,6 @@ define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x
define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_ss_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -2473,7 +2456,6 @@ define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %xmm2, %xmm0
@@ -2485,7 +2467,6 @@ define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x floa
define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -2505,7 +2486,6 @@ define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
define <4 x float> @test_mask_max_ss_memfold(<4 x float> %a0, float* %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_ss_memfold:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmaxss (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
@@ -2522,7 +2502,6 @@ define <4 x float> @test_mask_max_ss_memfold(<4 x float> %a0, float* %a1, <4 x f
define <4 x float> @test_maskz_max_ss_memfold(<4 x float> %a0, float* %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_ss_memfold:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmaxss (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -2539,7 +2518,6 @@ declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x doubl
define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_sd_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovapd %xmm2, %xmm0
@@ -2551,7 +2529,6 @@ define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2
define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_sd_sae:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -2571,7 +2548,6 @@ define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovapd %xmm2, %xmm0
@@ -2583,7 +2559,6 @@ define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x d
define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -2603,7 +2578,6 @@ define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
define <2 x double> @test_mask_max_sd_memfold(<2 x double> %a0, double* %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_sd_memfold:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmaxsd (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovapd %xmm1, %xmm0
@@ -2618,7 +2592,6 @@ define <2 x double> @test_mask_max_sd_memfold(<2 x double> %a0, double* %a1, <2
define <2 x double> @test_maskz_max_sd_memfold(<2 x double> %a0, double* %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_sd_memfold:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -3652,16 +3625,15 @@ declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4
define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_getexp_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
+; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm5
; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm2
-; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm3 {%k1} {z}
-; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm0
+; CHECK-NEXT: vaddps %xmm5, %xmm4, %xmm1
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
@@ -3679,16 +3651,15 @@ declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>,
define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_getexp_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %xmm2, %xmm3
-; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
+; CHECK-NEXT: vmovapd %xmm2, %xmm4
+; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4 {%k1}
+; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm5 {%k1} {z}
; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm2
-; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
-; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
-; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vaddpd %xmm2, %xmm4, %xmm0
+; CHECK-NEXT: vaddpd %xmm3, %xmm5, %xmm1
+; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
@@ -3706,11 +3677,9 @@ declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32
define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
@@ -3721,18 +3690,18 @@ define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8
define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
; CHECK: ## BB#0:
+; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %ecx
; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0
-; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1
-; CHECK-NEXT: korw %k0, %k1, %k0
-; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1
-; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2
-; CHECK-NEXT: korw %k1, %k2, %k1
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k2
-; CHECK-NEXT: kandw %k2, %k1, %k1
-; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %edx
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %esi
+; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: orb %cl, %dl
+; CHECK-NEXT: orb %sil, %al
+; CHECK-NEXT: orb %dl, %al
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
@@ -3752,11 +3721,9 @@ declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
@@ -3768,17 +3735,17 @@ define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %
define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1
-; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %edx
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k2 {%k1}
-; CHECK-NEXT: kmovw %k2, %ecx
-; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
-; CHECK-NEXT: kmovw %k1, %edx
-; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %esi
+; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: andb %cl, %al
+; CHECK-NEXT: andb %cl, %dl
+; CHECK-NEXT: andb %sil, %al
; CHECK-NEXT: andb %dl, %al
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
@@ -3899,16 +3866,15 @@ declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>
define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %xmm2, %xmm3
-; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
-; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm3
-; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4
+; CHECK-NEXT: vmovapd %xmm2, %xmm4
+; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1}
+; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5 {%k1} {z}
; CHECK-NEXT: vgetmantsd $11, {sae}, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vaddpd %xmm4, %xmm2, %xmm0
-; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vaddpd %xmm5, %xmm4, %xmm0
+; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
@@ -3925,15 +3891,14 @@ declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i
define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
-; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm2
-; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3
+; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vgetmantss $11, {sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm4, %xmm2, %xmm1
; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
@@ -4057,7 +4022,6 @@ declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double>, <4 x flo
define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<2 x double> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
@@ -4074,7 +4038,6 @@ declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float>, <2 x doubl
define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
@@ -4435,8 +4398,8 @@ define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, <
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vprold $3, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vprold $3, %zmm0, %zmm2 {%k1} {z}
-; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vprold $3, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
@@ -4455,8 +4418,8 @@ define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vprolq $3, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vprolq $3, %zmm0, %zmm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vprolq $3, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
@@ -4557,9 +4520,9 @@ define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <
; CHECK-NEXT: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z}
-; CHECK-NEXT: vaddpd %zmm4, %zmm3, %zmm3
; CHECK-NEXT: vfixupimmpd $3, {sae}, %zmm2, %zmm1, %zmm0
-; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: vaddpd %zmm4, %zmm3, %zmm1
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4)
%res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4)
@@ -4580,9 +4543,9 @@ define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0,
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vmovapd %zmm0, %zmm5
; CHECK-NEXT: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z}
-; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm3
; CHECK-NEXT: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0
-; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm1
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4)
%res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4)
@@ -4597,16 +4560,15 @@ declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float>, <4 x float>,
define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vmovaps %xmm0, %xmm5
; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1}
-; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm3
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4)
@@ -4621,16 +4583,15 @@ declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float>, <4 x float>,
define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm3
-; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm4
-; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4
+; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4 {%k1} {z}
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
-; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8)
@@ -4651,9 +4612,9 @@ define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, <
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vmovaps %zmm0, %zmm5
; CHECK-NEXT: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1}
-; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm3
; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0
-; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm1
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
%res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4)
@@ -4691,16 +4652,15 @@ declare <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double>, <2 x double
define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm0, %xmm3
-; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm0, %xmm4
-; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4
+; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4 {%k1}
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1}
-; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
@@ -4715,16 +4675,15 @@ declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double>, <2 x doubl
define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm0, %xmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vmovapd %xmm0, %xmm5
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z}
-; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm3
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
-; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
@@ -4816,18 +4775,17 @@ declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>,
define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm0, %xmm3
-; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK-NEXT: vmovapd %xmm0, %xmm4
-; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm4
-; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm3
+; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm0, %xmm4
-; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 {%k1}
-; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0
-; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
-; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm4 {%k1}
+; CHECK-NEXT: vmovapd %xmm0, %xmm5
+; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm5
+; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm1
+; CHECK-NEXT: vaddpd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
@@ -4844,18 +4802,17 @@ declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4
define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm3
-; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %xmm0, %xmm4
-; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm4
-; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm3
+; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm4
-; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 {%k1}
-; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0
-; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm4 {%k1}
+; CHECK-NEXT: vmovaps %xmm0, %xmm5
+; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm5
+; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm1
+; CHECK-NEXT: vaddps %xmm0, %xmm5, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
@@ -4872,7 +4829,6 @@ declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>
define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm0, %xmm3
; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z}
@@ -4890,7 +4846,6 @@ declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <
define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -4904,18 +4859,17 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>
define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm2, %xmm3
-; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovapd %xmm2, %xmm4
-; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm4
-; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm3
+; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm2, %xmm4
-; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1}
-; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2
-; CHECK-NEXT: vaddpd %xmm4, %xmm2, %xmm0
-; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm4 {%k1}
+; CHECK-NEXT: vmovapd %xmm2, %xmm5
+; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm5
+; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
+; CHECK-NEXT: vaddpd %xmm2, %xmm5, %xmm1
+; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
@@ -4932,18 +4886,17 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <
define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm3
-; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %xmm2, %xmm4
-; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm4
-; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm3
+; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm4
-; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1}
-; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2
-; CHECK-NEXT: vaddps %xmm4, %xmm2, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm4 {%k1}
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
+; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm5
+; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm0
+; CHECK-NEXT: vaddps %xmm2, %xmm5, %xmm1
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
@@ -4959,7 +4912,6 @@ define void @fmadd_ss_mask_memfold(float* %a, float* %b, i8 %c) {
; CHECK-LABEL: fmadd_ss_mask_memfold:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: andl $1, %edx
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1}
; CHECK-NEXT: vmovss %xmm0, (%rdi)
@@ -4987,7 +4939,6 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
; CHECK-LABEL: fmadd_ss_maskz_memfold:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: andl $1, %edx
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovss %xmm0, (%rdi)
@@ -5015,7 +4966,6 @@ define void @fmadd_sd_mask_memfold(double* %a, double* %b, i8 %c) {
; CHECK-LABEL: fmadd_sd_mask_memfold:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: andl $1, %edx
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1}
; CHECK-NEXT: vmovlpd %xmm0, (%rdi)
@@ -5039,7 +4989,6 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
; CHECK-LABEL: fmadd_sd_maskz_memfold:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: andl $1, %edx
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovlpd %xmm0, (%rdi)
@@ -5064,18 +5013,17 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>
define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm2, %xmm3
-; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovapd %xmm2, %xmm4
-; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm4
-; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm3
+; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm2, %xmm4
-; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1}
-; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2
-; CHECK-NEXT: vaddpd %xmm4, %xmm2, %xmm0
-; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm4 {%k1}
+; CHECK-NEXT: vmovapd %xmm2, %xmm5
+; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm5
+; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
+; CHECK-NEXT: vaddpd %xmm2, %xmm5, %xmm1
+; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
@@ -5092,18 +5040,17 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <
define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm3
-; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %xmm2, %xmm4
-; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm4
-; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm3
+; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm4
-; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1}
-; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2
-; CHECK-NEXT: vaddps %xmm4, %xmm2, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm4 {%k1}
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
+; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm5
+; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm0
+; CHECK-NEXT: vaddps %xmm2, %xmm5, %xmm1
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
@@ -5120,18 +5067,17 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double
define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm2, %xmm3
-; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovapd %xmm2, %xmm4
-; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm4
-; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm3
+; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm2, %xmm4
-; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1}
-; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2
-; CHECK-NEXT: vaddpd %xmm4, %xmm2, %xmm0
-; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm4 {%k1}
+; CHECK-NEXT: vmovapd %xmm2, %xmm5
+; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm5
+; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
+; CHECK-NEXT: vaddpd %xmm2, %xmm5, %xmm1
+; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
@@ -5148,18 +5094,17 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>,
define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm3
-; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 {%k1}
-; CHECK-NEXT: vmovaps %xmm2, %xmm4
-; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm4
-; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm3
+; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm4
-; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1}
-; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2
-; CHECK-NEXT: vaddps %xmm4, %xmm2, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm4 {%k1}
+; CHECK-NEXT: vmovaps %xmm2, %xmm5
+; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm5
+; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm0
+; CHECK-NEXT: vaddps %xmm2, %xmm5, %xmm1
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
@@ -5174,7 +5119,6 @@ define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x fl
define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float *%ptr_b ,i8 %x3,i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
@@ -5188,7 +5132,6 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x
define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
; CHECK: ## BB#0:
-; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
@@ -5202,7 +5145,8 @@ define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x f
define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
; CHECK: ## BB#0:
-; CHECK-NEXT: kxorw %k0, %k0, %k1
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%q = load float, float* %ptr_b