diff options
Diffstat (limited to 'test/CodeGen/X86/avx512-intrinsics.ll')
-rw-r--r-- | test/CodeGen/X86/avx512-intrinsics.ll | 422 |
1 files changed, 183 insertions, 239 deletions
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index f800d01064ba9..563cad04b8c2d 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -112,6 +112,8 @@ define i16 @unpckbw_test(i16 %a0, i16 %a1) { } declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone +; TODO: the two kxnor instructions here a no op and should be elimintaed, +; probably by FoldConstantArithmetic in SelectionDAG. define i16 @test_kxnor(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kxnor: ; CHECK: ## BB#0: @@ -121,6 +123,8 @@ define i16 @test_kxnor(i16 %a0, i16 %a1) { ; CHECK-NEXT: kmovw %eax, %k2 ; CHECK-NEXT: kxorw %k0, %k1, %k0 ; CHECK-NEXT: kxorw %k0, %k2, %k0 +; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorw %k1, %k0, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> ; CHECK-NEXT: retq @@ -269,16 +273,15 @@ declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_sqrt_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm2, %xmm3 ; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1} ; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z} +; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z} ; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1 +; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1) @@ -296,16 +299,15 @@ declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, < define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_sqrt_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm2, %xmm3 ; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1} ; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm3 {%k1} {z} +; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z} ; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1 +; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1) @@ -477,11 +479,11 @@ declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2usi64: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtsd2usi %xmm0, %rax -; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtsd2usi %xmm0, %rcx +; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rdx ; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: retq %res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4) @@ -496,11 +498,11 @@ declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2si64: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtsd2si %xmm0, %rax -; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtsd2si %xmm0, %rcx +; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rdx ; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: retq %res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4) @@ -515,11 +517,11 @@ declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2usi64: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtss2usi %xmm0, %rax -; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtss2usi %xmm0, %rcx +; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rdx ; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: retq %res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4) @@ -534,11 +536,11 @@ declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2si64: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtss2si %xmm0, %rax -; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtss2si %xmm0, %rcx +; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %rdx ; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: retq %res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4) @@ -553,11 +555,11 @@ declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2usi32: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtsd2usi %xmm0, %eax -; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx -; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtsd2usi %xmm0, %ecx +; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %edx ; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4) @@ -572,11 +574,11 @@ declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2si32: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtsd2si %xmm0, %eax -; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx -; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtsd2si %xmm0, %ecx +; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %edx ; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4) @@ -591,11 +593,11 @@ declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2usi32: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtss2usi %xmm0, %eax -; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx -; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtss2usi %xmm0, %ecx +; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %edx ; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4) @@ -610,11 +612,11 @@ declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2si32: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtss2si %xmm0, %eax -; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx -; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtss2si %xmm0, %ecx +; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %edx ; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4) @@ -683,9 +685,8 @@ define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm2 {%k1} {z} -; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, (%rsi) -; CHECK-NEXT: vmovdqa %ymm1, %ymm0 +; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm0 ; CHECK-NEXT: retq %res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1) %res2 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 %mask) @@ -2215,7 +2216,6 @@ declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_ss_rn: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -2227,7 +2227,6 @@ define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x f define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_ss_rd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -2239,7 +2238,6 @@ define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x f define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_ss_ru: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -2251,7 +2249,6 @@ define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x f define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_ss_rz: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -2263,7 +2260,6 @@ define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x f define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_ss_current: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -2275,7 +2271,6 @@ define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, < define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_maskz_add_ss_rn: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2295,7 +2290,6 @@ define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) { define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, float* %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_ss_current_memfold: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %esi ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -2312,7 +2306,6 @@ define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, float* %a1 define <4 x float> @test_maskz_add_ss_current_memfold(<4 x float> %a0, float* %a1, i8 %mask) { ; CHECK-LABEL: test_maskz_add_ss_current_memfold: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %esi ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2330,7 +2323,6 @@ declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x doubl define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_sd_rn: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -2342,7 +2334,6 @@ define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_sd_rd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -2354,7 +2345,6 @@ define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_sd_ru: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -2366,7 +2356,6 @@ define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_sd_rz: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -2378,7 +2367,6 @@ define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_sd_current: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -2390,7 +2378,6 @@ define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) { ; CHECK-LABEL: test_maskz_add_sd_rn: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2410,7 +2397,6 @@ define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) { define <2 x double> @test_mask_add_sd_current_memfold(<2 x double> %a0, double* %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_add_sd_current_memfold: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %esi ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vaddsd (%rdi), %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -2425,7 +2411,6 @@ define <2 x double> @test_mask_add_sd_current_memfold(<2 x double> %a0, double* define <2 x double> @test_maskz_add_sd_current_memfold(<2 x double> %a0, double* %a1, i8 %mask) { ; CHECK-LABEL: test_maskz_add_sd_current_memfold: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %esi ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vaddsd (%rdi), %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2441,7 +2426,6 @@ declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_max_ss_sae: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -2453,7 +2437,6 @@ define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_maskz_max_ss_sae: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2473,7 +2456,6 @@ define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) { define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_max_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -2485,7 +2467,6 @@ define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x floa define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_maskz_max_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2505,7 +2486,6 @@ define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) { define <4 x float> @test_mask_max_ss_memfold(<4 x float> %a0, float* %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_max_ss_memfold: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %esi ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vmaxss (%rdi), %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -2522,7 +2502,6 @@ define <4 x float> @test_mask_max_ss_memfold(<4 x float> %a0, float* %a1, <4 x f define <4 x float> @test_maskz_max_ss_memfold(<4 x float> %a0, float* %a1, i8 %mask) { ; CHECK-LABEL: test_maskz_max_ss_memfold: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %esi ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vmaxss (%rdi), %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2539,7 +2518,6 @@ declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x doubl define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_max_sd_sae: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -2551,7 +2529,6 @@ define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) { ; CHECK-LABEL: test_maskz_max_sd_sae: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2571,7 +2548,6 @@ define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) { define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_max_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -2583,7 +2559,6 @@ define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x d define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) { ; CHECK-LABEL: test_maskz_max_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2603,7 +2578,6 @@ define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) { define <2 x double> @test_mask_max_sd_memfold(<2 x double> %a0, double* %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_mask_max_sd_memfold: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %esi ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vmaxsd (%rdi), %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -2618,7 +2592,6 @@ define <2 x double> @test_mask_max_sd_memfold(<2 x double> %a0, double* %a1, <2 define <2 x double> @test_maskz_max_sd_memfold(<2 x double> %a0, double* %a1, i8 %mask) { ; CHECK-LABEL: test_maskz_max_sd_memfold: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %esi ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -3652,16 +3625,15 @@ declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { ; CHECK-LABEL: test_getexp_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm2, %xmm3 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1} +; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z} +; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm5 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm3 {%k1} {z} -; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm0 +; CHECK-NEXT: vaddps %xmm5, %xmm4, %xmm1 +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8) @@ -3679,16 +3651,15 @@ declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { ; CHECK-LABEL: test_getexp_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovapd %xmm2, %xmm3 -; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1} -; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4 +; CHECK-NEXT: vmovapd %xmm2, %xmm4 +; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4 {%k1} +; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm5 {%k1} {z} ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} -; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vaddpd %xmm2, %xmm4, %xmm0 +; CHECK-NEXT: vaddpd %xmm3, %xmm5, %xmm1 +; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8) @@ -3706,11 +3677,9 @@ declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32 define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1} ; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> ; CHECK-NEXT: retq @@ -3721,18 +3690,18 @@ define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all: ; CHECK: ## BB#0: +; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %ecx ; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0 -; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1 -; CHECK-NEXT: korw %k0, %k1, %k0 -; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1 -; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2 -; CHECK-NEXT: korw %k1, %k2, %k1 -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k2 -; CHECK-NEXT: kandw %k2, %k1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 +; CHECK-NEXT: kmovw %k0, %edx +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %esi +; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1} ; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: orb %cl, %dl +; CHECK-NEXT: orb %sil, %al +; CHECK-NEXT: orb %dl, %al ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> ; CHECK-NEXT: retq @@ -3752,11 +3721,9 @@ declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32) define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1} ; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> ; CHECK-NEXT: retq @@ -3768,17 +3735,17 @@ define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 % define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all: ; CHECK: ## BB#0: -; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1 -; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %ecx +; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %edx ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k2 {%k1} -; CHECK-NEXT: kmovw %k2, %ecx -; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1} -; CHECK-NEXT: kmovw %k1, %edx -; CHECK-NEXT: andl $1, %edx +; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %esi +; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k0 {%k1} ; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: andb %cl, %dl +; CHECK-NEXT: andb %sil, %al ; CHECK-NEXT: andb %dl, %al ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> ; CHECK-NEXT: retq @@ -3899,16 +3866,15 @@ declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double> define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovapd %xmm2, %xmm3 -; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1} -; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z} -; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm3 -; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 +; CHECK-NEXT: vmovapd %xmm2, %xmm4 +; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} +; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5 {%k1} {z} ; CHECK-NEXT: vgetmantsd $11, {sae}, %xmm1, %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vaddpd %xmm4, %xmm2, %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vaddpd %xmm5, %xmm4, %xmm0 +; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1 +; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4) @@ -3925,15 +3891,14 @@ declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z} -; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm2 -; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 +; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4 {%k1} {z} ; CHECK-NEXT: vgetmantss $11, {sae}, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vaddps %xmm4, %xmm2, %xmm1 ; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4) @@ -4057,7 +4022,6 @@ declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double>, <4 x flo define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<2 x double> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0 @@ -4074,7 +4038,6 @@ declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float>, <2 x doubl define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0 @@ -4435,8 +4398,8 @@ define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, < ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vprold $3, %zmm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vprold $3, %zmm0, %zmm0 +; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3) @@ -4455,8 +4418,8 @@ define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8 ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vprolq $3, %zmm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vprolq $3, %zmm0, %zmm0 +; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3) @@ -4557,9 +4520,9 @@ define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, < ; CHECK-NEXT: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1} ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4 ; CHECK-NEXT: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z} -; CHECK-NEXT: vaddpd %zmm4, %zmm3, %zmm3 ; CHECK-NEXT: vfixupimmpd $3, {sae}, %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: vaddpd %zmm4, %zmm3, %zmm1 +; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4) %res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4) @@ -4580,9 +4543,9 @@ define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4 ; CHECK-NEXT: vmovapd %zmm0, %zmm5 ; CHECK-NEXT: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z} -; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm3 ; CHECK-NEXT: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm1 +; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4) %res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4) @@ -4597,16 +4560,15 @@ declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float>, <4 x float>, define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm3 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vmovaps %xmm0, %xmm5 ; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1} -; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm3 ; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1 +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4) @@ -4621,16 +4583,15 @@ declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float>, <4 x float>, define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm3 -; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z} +; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm4 -; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4 +; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4 {%k1} {z} ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} -; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 -; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0 +; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8) @@ -4651,9 +4612,9 @@ define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, < ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4 ; CHECK-NEXT: vmovaps %zmm0, %zmm5 ; CHECK-NEXT: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1} -; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm3 ; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm1 +; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) %res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4) @@ -4691,16 +4652,15 @@ declare <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double>, <2 x double define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm0, %xmm3 -; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm0, %xmm4 -; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4 +; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4 {%k1} ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} -; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 -; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0 +; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) @@ -4715,16 +4675,15 @@ declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double>, <2 x doubl define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm0, %xmm3 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vmovapd %xmm0, %xmm5 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z} -; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm3 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} -; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1 +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) @@ -4816,18 +4775,17 @@ declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm0, %xmm3 -; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} -; CHECK-NEXT: vmovapd %xmm0, %xmm4 -; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm4 -; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm3 +; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm0, %xmm4 -; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 {%k1} -; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm4 {%k1} +; CHECK-NEXT: vmovapd %xmm0, %xmm5 +; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm5 +; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm1 +; CHECK-NEXT: vaddpd %xmm0, %xmm5, %xmm0 +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4) @@ -4844,18 +4802,17 @@ declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm3 -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 {%k1} -; CHECK-NEXT: vmovaps %xmm0, %xmm4 -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm4 -; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm3 +; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm4 -; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 {%k1} -; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm4 {%k1} +; CHECK-NEXT: vmovaps %xmm0, %xmm5 +; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm5 +; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm1 +; CHECK-NEXT: vaddps %xmm0, %xmm5, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4) @@ -4872,7 +4829,6 @@ declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double> define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm0, %xmm3 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} @@ -4890,7 +4846,6 @@ declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, < define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -4904,18 +4859,17 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double> define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm2, %xmm3 -; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1} -; CHECK-NEXT: vmovapd %xmm2, %xmm4 -; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm4 -; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm3 +; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm2, %xmm4 -; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1} -; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vaddpd %xmm4, %xmm2, %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm4 {%k1} +; CHECK-NEXT: vmovapd %xmm2, %xmm5 +; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm5 +; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0 +; CHECK-NEXT: vaddpd %xmm2, %xmm5, %xmm1 +; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4) @@ -4932,18 +4886,17 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, < define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm2, %xmm3 -; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1} -; CHECK-NEXT: vmovaps %xmm2, %xmm4 -; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm4 -; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm3 +; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm2, %xmm4 -; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1} -; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vaddps %xmm4, %xmm2, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm4 {%k1} +; CHECK-NEXT: vmovaps %xmm2, %xmm5 +; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm5 +; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm0 +; CHECK-NEXT: vaddps %xmm2, %xmm5, %xmm1 +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4) @@ -4959,7 +4912,6 @@ define void @fmadd_ss_mask_memfold(float* %a, float* %b, i8 %c) { ; CHECK-LABEL: fmadd_ss_mask_memfold: ; CHECK: ## BB#0: ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: andl $1, %edx ; CHECK-NEXT: kmovw %edx, %k1 ; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1} ; CHECK-NEXT: vmovss %xmm0, (%rdi) @@ -4987,7 +4939,6 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) { ; CHECK-LABEL: fmadd_ss_maskz_memfold: ; CHECK: ## BB#0: ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: andl $1, %edx ; CHECK-NEXT: kmovw %edx, %k1 ; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vmovss %xmm0, (%rdi) @@ -5015,7 +4966,6 @@ define void @fmadd_sd_mask_memfold(double* %a, double* %b, i8 %c) { ; CHECK-LABEL: fmadd_sd_mask_memfold: ; CHECK: ## BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: andl $1, %edx ; CHECK-NEXT: kmovw %edx, %k1 ; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1} ; CHECK-NEXT: vmovlpd %xmm0, (%rdi) @@ -5039,7 +4989,6 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) { ; CHECK-LABEL: fmadd_sd_maskz_memfold: ; CHECK: ## BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: andl $1, %edx ; CHECK-NEXT: kmovw %edx, %k1 ; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vmovlpd %xmm0, (%rdi) @@ -5064,18 +5013,17 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double> define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm2, %xmm3 -; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 {%k1} -; CHECK-NEXT: vmovapd %xmm2, %xmm4 -; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm4 -; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm3 +; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm2, %xmm4 -; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1} -; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vaddpd %xmm4, %xmm2, %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm4 {%k1} +; CHECK-NEXT: vmovapd %xmm2, %xmm5 +; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm5 +; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0 +; CHECK-NEXT: vaddpd %xmm2, %xmm5, %xmm1 +; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4) @@ -5092,18 +5040,17 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, < define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm2, %xmm3 -; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 {%k1} -; CHECK-NEXT: vmovaps %xmm2, %xmm4 -; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm4 -; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm3 +; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm2, %xmm4 -; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1} -; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vaddps %xmm4, %xmm2, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm4 {%k1} +; CHECK-NEXT: vmovaps %xmm2, %xmm5 +; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm5 +; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm0 +; CHECK-NEXT: vaddps %xmm2, %xmm5, %xmm1 +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4) @@ -5120,18 +5067,17 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm2, %xmm3 -; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 {%k1} -; CHECK-NEXT: vmovapd %xmm2, %xmm4 -; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm4 -; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm3 +; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %xmm2, %xmm4 -; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1} -; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vaddpd %xmm4, %xmm2, %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm4 {%k1} +; CHECK-NEXT: vmovapd %xmm2, %xmm5 +; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm5 +; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0 +; CHECK-NEXT: vaddpd %xmm2, %xmm5, %xmm1 +; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4) @@ -5148,18 +5094,17 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>, define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm2, %xmm3 -; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 {%k1} -; CHECK-NEXT: vmovaps %xmm2, %xmm4 -; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm4 -; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm3 +; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %xmm2, %xmm4 -; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 {%k1} -; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vaddps %xmm4, %xmm2, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm4 {%k1} +; CHECK-NEXT: vmovaps %xmm2, %xmm5 +; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm5 +; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vaddps %xmm4, %xmm3, %xmm0 +; CHECK-NEXT: vaddps %xmm2, %xmm5, %xmm1 +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4) @@ -5174,7 +5119,6 @@ define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x fl define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float *%ptr_b ,i8 %x3,i32 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %esi ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -5188,7 +5132,6 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm: ; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %esi ; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq @@ -5202,7 +5145,8 @@ define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x f define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) { ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm: ; CHECK: ## BB#0: -; CHECK-NEXT: kxorw %k0, %k0, %k1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %q = load float, float* %ptr_b |