diff options
Diffstat (limited to 'test/CodeGen/X86/avx512bw-intrinsics.ll')
-rw-r--r-- | test/CodeGen/X86/avx512bw-intrinsics.ll | 240 |
1 files changed, 113 insertions, 127 deletions
diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index d9a9a2d655b40..3337f42eb1428 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -660,8 +660,8 @@ define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 } define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { @@ -678,8 +678,10 @@ define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, < ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 } define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { @@ -694,8 +696,10 @@ define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512F-32-NEXT: retl - %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 } define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { @@ -710,8 +714,8 @@ define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_ ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b - %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 } define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { @@ -730,8 +734,10 @@ define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b - %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 } define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { @@ -748,8 +754,10 @@ define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %pt ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} ; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b - %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 } define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { @@ -766,8 +774,8 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer - %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 } define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { @@ -788,8 +796,10 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <3 %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer - %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 } define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { @@ -808,11 +818,13 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer - %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 } -declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) +declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-LABEL: test_mask_packs_epi16_rr_512: @@ -824,8 +836,8 @@ define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + ret <64 x i8> %1 } define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { @@ -838,14 +850,14 @@ define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <6 ; ; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru + ret <64 x i8> %3 } define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { @@ -857,13 +869,13 @@ define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; ; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512F-32-NEXT: retl - %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer + ret <64 x i8> %3 } define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { @@ -878,8 +890,8 @@ define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b - %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + ret <64 x i8> %1 } define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { @@ -893,15 +905,15 @@ define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b - %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru + ret <64 x i8> %3 } define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { @@ -914,17 +926,17 @@ define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} ; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b - %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer + ret <64 x i8> %3 } -declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) +declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { @@ -937,8 +949,8 @@ define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 } define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { @@ -955,8 +967,10 @@ define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 } define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { @@ -971,8 +985,10 @@ define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512F-32-NEXT: retl - %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 } define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { @@ -987,8 +1003,8 @@ define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b - %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 } define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { @@ -1007,8 +1023,10 @@ define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %pt ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b - %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 } define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { @@ -1025,8 +1043,10 @@ define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %p ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} ; AVX512F-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b - %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 } define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { @@ -1043,8 +1063,8 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer - %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 } define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { @@ -1065,8 +1085,10 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, < %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer - %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 } define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { @@ -1085,11 +1107,13 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer - %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 } -declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) +declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-LABEL: test_mask_packus_epi16_rr_512: @@ -1101,8 +1125,8 @@ define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + ret <64 x i8> %1 } define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { @@ -1115,14 +1139,14 @@ define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, < ; ; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru + ret <64 x i8> %3 } define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { @@ -1134,13 +1158,13 @@ define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, ; ; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512F-32-NEXT: retl - %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer + ret <64 x i8> %3 } define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { @@ -1155,8 +1179,8 @@ define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b - %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + ret <64 x i8> %1 } define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { @@ -1170,15 +1194,15 @@ define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b - %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru + ret <64 x i8> %3 } define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { @@ -1191,17 +1215,17 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %pt ; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} ; AVX512F-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b - %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer + ret <64 x i8> %3 } -declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) +declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-LABEL: test_mask_adds_epi16_rr_512: @@ -2108,7 +2132,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <1 define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovw %edi, %k1 +; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0 @@ -2271,44 +2295,6 @@ define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) { ret i32 %res } -declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64) - -define <64 x i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) { -; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2b_512: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovq %rdi, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2b_512: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: retl - %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0) - ret <64 x i8> %res -} - -declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32) - -define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) { -; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2w_512: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovd %edi, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2w_512: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: vpmovm2w %k0, %zmm0 -; AVX512F-32-NEXT: retl - %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0) - ret <32 x i16> %res -} - declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |