summaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/avx512bw-intrinsics.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/avx512bw-intrinsics.ll')
-rw-r--r--test/CodeGen/X86/avx512bw-intrinsics.ll240
1 files changed, 113 insertions, 127 deletions
diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll
index d9a9a2d655b40..3337f42eb1428 100644
--- a/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -660,8 +660,8 @@ define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
- %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
+ ret <32 x i16> %1
}
define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
@@ -678,8 +678,10 @@ define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <
; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
- %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
+ ret <32 x i16> %3
}
define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
@@ -694,8 +696,10 @@ define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b,
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
- %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
+ ret <32 x i16> %3
}
define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
@@ -710,8 +714,8 @@ define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_
; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
- %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
+ ret <32 x i16> %1
}
define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
@@ -730,8 +734,10 @@ define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
- %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
+ ret <32 x i16> %3
}
define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
@@ -748,8 +754,10 @@ define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %pt
; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
- %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
+ ret <32 x i16> %3
}
define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
@@ -766,8 +774,8 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
+ ret <32 x i16> %1
}
define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
@@ -788,8 +796,10 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <3
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
+ ret <32 x i16> %3
}
define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
@@ -808,11 +818,13 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
+ ret <32 x i16> %3
}
-declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
+declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>)
define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_packs_epi16_rr_512:
@@ -824,8 +836,8 @@ define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
- %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
+ ret <64 x i8> %1
}
define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
@@ -838,14 +850,14 @@ define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <6
;
; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
- %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
+ ret <64 x i8> %3
}
define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
@@ -857,13 +869,13 @@ define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i
;
; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
- %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
+ ret <64 x i8> %3
}
define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
@@ -878,8 +890,8 @@ define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b
; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
- %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
+ ret <64 x i8> %1
}
define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
@@ -893,15 +905,15 @@ define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_
; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
- %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
+ ret <64 x i8> %3
}
define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
@@ -914,17 +926,17 @@ define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr
; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
- %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
+ ret <64 x i8> %3
}
-declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
+declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>)
define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
@@ -937,8 +949,8 @@ define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
- %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
+ ret <32 x i16> %1
}
define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
@@ -955,8 +967,10 @@ define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b,
; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
- %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
+ ret <32 x i16> %3
}
define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
@@ -971,8 +985,10 @@ define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b,
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
- %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
+ ret <32 x i16> %3
}
define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
@@ -987,8 +1003,8 @@ define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr
; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
- %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
+ ret <32 x i16> %1
}
define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
@@ -1007,8 +1023,10 @@ define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %pt
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
- %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
+ ret <32 x i16> %3
}
define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
@@ -1025,8 +1043,10 @@ define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %p
; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
- %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
+ ret <32 x i16> %3
}
define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
@@ -1043,8 +1063,8 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
+ ret <32 x i16> %1
}
define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
@@ -1065,8 +1085,10 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
+ ret <32 x i16> %3
}
define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
@@ -1085,11 +1107,13 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b,
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
+ ret <32 x i16> %3
}
-declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
+declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>)
define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_packus_epi16_rr_512:
@@ -1101,8 +1125,8 @@ define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
- %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
+ ret <64 x i8> %1
}
define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
@@ -1115,14 +1139,14 @@ define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <
;
; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
- %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
+ ret <64 x i8> %3
}
define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
@@ -1134,13 +1158,13 @@ define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b,
;
; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
- %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
+ ret <64 x i8> %3
}
define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
@@ -1155,8 +1179,8 @@ define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_
; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
- %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
+ ret <64 x i8> %1
}
define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
@@ -1170,15 +1194,15 @@ define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr
; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
- %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
+ ret <64 x i8> %3
}
define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
@@ -1191,17 +1215,17 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %pt
; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
- %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
+ ret <64 x i8> %3
}
-declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
+declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>)
define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epi16_rr_512:
@@ -2108,7 +2132,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <1
define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: kmovw %edi, %k1
+; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0
@@ -2271,44 +2295,6 @@ define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
ret i32 %res
}
-declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64)
-
-define <64 x i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) {
-; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2b_512:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: kmovq %rdi, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2b_512:
-; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: retl
- %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0)
- ret <64 x i8> %res
-}
-
-declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32)
-
-define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) {
-; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2w_512:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2w_512:
-; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: vpmovm2w %k0, %zmm0
-; AVX512F-32-NEXT: retl
- %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0)
- ret <32 x i16> %res
-}
-
declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {