summaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/avx512-extract-subvector.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/avx512-extract-subvector.ll')
-rw-r--r--test/CodeGen/X86/avx512-extract-subvector.ll95
1 files changed, 79 insertions, 16 deletions
diff --git a/test/CodeGen/X86/avx512-extract-subvector.ll b/test/CodeGen/X86/avx512-extract-subvector.ll
index 391bf6ba4554..2d0a81046b4e 100644
--- a/test/CodeGen/X86/avx512-extract-subvector.ll
+++ b/test/CodeGen/X86/avx512-extract-subvector.ll
@@ -6,6 +6,7 @@ define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
; SKX-LABEL: extract_subvector128_v32i16:
; SKX: ## BB#0:
; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <8 x i16> %r1
@@ -15,6 +16,7 @@ define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounw
; SKX-LABEL: extract_subvector128_v32i16_first_element:
; SKX: ## BB#0:
; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %r1
@@ -24,6 +26,7 @@ define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
; SKX-LABEL: extract_subvector128_v64i8:
; SKX: ## BB#0:
; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
ret <16 x i8> %r1
@@ -33,6 +36,7 @@ define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwin
; SKX-LABEL: extract_subvector128_v64i8_first_element:
; SKX: ## BB#0:
; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %r1
@@ -61,6 +65,7 @@ define void @extract_subvector256_v8f64_store(double* nocapture %addr, <4 x doub
; SKX-LABEL: extract_subvector256_v8f64_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
@@ -73,6 +78,7 @@ define void @extract_subvector256_v8f32_store(float* nocapture %addr, <8 x float
; SKX-LABEL: extract_subvector256_v8f32_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -85,6 +91,7 @@ define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a
; SKX-LABEL: extract_subvector256_v4i64_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
@@ -97,6 +104,7 @@ define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a
; SKX-LABEL: extract_subvector256_v8i32_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -109,6 +117,7 @@ define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16>
; SKX-LABEL: extract_subvector256_v16i16_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -121,6 +130,7 @@ define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a)
; SKX-LABEL: extract_subvector256_v32i8_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
@@ -133,6 +143,7 @@ define void @extract_subvector256_v4f64_store_lo(double* nocapture %addr, <4 x d
; SKX-LABEL: extract_subvector256_v4f64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
@@ -145,6 +156,7 @@ define void @extract_subvector256_v4f64_store_lo_align_16(double* nocapture %add
; SKX-LABEL: extract_subvector256_v4f64_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
@@ -157,6 +169,7 @@ define void @extract_subvector256_v4f32_store_lo(float* nocapture %addr, <8 x fl
; SKX-LABEL: extract_subvector256_v4f32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -169,6 +182,7 @@ define void @extract_subvector256_v4f32_store_lo_align_16(float* nocapture %addr
; SKX-LABEL: extract_subvector256_v4f32_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -181,6 +195,7 @@ define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64>
; SKX-LABEL: extract_subvector256_v2i64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
@@ -193,6 +208,7 @@ define void @extract_subvector256_v2i64_store_lo_align_16(i64* nocapture %addr,
; SKX-LABEL: extract_subvector256_v2i64_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
@@ -205,6 +221,7 @@ define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32>
; SKX-LABEL: extract_subvector256_v4i32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -217,6 +234,7 @@ define void @extract_subvector256_v4i32_store_lo_align_16(i32* nocapture %addr,
; SKX-LABEL: extract_subvector256_v4i32_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -229,6 +247,7 @@ define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16
; SKX-LABEL: extract_subvector256_v8i16_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -241,6 +260,7 @@ define void @extract_subvector256_v8i16_store_lo_align_16(i16* nocapture %addr,
; SKX-LABEL: extract_subvector256_v8i16_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -253,6 +273,7 @@ define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8>
; SKX-LABEL: extract_subvector256_v16i8_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -265,6 +286,7 @@ define void @extract_subvector256_v16i8_store_lo_align_16(i8* nocapture %addr, <
; SKX-LABEL: extract_subvector256_v16i8_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -277,6 +299,7 @@ define void @extract_subvector512_v2f64_store_lo(double* nocapture %addr, <8 x d
; SKX-LABEL: extract_subvector512_v2f64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
@@ -289,6 +312,7 @@ define void @extract_subvector512_v2f64_store_lo_align_16(double* nocapture %add
; SKX-LABEL: extract_subvector512_v2f64_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
@@ -301,6 +325,7 @@ define void @extract_subvector512_v4f32_store_lo(float* nocapture %addr, <16 x f
; SKX-LABEL: extract_subvector512_v4f32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -313,6 +338,7 @@ define void @extract_subvector512_v4f32_store_lo_align_16(float* nocapture %addr
; SKX-LABEL: extract_subvector512_v4f32_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -325,6 +351,7 @@ define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64>
; SKX-LABEL: extract_subvector512_v2i64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
@@ -337,6 +364,7 @@ define void @extract_subvector512_v2i64_store_lo_align_16(i64* nocapture %addr,
; SKX-LABEL: extract_subvector512_v2i64_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
@@ -349,6 +377,7 @@ define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32
; SKX-LABEL: extract_subvector512_v4i32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -361,6 +390,7 @@ define void @extract_subvector512_v4i32_store_lo_align_16(i32* nocapture %addr,
; SKX-LABEL: extract_subvector512_v4i32_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -373,6 +403,7 @@ define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16
; SKX-LABEL: extract_subvector512_v8i16_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -385,6 +416,7 @@ define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8>
; SKX-LABEL: extract_subvector512_v16i8_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -397,6 +429,7 @@ define void @extract_subvector512_v16i8_store_lo_align_16(i8* nocapture %addr, <
; SKX-LABEL: extract_subvector512_v16i8_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %xmm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -409,6 +442,7 @@ define void @extract_subvector512_v4f64_store_lo(double* nocapture %addr, <8 x d
; SKX-LABEL: extract_subvector512_v4f64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -421,6 +455,7 @@ define void @extract_subvector512_v4f64_store_lo_align_16(double* nocapture %add
; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -433,6 +468,7 @@ define void @extract_subvector512_v4f64_store_lo_align_32(double* nocapture %add
; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_32:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -445,6 +481,7 @@ define void @extract_subvector512_v8f32_store_lo(float* nocapture %addr, <16 x f
; SKX-LABEL: extract_subvector512_v8f32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -457,6 +494,7 @@ define void @extract_subvector512_v8f32_store_lo_align_16(float* nocapture %addr
; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -469,6 +507,7 @@ define void @extract_subvector512_v8f32_store_lo_align_32(float* nocapture %addr
; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_32:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -481,6 +520,7 @@ define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64>
; SKX-LABEL: extract_subvector512_v4i64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -493,6 +533,7 @@ define void @extract_subvector512_v4i64_store_lo_align_16(i64* nocapture %addr,
; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -505,6 +546,7 @@ define void @extract_subvector512_v4i64_store_lo_align_32(i64* nocapture %addr,
; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_32:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -517,6 +559,7 @@ define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32
; SKX-LABEL: extract_subvector512_v8i32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -529,6 +572,7 @@ define void @extract_subvector512_v8i32_store_lo_align_16(i32* nocapture %addr,
; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -541,6 +585,7 @@ define void @extract_subvector512_v8i32_store_lo_align_32(i32* nocapture %addr,
; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_32:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -553,6 +598,7 @@ define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i1
; SKX-LABEL: extract_subvector512_v16i16_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -565,6 +611,7 @@ define void @extract_subvector512_v16i16_store_lo_align_16(i16* nocapture %addr,
; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -577,6 +624,7 @@ define void @extract_subvector512_v16i16_store_lo_align_32(i16* nocapture %addr,
; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_32:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -589,6 +637,7 @@ define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8>
; SKX-LABEL: extract_subvector512_v32i8_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
@@ -601,6 +650,7 @@ define void @extract_subvector512_v32i8_store_lo_align_16(i8* nocapture %addr, <
; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
@@ -613,6 +663,7 @@ define void @extract_subvector512_v32i8_store_lo_align_32(i8* nocapture %addr, <
; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_32:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %ymm0, (%rdi)
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
@@ -624,7 +675,7 @@ entry:
define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_mask_extractf64x4_pd:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf64x4 $1, %zmm1, %ymm0 {%k1}
; SKX-NEXT: retq
entry:
@@ -638,7 +689,7 @@ entry:
define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_maskz_extractf64x4_pd:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
; SKX-NEXT: retq
entry:
@@ -652,8 +703,9 @@ entry:
define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_mask_extractf32x4_ps:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf32x4 $1, %zmm1, %xmm0 {%k1}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x double> %__A to <16 x float>
@@ -667,8 +719,9 @@ entry:
define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_maskz_extractf32x4_ps:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = bitcast <8 x double> %__A to <16 x float>
@@ -682,8 +735,9 @@ entry:
define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) {
; SKX-LABEL: test_mm256_mask_extractf64x2_pd:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf64x2 $1, %ymm1, %xmm0 {%k1}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
@@ -696,8 +750,9 @@ entry:
define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) {
; SKX-LABEL: test_mm256_maskz_extractf64x2_pd:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
@@ -710,8 +765,9 @@ entry:
define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_mask_extracti64x2_epi64:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm0 {%k1}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
@@ -724,8 +780,9 @@ entry:
define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
@@ -738,8 +795,9 @@ entry:
define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) {
; SKX-LABEL: test_mm256_mask_extractf32x4_ps:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf32x4 $1, %ymm1, %xmm0 {%k1}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -752,8 +810,9 @@ entry:
define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) {
; SKX-LABEL: test_mm256_maskz_extractf32x4_ps:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -766,8 +825,9 @@ entry:
define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_mask_extracti32x4_epi32:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextracti32x4 $1, %ymm1, %xmm0 {%k1}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
@@ -783,8 +843,9 @@ entry:
define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
@@ -799,7 +860,7 @@ entry:
define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) {
; SKX-LABEL: test_mm512_mask_extractf32x8_ps:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf32x8 $1, %zmm1, %ymm0 {%k1}
; SKX-NEXT: retq
entry:
@@ -812,7 +873,7 @@ entry:
define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) {
; SKX-LABEL: test_mm512_maskz_extractf32x8_ps:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
; SKX-NEXT: retq
entry:
@@ -825,8 +886,9 @@ entry:
define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_mask_extractf64x2_pd:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf64x2 $3, %zmm1, %xmm0 {%k1}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>
@@ -839,8 +901,9 @@ entry:
define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_maskz_extractf64x2_pd:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z}
+; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:
%shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>