diff options
Diffstat (limited to 'test/CodeGen/X86/avx512-extract-subvector.ll')
-rw-r--r-- | test/CodeGen/X86/avx512-extract-subvector.ll | 95 |
1 files changed, 79 insertions, 16 deletions
diff --git a/test/CodeGen/X86/avx512-extract-subvector.ll b/test/CodeGen/X86/avx512-extract-subvector.ll index 391bf6ba4554..2d0a81046b4e 100644 --- a/test/CodeGen/X86/avx512-extract-subvector.ll +++ b/test/CodeGen/X86/avx512-extract-subvector.ll @@ -6,6 +6,7 @@ define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind { ; SKX-LABEL: extract_subvector128_v32i16: ; SKX: ## BB#0: ; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0 +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> ret <8 x i16> %r1 @@ -15,6 +16,7 @@ define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounw ; SKX-LABEL: extract_subvector128_v32i16_first_element: ; SKX: ## BB#0: ; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ret <8 x i16> %r1 @@ -24,6 +26,7 @@ define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind { ; SKX-LABEL: extract_subvector128_v64i8: ; SKX: ## BB#0: ; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0 +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47> ret <16 x i8> %r1 @@ -33,6 +36,7 @@ define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwin ; SKX-LABEL: extract_subvector128_v64i8_first_element: ; SKX: ## BB#0: ; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ret <16 x i8> %r1 @@ -61,6 +65,7 @@ define void @extract_subvector256_v8f64_store(double* nocapture %addr, <4 x doub ; SKX-LABEL: extract_subvector256_v8f64_store: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3> @@ -73,6 +78,7 @@ define void @extract_subvector256_v8f32_store(float* nocapture %addr, <8 x float ; SKX-LABEL: extract_subvector256_v8f32_store: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -85,6 +91,7 @@ define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a ; SKX-LABEL: extract_subvector256_v4i64_store: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3> @@ -97,6 +104,7 @@ define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a ; SKX-LABEL: extract_subvector256_v8i32_store: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -109,6 +117,7 @@ define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> ; SKX-LABEL: extract_subvector256_v16i16_store: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> @@ -121,6 +130,7 @@ define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) ; SKX-LABEL: extract_subvector256_v32i8_store: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> @@ -133,6 +143,7 @@ define void @extract_subvector256_v4f64_store_lo(double* nocapture %addr, <4 x d ; SKX-LABEL: extract_subvector256_v4f64_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1> @@ -145,6 +156,7 @@ define void @extract_subvector256_v4f64_store_lo_align_16(double* nocapture %add ; SKX-LABEL: extract_subvector256_v4f64_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1> @@ -157,6 +169,7 @@ define void @extract_subvector256_v4f32_store_lo(float* nocapture %addr, <8 x fl ; SKX-LABEL: extract_subvector256_v4f32_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -169,6 +182,7 @@ define void @extract_subvector256_v4f32_store_lo_align_16(float* nocapture %addr ; SKX-LABEL: extract_subvector256_v4f32_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -181,6 +195,7 @@ define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64> ; SKX-LABEL: extract_subvector256_v2i64_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1> @@ -193,6 +208,7 @@ define void @extract_subvector256_v2i64_store_lo_align_16(i64* nocapture %addr, ; SKX-LABEL: extract_subvector256_v2i64_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1> @@ -205,6 +221,7 @@ define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32> ; SKX-LABEL: extract_subvector256_v4i32_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -217,6 +234,7 @@ define void @extract_subvector256_v4i32_store_lo_align_16(i32* nocapture %addr, ; SKX-LABEL: extract_subvector256_v4i32_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -229,6 +247,7 @@ define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16 ; SKX-LABEL: extract_subvector256_v8i16_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -241,6 +260,7 @@ define void @extract_subvector256_v8i16_store_lo_align_16(i16* nocapture %addr, ; SKX-LABEL: extract_subvector256_v8i16_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -253,6 +273,7 @@ define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8> ; SKX-LABEL: extract_subvector256_v16i8_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> @@ -265,6 +286,7 @@ define void @extract_subvector256_v16i8_store_lo_align_16(i8* nocapture %addr, < ; SKX-LABEL: extract_subvector256_v16i8_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> @@ -277,6 +299,7 @@ define void @extract_subvector512_v2f64_store_lo(double* nocapture %addr, <8 x d ; SKX-LABEL: extract_subvector512_v2f64_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1> @@ -289,6 +312,7 @@ define void @extract_subvector512_v2f64_store_lo_align_16(double* nocapture %add ; SKX-LABEL: extract_subvector512_v2f64_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1> @@ -301,6 +325,7 @@ define void @extract_subvector512_v4f32_store_lo(float* nocapture %addr, <16 x f ; SKX-LABEL: extract_subvector512_v4f32_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -313,6 +338,7 @@ define void @extract_subvector512_v4f32_store_lo_align_16(float* nocapture %addr ; SKX-LABEL: extract_subvector512_v4f32_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -325,6 +351,7 @@ define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64> ; SKX-LABEL: extract_subvector512_v2i64_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1> @@ -337,6 +364,7 @@ define void @extract_subvector512_v2i64_store_lo_align_16(i64* nocapture %addr, ; SKX-LABEL: extract_subvector512_v2i64_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1> @@ -349,6 +377,7 @@ define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32 ; SKX-LABEL: extract_subvector512_v4i32_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -361,6 +390,7 @@ define void @extract_subvector512_v4i32_store_lo_align_16(i32* nocapture %addr, ; SKX-LABEL: extract_subvector512_v4i32_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -373,6 +403,7 @@ define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16 ; SKX-LABEL: extract_subvector512_v8i16_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -385,6 +416,7 @@ define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8> ; SKX-LABEL: extract_subvector512_v16i8_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> @@ -397,6 +429,7 @@ define void @extract_subvector512_v16i8_store_lo_align_16(i8* nocapture %addr, < ; SKX-LABEL: extract_subvector512_v16i8_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %xmm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> @@ -409,6 +442,7 @@ define void @extract_subvector512_v4f64_store_lo(double* nocapture %addr, <8 x d ; SKX-LABEL: extract_subvector512_v4f64_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -421,6 +455,7 @@ define void @extract_subvector512_v4f64_store_lo_align_16(double* nocapture %add ; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -433,6 +468,7 @@ define void @extract_subvector512_v4f64_store_lo_align_32(double* nocapture %add ; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_32: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -445,6 +481,7 @@ define void @extract_subvector512_v8f32_store_lo(float* nocapture %addr, <16 x f ; SKX-LABEL: extract_subvector512_v8f32_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -457,6 +494,7 @@ define void @extract_subvector512_v8f32_store_lo_align_16(float* nocapture %addr ; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -469,6 +507,7 @@ define void @extract_subvector512_v8f32_store_lo_align_32(float* nocapture %addr ; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_32: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -481,6 +520,7 @@ define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64> ; SKX-LABEL: extract_subvector512_v4i64_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -493,6 +533,7 @@ define void @extract_subvector512_v4i64_store_lo_align_16(i64* nocapture %addr, ; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -505,6 +546,7 @@ define void @extract_subvector512_v4i64_store_lo_align_32(i64* nocapture %addr, ; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_32: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -517,6 +559,7 @@ define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32 ; SKX-LABEL: extract_subvector512_v8i32_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -529,6 +572,7 @@ define void @extract_subvector512_v8i32_store_lo_align_16(i32* nocapture %addr, ; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -541,6 +585,7 @@ define void @extract_subvector512_v8i32_store_lo_align_32(i32* nocapture %addr, ; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_32: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -553,6 +598,7 @@ define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i1 ; SKX-LABEL: extract_subvector512_v16i16_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> @@ -565,6 +611,7 @@ define void @extract_subvector512_v16i16_store_lo_align_16(i16* nocapture %addr, ; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> @@ -577,6 +624,7 @@ define void @extract_subvector512_v16i16_store_lo_align_32(i16* nocapture %addr, ; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_32: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> @@ -589,6 +637,7 @@ define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8> ; SKX-LABEL: extract_subvector512_v32i8_store_lo: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> @@ -601,6 +650,7 @@ define void @extract_subvector512_v32i8_store_lo_align_16(i8* nocapture %addr, < ; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_16: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovups %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> @@ -613,6 +663,7 @@ define void @extract_subvector512_v32i8_store_lo_align_32(i8* nocapture %addr, < ; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_32: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> @@ -624,7 +675,7 @@ entry: define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) { ; SKX-LABEL: test_mm512_mask_extractf64x4_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf64x4 $1, %zmm1, %ymm0 {%k1} ; SKX-NEXT: retq entry: @@ -638,7 +689,7 @@ entry: define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) { ; SKX-LABEL: test_mm512_maskz_extractf64x4_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -652,8 +703,9 @@ entry: define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) { ; SKX-LABEL: test_mm512_mask_extractf32x4_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf32x4 $1, %zmm1, %xmm0 {%k1} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = bitcast <8 x double> %__A to <16 x float> @@ -667,8 +719,9 @@ entry: define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) { ; SKX-LABEL: test_mm512_maskz_extractf32x4_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = bitcast <8 x double> %__A to <16 x float> @@ -682,8 +735,9 @@ entry: define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) { ; SKX-LABEL: test_mm256_mask_extractf64x2_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf64x2 $1, %ymm1, %xmm0 {%k1} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3> @@ -696,8 +750,9 @@ entry: define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) { ; SKX-LABEL: test_mm256_maskz_extractf64x2_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3> @@ -710,8 +765,9 @@ entry: define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) { ; SKX-LABEL: test_mm256_mask_extracti64x2_epi64: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm0 {%k1} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3> @@ -724,8 +780,9 @@ entry: define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) { ; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3> @@ -738,8 +795,9 @@ entry: define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) { ; SKX-LABEL: test_mm256_mask_extractf32x4_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf32x4 $1, %ymm1, %xmm0 {%k1} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -752,8 +810,9 @@ entry: define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) { ; SKX-LABEL: test_mm256_maskz_extractf32x4_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -766,8 +825,9 @@ entry: define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) { ; SKX-LABEL: test_mm256_mask_extracti32x4_epi32: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextracti32x4 $1, %ymm1, %xmm0 {%k1} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> @@ -783,8 +843,9 @@ entry: define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) { ; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> @@ -799,7 +860,7 @@ entry: define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) { ; SKX-LABEL: test_mm512_mask_extractf32x8_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf32x8 $1, %zmm1, %ymm0 {%k1} ; SKX-NEXT: retq entry: @@ -812,7 +873,7 @@ entry: define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) { ; SKX-LABEL: test_mm512_maskz_extractf32x8_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -825,8 +886,9 @@ entry: define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) { ; SKX-LABEL: test_mm512_mask_extractf64x2_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf64x2 $3, %zmm1, %xmm0 {%k1} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7> @@ -839,8 +901,9 @@ entry: define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) { ; SKX-LABEL: test_mm512_maskz_extractf64x2_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z} +; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7> |