diff options
Diffstat (limited to 'test/CodeGen/X86/vec_int_to_fp.ll')
-rw-r--r-- | test/CodeGen/X86/vec_int_to_fp.ll | 97 |
1 files changed, 70 insertions, 27 deletions
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll index 923af1216d05..649b45712f57 100644 --- a/test/CodeGen/X86/vec_int_to_fp.ll +++ b/test/CodeGen/X86/vec_int_to_fp.ll @@ -61,6 +61,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: sitofp_2i64_to_2f64: @@ -92,18 +93,12 @@ define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) { ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE-NEXT: retq ; -; VEX-LABEL: sitofp_4i32_to_2f64: -; VEX: # BB#0: -; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0 -; VEX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> -; VEX-NEXT: vzeroupper -; VEX-NEXT: retq -; -; AVX512-LABEL: sitofp_4i32_to_2f64: -; AVX512: # BB#0: -; AVX512-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> -; AVX512-NEXT: retq +; AVX-LABEL: sitofp_4i32_to_2f64: +; AVX: # BB#0: +; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq %cvt = sitofp <4 x i32> %a to <4 x double> %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1> ret <2 x double> %shuf @@ -156,6 +151,7 @@ define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) { ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %cvt = sitofp <8 x i16> %a to <8 x double> %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1> @@ -211,6 +207,7 @@ define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) { ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %cvt = sitofp <16 x i8> %a to <16 x double> %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1> @@ -498,6 +495,7 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) { ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_2i64_to_2f64: @@ -536,6 +534,7 @@ define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) { ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_2i32_to_2f64: @@ -548,6 +547,7 @@ define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) { ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_2i32_to_2f64: @@ -603,12 +603,14 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) { ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_4i32_to_2f64: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0 ; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_4i32_to_2f64: @@ -616,12 +618,14 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) { ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_4i32_to_2f64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0 ; AVX512VLDQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512VLDQ-NEXT: vzeroupper ; AVX512VLDQ-NEXT: retq %cvt = uitofp <4 x i32> %a to <4 x double> %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1> @@ -675,6 +679,7 @@ define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) { ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %cvt = uitofp <8 x i16> %a to <8 x double> %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1> @@ -730,6 +735,7 @@ define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) { ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %cvt = uitofp <16 x i8> %a to <16 x double> %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1> @@ -1089,6 +1095,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32: @@ -1147,6 +1154,7 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) { ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32_zero: @@ -1212,12 +1220,14 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32_undef: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> ; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 +; AVX512VLDQ-NEXT: vzeroupper ; AVX512VLDQ-NEXT: retq %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %cvt = sitofp <4 x i64> %ext to <4 x float> @@ -1288,6 +1298,7 @@ define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) { ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %cvt = sitofp <8 x i16> %a to <8 x float> %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -1346,6 +1357,7 @@ define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) { ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %cvt = sitofp <16 x i8> %a to <16 x float> %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -1421,6 +1433,7 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_4i64_to_4f32: @@ -1437,6 +1450,7 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_4i64_to_4f32: @@ -1444,11 +1458,13 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 +; AVX512VLDQ-NEXT: vzeroupper ; AVX512VLDQ-NEXT: retq %cvt = sitofp <4 x i64> %a to <4 x float> ret <4 x float> %cvt @@ -1697,6 +1713,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_2i64_to_4f32: @@ -1805,6 +1822,7 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) { ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_2i64_to_2f32: @@ -1932,12 +1950,14 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32_undef: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> ; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; AVX512VLDQ-NEXT: vzeroupper ; AVX512VLDQ-NEXT: retq %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %cvt = uitofp <4 x i64> %ext to <4 x float> @@ -1982,6 +2002,7 @@ define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) { ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_4i32_to_4f32: @@ -1994,6 +2015,7 @@ define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) { ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_4i32_to_4f32: @@ -2032,10 +2054,10 @@ define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) { ; ; AVX1-LABEL: uitofp_8i16_to_4f32: ; AVX1: # BB#0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX1-NEXT: vzeroupper @@ -2054,6 +2076,7 @@ define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) { ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %cvt = uitofp <8 x i16> %a to <8 x float> %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -2112,6 +2135,7 @@ define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) { ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %cvt = uitofp <16 x i8> %a to <16 x float> %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -2335,6 +2359,7 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_4i64_to_4f32: @@ -2351,6 +2376,7 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_4i64_to_4f32: @@ -2358,11 +2384,13 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; AVX512VLDQ-NEXT: vzeroupper ; AVX512VLDQ-NEXT: retq %cvt = uitofp <4 x i64> %a to <4 x float> ret <4 x float> %cvt @@ -2456,10 +2484,10 @@ define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) { ; ; AVX1-LABEL: uitofp_8i16_to_8f32: ; AVX1: # BB#0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -2607,6 +2635,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: sitofp_load_2i64_to_2f64: @@ -2661,7 +2690,7 @@ define <2 x double> @sitofp_load_2i16_to_2f64(<2 x i16> *%a) { ; SSE-LABEL: sitofp_load_2i16_to_2f64: ; SSE: # BB#0: ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] ; SSE-NEXT: psrad $16, %xmm0 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE-NEXT: retq @@ -2723,7 +2752,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; ; AVX1-LABEL: sitofp_load_4i64_to_4f64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovdqa (%rdi), %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 @@ -2926,6 +2955,7 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_load_2i64_to_2f64: @@ -2967,6 +2997,7 @@ define <2 x double> @uitofp_load_2i32_to_2f64(<2 x i32> *%a) { ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_load_2i32_to_2f64: @@ -2981,6 +3012,7 @@ define <2 x double> @uitofp_load_2i32_to_2f64(<2 x i32> *%a) { ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_load_2i32_to_2f64: @@ -3021,8 +3053,8 @@ define <2 x double> @uitofp_load_2i16_to_2f64(<2 x i16> *%a) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7] +; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; @@ -3037,8 +3069,8 @@ define <2 x double> @uitofp_load_2i16_to_2f64(<2 x i16> *%a) { ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero ; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VLDQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7] +; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX512VLDQ-NEXT: retq %ld = load <2 x i16>, <2 x i16> *%a @@ -3076,7 +3108,8 @@ define <2 x double> @uitofp_load_2i8_to_2f64(<2 x i8> *%a) { ; AVX512VL-LABEL: uitofp_load_2i8_to_2f64: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[u],zero,zero,zero,xmm0[u],zero,zero,zero +; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; @@ -3091,7 +3124,8 @@ define <2 x double> @uitofp_load_2i8_to_2f64(<2 x i8> *%a) { ; AVX512VLDQ-LABEL: uitofp_load_2i8_to_2f64: ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[u],zero,zero,zero,xmm0[u],zero,zero,zero +; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VLDQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX512VLDQ-NEXT: retq %ld = load <2 x i8>, <2 x i8> *%a @@ -3130,7 +3164,7 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; ; AVX1-LABEL: uitofp_load_4i64_to_4f64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovdqa (%rdi), %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0] ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] @@ -3416,6 +3450,7 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_load_4i64_to_4f32: @@ -3433,6 +3468,7 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_load_4i64_to_4f32: @@ -3440,6 +3476,7 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f32: @@ -4003,6 +4040,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_load_4i64_to_4f32: @@ -4020,6 +4058,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_load_4i64_to_4f32: @@ -4027,6 +4066,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f32: @@ -4079,6 +4119,7 @@ define <4 x float> @uitofp_load_4i32_to_4f32(<4 x i32> *%a) { ; AVX512F-NEXT: vmovaps (%rdi), %xmm0 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_load_4i32_to_4f32: @@ -4091,6 +4132,7 @@ define <4 x float> @uitofp_load_4i32_to_4f32(<4 x i32> *%a) { ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f32: @@ -4810,6 +4852,7 @@ define void @aggregate_sitofp_8i16_to_8f32(%Arguments* nocapture readonly %a0) { ; AVX512-NEXT: vpmovsxwd 8(%rdi), %ymm0 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 ; AVX512-NEXT: vmovaps %ymm0, (%rax) +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = load %Arguments, %Arguments* %a0, align 1 %2 = extractvalue %Arguments %1, 1 |