Diffstat (limited to 'test/CodeGen/X86/nontemporal-2.ll')
-rw-r--r-- | test/CodeGen/X86/nontemporal-2.ll | 29
1 file changed, 24 insertions, 5 deletions
diff --git a/test/CodeGen/X86/nontemporal-2.ll b/test/CodeGen/X86/nontemporal-2.ll
index 92a35436d90d5..d1bb8d3e923b6 100644
--- a/test/CodeGen/X86/nontemporal-2.ll
+++ b/test/CodeGen/X86/nontemporal-2.ll
@@ -255,6 +255,7 @@ define void @test_zero_v8f32(<8 x float>* %dst) {
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
 ; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <8 x float> zeroinitializer, <8 x float>* %dst, align 32, !nontemporal !1
   ret void
@@ -279,6 +280,7 @@ define void @test_zero_v8i32(<8 x i32>* %dst) {
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
 ; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 32, !nontemporal !1
   ret void
@@ -303,6 +305,7 @@ define void @test_zero_v4f64(<4 x double>* %dst) {
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
 ; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <4 x double> zeroinitializer, <4 x double>* %dst, align 32, !nontemporal !1
   ret void
@@ -327,6 +330,7 @@ define void @test_zero_v4i64(<4 x i64>* %dst) {
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
 ; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 32, !nontemporal !1
   ret void
@@ -351,6 +355,7 @@ define void @test_zero_v16i16(<16 x i16>* %dst) {
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
 ; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 32, !nontemporal !1
   ret void
@@ -375,6 +380,7 @@ define void @test_zero_v32i8(<32 x i8>* %dst) {
 ; VLX: # BB#0:
 ; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
 ; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 32, !nontemporal !1
   ret void
@@ -757,6 +763,7 @@ define void @test_arg_v8f32(<8 x float> %arg, <8 x float>* %dst) {
 ; VLX-LABEL: test_arg_v8f32:
 ; VLX: # BB#0:
 ; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <8 x float> %arg, <8 x float>* %dst, align 32, !nontemporal !1
   ret void
@@ -777,7 +784,8 @@ define void @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %dst) {
 ;
 ; VLX-LABEL: test_arg_v8i32:
 ; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <8 x i32> %arg, <8 x i32>* %dst, align 32, !nontemporal !1
   ret void
@@ -798,7 +806,8 @@ define void @test_arg_v4f64(<4 x double> %arg, <4 x double>* %dst) {
 ;
 ; VLX-LABEL: test_arg_v4f64:
 ; VLX: # BB#0:
-; VLX-NEXT: vmovntpd %ymm0, (%rdi)
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <4 x double> %arg, <4 x double>* %dst, align 32, !nontemporal !1
   ret void
@@ -819,7 +828,8 @@ define void @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %dst) {
 ;
 ; VLX-LABEL: test_arg_v4i64:
 ; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <4 x i64> %arg, <4 x i64>* %dst, align 32, !nontemporal !1
   ret void
@@ -840,7 +850,8 @@ define void @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %dst) {
 ;
 ; VLX-LABEL: test_arg_v16i16:
 ; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <16 x i16> %arg, <16 x i16>* %dst, align 32, !nontemporal !1
   ret void
@@ -861,7 +872,8 @@ define void @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %dst) {
 ;
 ; VLX-LABEL: test_arg_v32i8:
 ; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   store <32 x i8> %arg, <32 x i8>* %dst, align 32, !nontemporal !1
   ret void
@@ -1031,6 +1043,7 @@ define void @test_op_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
 ; VLX: # BB#0:
 ; VLX-NEXT: vaddps %ymm1, %ymm0, %ymm0
 ; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   %r = fadd <8 x float> %a, %b
   store <8 x float> %r, <8 x float>* %dst, align 32, !nontemporal !1
@@ -1068,6 +1081,7 @@ define void @test_op_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32>* %dst) {
 ; VLX: # BB#0:
 ; VLX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
 ; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   %r = add <8 x i32> %a, %b
   store <8 x i32> %r, <8 x i32>* %dst, align 32, !nontemporal !1
@@ -1094,6 +1108,7 @@ define void @test_op_v4f64(<4 x double> %a, <4 x double> %b, <4 x double>* %dst)
 ; VLX: # BB#0:
 ; VLX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
 ; VLX-NEXT: vmovntpd %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   %r = fadd <4 x double> %a, %b
   store <4 x double> %r, <4 x double>* %dst, align 32, !nontemporal !1
@@ -1131,6 +1146,7 @@ define void @test_op_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %dst) {
 ; VLX: # BB#0:
 ; VLX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
 ; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   %r = add <4 x i64> %a, %b
   store <4 x i64> %r, <4 x i64>* %dst, align 32, !nontemporal !1
@@ -1168,6 +1184,7 @@ define void @test_op_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16>* %dst) {
 ; VLX: # BB#0:
 ; VLX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
 ; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   %r = add <16 x i16> %a, %b
   store <16 x i16> %r, <16 x i16>* %dst, align 32, !nontemporal !1
@@ -1205,6 +1222,7 @@ define void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
 ; VLX: # BB#0:
 ; VLX-NEXT: vpaddb %ymm1, %ymm0, %ymm0
 ; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   %r = add <32 x i8> %a, %b
   store <32 x i8> %r, <32 x i8>* %dst, align 32, !nontemporal !1
@@ -1235,6 +1253,7 @@ define void @test_unaligned_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %
 ; VLX: # BB#0:
 ; VLX-NEXT: vaddps %ymm1, %ymm0, %ymm0
 ; VLX-NEXT: vmovups %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
 ; VLX-NEXT: retq
   %r = fadd <8 x float> %a, %b
   store <8 x float> %r, <8 x float>* %dst, align 16, !nontemporal !1
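For context on the pattern these CHECK lines verify, a minimal C sketch follows (not part of the commit; the function name is hypothetical, and compilation with AVX enabled is assumed). It performs the same kind of 256-bit nontemporal (streaming) store that the tests above exercise. Because the function touches the ymm registers, a compiler is expected to emit vzeroupper before returning, to avoid AVX/SSE transition penalties on some microarchitectures; that is the instruction the updated checks look for.

    /* sketch only: hypothetical name; assumes AVX and a 32-byte-aligned dst */
    #include <immintrin.h>

    void stream_zero_v8f32(float *dst) {
        __m256 zero = _mm256_setzero_ps();  /* e.g. vpxor %ymm0, %ymm0, %ymm0 */
        _mm256_stream_ps(dst, zero);        /* nontemporal: vmovntps %ymm0, (%rdi) */
    }   /* compiler emits vzeroupper here, before retq */

Note that _mm256_stream_ps maps to vmovntps, which requires a 32-byte-aligned address; the test_unaligned_v8f32 case above shows that a merely 16-byte-aligned nontemporal store hint falls back to a regular vmovups.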