summary | refs | log | tree | commit | diff
path: root/test/CodeGen/X86/nontemporal-2.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/nontemporal-2.ll')
-rw-r--r-- test/CodeGen/X86/nontemporal-2.ll 29
1 files changed, 24 insertions, 5 deletions
diff --git a/test/CodeGen/X86/nontemporal-2.ll b/test/CodeGen/X86/nontemporal-2.ll
index 92a35436d90d5..d1bb8d3e923b6 100644
--- a/test/CodeGen/X86/nontemporal-2.ll
+++ b/test/CodeGen/X86/nontemporal-2.ll
@@ -255,6 +255,7 @@ define void @test_zero_v8f32(<8 x float>* %dst) {
; VLX: # BB#0:
; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <8 x float> zeroinitializer, <8 x float>* %dst, align 32, !nontemporal !1
ret void
@@ -279,6 +280,7 @@ define void @test_zero_v8i32(<8 x i32>* %dst) {
; VLX: # BB#0:
; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 32, !nontemporal !1
ret void
@@ -303,6 +305,7 @@ define void @test_zero_v4f64(<4 x double>* %dst) {
; VLX: # BB#0:
; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <4 x double> zeroinitializer, <4 x double>* %dst, align 32, !nontemporal !1
ret void
@@ -327,6 +330,7 @@ define void @test_zero_v4i64(<4 x i64>* %dst) {
; VLX: # BB#0:
; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 32, !nontemporal !1
ret void
@@ -351,6 +355,7 @@ define void @test_zero_v16i16(<16 x i16>* %dst) {
; VLX: # BB#0:
; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 32, !nontemporal !1
ret void
@@ -375,6 +380,7 @@ define void @test_zero_v32i8(<32 x i8>* %dst) {
; VLX: # BB#0:
; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 32, !nontemporal !1
ret void
@@ -757,6 +763,7 @@ define void @test_arg_v8f32(<8 x float> %arg, <8 x float>* %dst) {
; VLX-LABEL: test_arg_v8f32:
; VLX: # BB#0:
; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <8 x float> %arg, <8 x float>* %dst, align 32, !nontemporal !1
ret void
@@ -777,7 +784,8 @@ define void @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %dst) {
;
; VLX-LABEL: test_arg_v8i32:
; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <8 x i32> %arg, <8 x i32>* %dst, align 32, !nontemporal !1
ret void
@@ -798,7 +806,8 @@ define void @test_arg_v4f64(<4 x double> %arg, <4 x double>* %dst) {
;
; VLX-LABEL: test_arg_v4f64:
; VLX: # BB#0:
-; VLX-NEXT: vmovntpd %ymm0, (%rdi)
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <4 x double> %arg, <4 x double>* %dst, align 32, !nontemporal !1
ret void
@@ -819,7 +828,8 @@ define void @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %dst) {
;
; VLX-LABEL: test_arg_v4i64:
; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <4 x i64> %arg, <4 x i64>* %dst, align 32, !nontemporal !1
ret void
@@ -840,7 +850,8 @@ define void @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %dst) {
;
; VLX-LABEL: test_arg_v16i16:
; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <16 x i16> %arg, <16 x i16>* %dst, align 32, !nontemporal !1
ret void
@@ -861,7 +872,8 @@ define void @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %dst) {
;
; VLX-LABEL: test_arg_v32i8:
; VLX: # BB#0:
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <32 x i8> %arg, <32 x i8>* %dst, align 32, !nontemporal !1
ret void
@@ -1031,6 +1043,7 @@ define void @test_op_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
; VLX: # BB#0:
; VLX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntps %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
%r = fadd <8 x float> %a, %b
store <8 x float> %r, <8 x float>* %dst, align 32, !nontemporal !1
@@ -1068,6 +1081,7 @@ define void @test_op_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32>* %dst) {
; VLX: # BB#0:
; VLX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
%r = add <8 x i32> %a, %b
store <8 x i32> %r, <8 x i32>* %dst, align 32, !nontemporal !1
@@ -1094,6 +1108,7 @@ define void @test_op_v4f64(<4 x double> %a, <4 x double> %b, <4 x double>* %dst)
; VLX: # BB#0:
; VLX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntpd %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
%r = fadd <4 x double> %a, %b
store <4 x double> %r, <4 x double>* %dst, align 32, !nontemporal !1
@@ -1131,6 +1146,7 @@ define void @test_op_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %dst) {
; VLX: # BB#0:
; VLX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
%r = add <4 x i64> %a, %b
store <4 x i64> %r, <4 x i64>* %dst, align 32, !nontemporal !1
@@ -1168,6 +1184,7 @@ define void @test_op_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16>* %dst) {
; VLX: # BB#0:
; VLX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
%r = add <16 x i16> %a, %b
store <16 x i16> %r, <16 x i16>* %dst, align 32, !nontemporal !1
@@ -1205,6 +1222,7 @@ define void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
; VLX: # BB#0:
; VLX-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
%r = add <32 x i8> %a, %b
store <32 x i8> %r, <32 x i8>* %dst, align 32, !nontemporal !1
@@ -1235,6 +1253,7 @@ define void @test_unaligned_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %
; VLX: # BB#0:
; VLX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovups %ymm0, (%rdi)
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
%r = fadd <8 x float> %a, %b
store <8 x float> %r, <8 x float>* %dst, align 16, !nontemporal !1