summaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/avx512-cvt.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/avx512-cvt.ll')
-rw-r--r--test/CodeGen/X86/avx512-cvt.ll525
1 files changed, 391 insertions, 134 deletions
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 87deeb9e16c03..2b55372f30667 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQ --check-prefix=AVX512DQ
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NOVL --check-prefix=DQ --check-prefix=AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
define <16 x float> @sitof32(<16 x i32> %a) nounwind {
@@ -110,40 +110,78 @@ define <2 x float> @sltof2f32(<2 x i64> %a) {
; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%b = sitofp <2 x i64> %a to <2 x float>
ret <2 x float>%b
}
define <4 x float> @sltof4f32_mem(<4 x i64>* %a) {
-; NODQ-LABEL: sltof4f32_mem:
-; NODQ: ## BB#0:
-; NODQ-NEXT: vmovdqu (%rdi), %ymm0
-; NODQ-NEXT: vpextrq $1, %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
-; NODQ-NEXT: vmovq %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
-; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
-; NODQ-NEXT: vmovq %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
-; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; NODQ-NEXT: vpextrq $1, %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
-; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; NODQ-NEXT: retq
+; KNL-LABEL: sltof4f32_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vmovdqu (%rdi), %ymm0
+; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
+; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
+; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
+; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
+; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; KNL-NEXT: retq
;
; VLDQ-LABEL: sltof4f32_mem:
; VLDQ: ## BB#0:
; VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0
; VLDQ-NEXT: retq
;
+; VLNODQ-LABEL: sltof4f32_mem:
+; VLNODQ: ## BB#0:
+; VLNODQ-NEXT: vmovdqu (%rdi), %ymm0
+; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
+; VLNODQ-NEXT: vmovq %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
+; VLNODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; VLNODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
+; VLNODQ-NEXT: vmovq %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
+; VLNODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
+; VLNODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; VLNODQ-NEXT: vzeroupper
+; VLNODQ-NEXT: retq
+;
; AVX512DQ-LABEL: sltof4f32_mem:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vmovups (%rdi), %ymm0
; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: sltof4f32_mem:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vmovdqu (%rdi), %ymm0
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
+; AVX512BW-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
+; AVX512BW-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
+; AVX512BW-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%a1 = load <4 x i64>, <4 x i64>* %a, align 8
%b = sitofp <4 x i64> %a1 to <4 x float>
ret <4 x float>%b
@@ -218,65 +256,137 @@ define <4 x i64> @f32tosl(<4 x float> %a) {
}
define <4 x float> @sltof432(<4 x i64> %a) {
-; NODQ-LABEL: sltof432:
-; NODQ: ## BB#0:
-; NODQ-NEXT: vpextrq $1, %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
-; NODQ-NEXT: vmovq %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
-; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
-; NODQ-NEXT: vmovq %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
-; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; NODQ-NEXT: vpextrq $1, %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
-; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; NODQ-NEXT: retq
+; KNL-LABEL: sltof432:
+; KNL: ## BB#0:
+; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
+; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
+; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
+; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
+; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; KNL-NEXT: retq
;
; VLDQ-LABEL: sltof432:
; VLDQ: ## BB#0:
; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
+; VLDQ-NEXT: vzeroupper
; VLDQ-NEXT: retq
;
+; VLNODQ-LABEL: sltof432:
+; VLNODQ: ## BB#0:
+; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
+; VLNODQ-NEXT: vmovq %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
+; VLNODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; VLNODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
+; VLNODQ-NEXT: vmovq %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
+; VLNODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
+; VLNODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; VLNODQ-NEXT: vzeroupper
+; VLNODQ-NEXT: retq
+;
; AVX512DQ-LABEL: sltof432:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: sltof432:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
+; AVX512BW-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
+; AVX512BW-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
+; AVX512BW-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%b = sitofp <4 x i64> %a to <4 x float>
ret <4 x float> %b
}
define <4 x float> @ultof432(<4 x i64> %a) {
-; NODQ-LABEL: ultof432:
-; NODQ: ## BB#0:
-; NODQ-NEXT: vpextrq $1, %xmm0, %rax
-; NODQ-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
-; NODQ-NEXT: vmovq %xmm0, %rax
-; NODQ-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
-; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
-; NODQ-NEXT: vmovq %xmm0, %rax
-; NODQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
-; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; NODQ-NEXT: vpextrq $1, %xmm0, %rax
-; NODQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
-; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; NODQ-NEXT: retq
+; KNL-LABEL: ultof432:
+; KNL: ## BB#0:
+; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
+; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
+; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
+; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
+; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; KNL-NEXT: retq
;
; VLDQ-LABEL: ultof432:
; VLDQ: ## BB#0:
; VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0
+; VLDQ-NEXT: vzeroupper
; VLDQ-NEXT: retq
;
+; VLNODQ-LABEL: ultof432:
+; VLNODQ: ## BB#0:
+; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
+; VLNODQ-NEXT: vmovq %xmm0, %rax
+; VLNODQ-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
+; VLNODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; VLNODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
+; VLNODQ-NEXT: vmovq %xmm0, %rax
+; VLNODQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
+; VLNODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
+; VLNODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; VLNODQ-NEXT: vzeroupper
+; VLNODQ-NEXT: retq
+;
; AVX512DQ-LABEL: ultof432:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: ultof432:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
+; AVX512BW-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
+; AVX512BW-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
+; AVX512BW-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%b = uitofp <4 x i64> %a to <4 x float>
ret <4 x float> %b
}
@@ -355,17 +465,33 @@ define <8 x i32> @fptoui_256(<8 x float> %a) nounwind {
}
define <4 x i32> @fptoui_128(<4 x float> %a) nounwind {
-; NOVL-LABEL: fptoui_128:
-; NOVL: ## BB#0:
-; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
-; NOVL-NEXT: vcvttps2udq %zmm0, %zmm0
-; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
-; NOVL-NEXT: retq
+; KNL-LABEL: fptoui_128:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL-NEXT: vcvttps2udq %zmm0, %zmm0
+; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL-NEXT: retq
;
; VL-LABEL: fptoui_128:
; VL: ## BB#0:
; VL-NEXT: vcvttps2udq %xmm0, %xmm0
; VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptoui_128:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
+; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: fptoui_128:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512BW-NEXT: vcvttps2udq %zmm0, %zmm0
+; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%b = fptoui <4 x float> %a to <4 x i32>
ret <4 x i32> %b
}
@@ -380,17 +506,34 @@ define <8 x i32> @fptoui01(<8 x double> %a) nounwind {
}
define <4 x i32> @fptoui_256d(<4 x double> %a) nounwind {
-; NOVL-LABEL: fptoui_256d:
-; NOVL: ## BB#0:
-; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; NOVL-NEXT: vcvttpd2udq %zmm0, %ymm0
-; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
-; NOVL-NEXT: retq
+; KNL-LABEL: fptoui_256d:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL-NEXT: vcvttpd2udq %zmm0, %ymm0
+; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; KNL-NEXT: retq
;
; VL-LABEL: fptoui_256d:
; VL: ## BB#0:
; VL-NEXT: vcvttpd2udq %ymm0, %xmm0
+; VL-NEXT: vzeroupper
; VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptoui_256d:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
+; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: fptoui_256d:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512BW-NEXT: vcvttpd2udq %zmm0, %ymm0
+; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%b = fptoui <4 x double> %a to <4 x i32>
ret <4 x i32> %b
}
@@ -404,34 +547,70 @@ define <8 x double> @sitof64(<8 x i32> %a) {
ret <8 x double> %b
}
define <8 x double> @sitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
-; NODQ-LABEL: sitof64_mask:
-; NODQ: ## BB#0:
-; NODQ-NEXT: kmovw %edi, %k1
-; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
-; NODQ-NEXT: retq
+; KNL-LABEL: sitof64_mask:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
+; KNL-NEXT: retq
;
-; DQ-LABEL: sitof64_mask:
-; DQ: ## BB#0:
-; DQ-NEXT: kmovb %edi, %k1
-; DQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
-; DQ-NEXT: retq
+; VLBW-LABEL: sitof64_mask:
+; VLBW: ## BB#0:
+; VLBW-NEXT: kmovd %edi, %k1
+; VLBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
+; VLBW-NEXT: retq
+;
+; VLNOBW-LABEL: sitof64_mask:
+; VLNOBW: ## BB#0:
+; VLNOBW-NEXT: kmovw %edi, %k1
+; VLNOBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
+; VLNOBW-NEXT: retq
+;
+; AVX512DQ-LABEL: sitof64_mask:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: kmovw %edi, %k1
+; AVX512DQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: sitof64_mask:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
+; AVX512BW-NEXT: retq
%1 = bitcast i8 %c to <8 x i1>
%2 = sitofp <8 x i32> %b to <8 x double>
%3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
ret <8 x double> %3
}
define <8 x double> @sitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
-; NODQ-LABEL: sitof64_maskz:
-; NODQ: ## BB#0:
-; NODQ-NEXT: kmovw %edi, %k1
-; NODQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
-; NODQ-NEXT: retq
+; KNL-LABEL: sitof64_maskz:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
;
-; DQ-LABEL: sitof64_maskz:
-; DQ: ## BB#0:
-; DQ-NEXT: kmovb %edi, %k1
-; DQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
-; DQ-NEXT: retq
+; VLBW-LABEL: sitof64_maskz:
+; VLBW: ## BB#0:
+; VLBW-NEXT: kmovd %edi, %k1
+; VLBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
+; VLBW-NEXT: retq
+;
+; VLNOBW-LABEL: sitof64_maskz:
+; VLNOBW: ## BB#0:
+; VLNOBW-NEXT: kmovw %edi, %k1
+; VLNOBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
+; VLNOBW-NEXT: retq
+;
+; AVX512DQ-LABEL: sitof64_maskz:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: kmovw %edi, %k1
+; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: sitof64_maskz:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
%1 = bitcast i8 %b to <8 x i1>
%2 = sitofp <8 x i32> %a to <8 x double>
%3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
@@ -448,10 +627,16 @@ define <8 x i32> @fptosi01(<8 x double> %a) {
}
define <4 x i32> @fptosi03(<4 x double> %a) {
-; ALL-LABEL: fptosi03:
-; ALL: ## BB#0:
-; ALL-NEXT: vcvttpd2dq %ymm0, %xmm0
-; ALL-NEXT: retq
+; KNL-LABEL: fptosi03:
+; KNL: ## BB#0:
+; KNL-NEXT: vcvttpd2dq %ymm0, %xmm0
+; KNL-NEXT: retq
+;
+; AVX512-LABEL: fptosi03:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vcvttpd2dq %ymm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%b = fptosi <4 x double> %a to <4 x i32>
ret <4 x i32> %b
}
@@ -475,29 +660,54 @@ define <16 x float> @fptrunc00(<16 x double> %b) nounwind {
}
define <4 x float> @fptrunc01(<4 x double> %b) {
-; ALL-LABEL: fptrunc01:
-; ALL: ## BB#0:
-; ALL-NEXT: vcvtpd2ps %ymm0, %xmm0
-; ALL-NEXT: retq
+; KNL-LABEL: fptrunc01:
+; KNL: ## BB#0:
+; KNL-NEXT: vcvtpd2ps %ymm0, %xmm0
+; KNL-NEXT: retq
+;
+; AVX512-LABEL: fptrunc01:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vcvtpd2ps %ymm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%a = fptrunc <4 x double> %b to <4 x float>
ret <4 x float> %a
}
define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
-; NOVL-LABEL: fptrunc02:
-; NOVL: ## BB#0:
-; NOVL-NEXT: vpslld $31, %xmm1, %xmm1
-; NOVL-NEXT: vpsrad $31, %xmm1, %xmm1
-; NOVL-NEXT: vcvtpd2ps %ymm0, %xmm0
-; NOVL-NEXT: vpand %xmm0, %xmm1, %xmm0
-; NOVL-NEXT: retq
+; KNL-LABEL: fptrunc02:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm1, %xmm1
+; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
+; KNL-NEXT: vcvtpd2ps %ymm0, %xmm0
+; KNL-NEXT: vpand %xmm0, %xmm1, %xmm0
+; KNL-NEXT: retq
;
; VL-LABEL: fptrunc02:
; VL: ## BB#0:
; VL-NEXT: vpslld $31, %xmm1, %xmm1
; VL-NEXT: vptestmd %xmm1, %xmm1, %k1
; VL-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
+; VL-NEXT: vzeroupper
; VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptrunc02:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX512DQ-NEXT: vcvtpd2ps %ymm0, %xmm0
+; AVX512DQ-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: fptrunc02:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX512BW-NEXT: vcvtpd2ps %ymm0, %xmm0
+; AVX512BW-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%a = fptrunc <4 x double> %b to <4 x float>
%c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
ret <4 x float> %c
@@ -685,34 +895,70 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind {
ret <16 x double> %b
}
define <8 x double> @uitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
-; NODQ-LABEL: uitof64_mask:
-; NODQ: ## BB#0:
-; NODQ-NEXT: kmovw %edi, %k1
-; NODQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
-; NODQ-NEXT: retq
+; KNL-LABEL: uitof64_mask:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
+; KNL-NEXT: retq
;
-; DQ-LABEL: uitof64_mask:
-; DQ: ## BB#0:
-; DQ-NEXT: kmovb %edi, %k1
-; DQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
-; DQ-NEXT: retq
+; VLBW-LABEL: uitof64_mask:
+; VLBW: ## BB#0:
+; VLBW-NEXT: kmovd %edi, %k1
+; VLBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
+; VLBW-NEXT: retq
+;
+; VLNOBW-LABEL: uitof64_mask:
+; VLNOBW: ## BB#0:
+; VLNOBW-NEXT: kmovw %edi, %k1
+; VLNOBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
+; VLNOBW-NEXT: retq
+;
+; AVX512DQ-LABEL: uitof64_mask:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: kmovw %edi, %k1
+; AVX512DQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: uitof64_mask:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
+; AVX512BW-NEXT: retq
%1 = bitcast i8 %c to <8 x i1>
%2 = uitofp <8 x i32> %b to <8 x double>
%3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
ret <8 x double> %3
}
define <8 x double> @uitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
-; NODQ-LABEL: uitof64_maskz:
-; NODQ: ## BB#0:
-; NODQ-NEXT: kmovw %edi, %k1
-; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
-; NODQ-NEXT: retq
+; KNL-LABEL: uitof64_maskz:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
;
-; DQ-LABEL: uitof64_maskz:
-; DQ: ## BB#0:
-; DQ-NEXT: kmovb %edi, %k1
-; DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
-; DQ-NEXT: retq
+; VLBW-LABEL: uitof64_maskz:
+; VLBW: ## BB#0:
+; VLBW-NEXT: kmovd %edi, %k1
+; VLBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
+; VLBW-NEXT: retq
+;
+; VLNOBW-LABEL: uitof64_maskz:
+; VLNOBW: ## BB#0:
+; VLNOBW-NEXT: kmovw %edi, %k1
+; VLNOBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
+; VLNOBW-NEXT: retq
+;
+; AVX512DQ-LABEL: uitof64_maskz:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: kmovw %edi, %k1
+; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: uitof64_maskz:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
%1 = bitcast i8 %b to <8 x i1>
%2 = uitofp <8 x i32> %a to <8 x double>
%3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
@@ -761,17 +1007,33 @@ define <8 x float> @uitof32_256(<8 x i32> %a) nounwind {
}
define <4 x float> @uitof32_128(<4 x i32> %a) nounwind {
-; NOVL-LABEL: uitof32_128:
-; NOVL: ## BB#0:
-; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
-; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0
-; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
-; NOVL-NEXT: retq
+; KNL-LABEL: uitof32_128:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
+; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL-NEXT: retq
;
; VL-LABEL: uitof32_128:
; VL: ## BB#0:
; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; VL-NEXT: retq
+;
+; AVX512DQ-LABEL: uitof32_128:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
+; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: uitof32_128:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512BW-NEXT: vcvtudq2ps %zmm0, %zmm0
+; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%b = uitofp <4 x i32> %a to <4 x float>
ret <4 x float> %b
}
@@ -917,11 +1179,9 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
; AVX512DQ-NEXT: vxorpd %zmm2, %zmm2, %zmm2
; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm2, %k0
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm2, %k1
-; AVX512DQ-NEXT: vpmovm2q %k1, %zmm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
-; AVX512DQ-NEXT: vpmovm2q %k0, %zmm1
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
; AVX512DQ-NEXT: vcvtdq2pd %ymm1, %zmm1
; AVX512DQ-NEXT: retq
%cmpres = fcmp ogt <16 x double> %a, zeroinitializer
@@ -960,8 +1220,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vxorpd %zmm1, %zmm1, %zmm1
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0
-; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: retq
%cmpres = fcmp ogt <8 x double> %a, zeroinitializer
@@ -1002,8 +1261,7 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0
-; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT: retq
%cmpres = fcmp ogt <8 x float> %a, zeroinitializer
@@ -1075,7 +1333,6 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
; NOVL: ## BB#0:
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
; NOVL-NEXT: retq
;