summaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/vector-shuffle-128-v8.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/vector-shuffle-128-v8.ll')
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v8.ll397
1 files changed, 289 insertions, 108 deletions
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 60bc36948d23f..072d71fae570a 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -3,8 +3,10 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST
define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_01012323:
@@ -85,11 +87,33 @@ define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_00004444:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_00004444:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v8i16_00004444:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i16_00004444:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_00004444:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_00004444:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
@@ -126,11 +150,33 @@ define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_31206745:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_31206745:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v8i16_31206745:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i16_31206745:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_31206745:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_31206745:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
ret <8 x i16> %shuffle
}
@@ -179,11 +225,33 @@ define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_23026745:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_23026745:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v8i16_23026745:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i16_23026745:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_23026745:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_23026745:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
ret <8 x i16> %shuffle
}
@@ -194,11 +262,33 @@ define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_23016747:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
-; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_23016747:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v8i16_23016747:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
+; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i16_23016747:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_23016747:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
+; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_23016747:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
ret <8 x i16> %shuffle
}
@@ -597,11 +687,33 @@ define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_04404567:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_04404567:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v8i16_04404567:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i16_04404567:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_04404567:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_04404567:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %shuffle
}
@@ -700,17 +812,10 @@ define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSE41-NEXT: retq
;
-; AVX1OR2-LABEL: shuffle_v8i16_0127XXXX:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i16_0127XXXX:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v8i16_0127XXXX:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
+; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
@@ -733,17 +838,10 @@ define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
-; AVX1OR2-LABEL: shuffle_v8i16_XXXX4563:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i16_XXXX4563:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v8i16_XXXX4563:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
+; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
ret <8 x i16> %shuffle
}
@@ -766,17 +864,10 @@ define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSE41-NEXT: retq
;
-; AVX1OR2-LABEL: shuffle_v8i16_4563XXXX:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i16_4563XXXX:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v8i16_4563XXXX:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
@@ -799,17 +890,10 @@ define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
-; AVX1OR2-LABEL: shuffle_v8i16_01274563:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i16_01274563:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v8i16_01274563:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
+; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
ret <8 x i16> %shuffle
}
@@ -832,17 +916,10 @@ define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSE41-NEXT: retq
;
-; AVX1OR2-LABEL: shuffle_v8i16_45630127:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i16_45630127:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v8i16_45630127:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
+; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
ret <8 x i16> %shuffle
}
@@ -980,12 +1057,38 @@ define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_109832ba:
-; AVX: # %bb.0:
-; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
-; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_109832ba:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v8i16_109832ba:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i16_109832ba:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_109832ba:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_109832ba:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
ret <8 x i16> %shuffle
}
@@ -1028,13 +1131,43 @@ define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_0213cedf:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_0213cedf:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
+; AVX2-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,10,11,12,13,14,15]
+; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,10,11,12,13,14,15]
+; AVX512VL-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; AVX512VL-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
ret <8 x i16> %shuffle
}
@@ -1064,12 +1197,38 @@ define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_443aXXXX:
-; AVX: # %bb.0:
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_443aXXXX:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v8i16_443aXXXX:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i16_443aXXXX:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_443aXXXX:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_443aXXXX:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
@@ -1336,13 +1495,35 @@ define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v8i16_XXX1X579:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
-; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
-; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
-; AVX2OR512VL-NEXT: retq
+; AVX2-SLOW-LABEL: shuffle_v8i16_XXX1X579:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
+; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
+; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i16_XXX1X579:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15]
+; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXX1X579:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
+; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_XXX1X579:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15]
+; AVX512VL-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
ret <8 x i16> %shuffle
}