diff options
Diffstat (limited to 'test/Transforms/SLPVectorizer/X86/arith-mul.ll')
-rw-r--r-- | test/Transforms/SLPVectorizer/X86/arith-mul.ll | 74 |
1 files changed, 74 insertions, 0 deletions
diff --git a/test/Transforms/SLPVectorizer/X86/arith-mul.ll b/test/Transforms/SLPVectorizer/X86/arith-mul.ll index 95875d7f01fd..4763a9a2bf12 100644 --- a/test/Transforms/SLPVectorizer/X86/arith-mul.ll +++ b/test/Transforms/SLPVectorizer/X86/arith-mul.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F @@ -54,6 +55,41 @@ define void @mul_v8i64() { ; SSE-NEXT: store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8 ; SSE-NEXT: ret void ; +; SLM-LABEL: @mul_v8i64( +; SLM-NEXT: [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8 +; SLM-NEXT: [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8 +; SLM-NEXT: [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8 +; SLM-NEXT: [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8 +; SLM-NEXT: [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8 +; SLM-NEXT: [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8 +; SLM-NEXT: [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8 +; SLM-NEXT: [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8 +; SLM-NEXT: [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8 +; SLM-NEXT: [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8 +; SLM-NEXT: [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8 +; SLM-NEXT: [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8 +; SLM-NEXT: [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8 +; SLM-NEXT: [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8 +; SLM-NEXT: [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8 +; SLM-NEXT: [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8 +; SLM-NEXT: [[R0:%.*]] = mul i64 [[A0]], [[B0]] +; SLM-NEXT: [[R1:%.*]] = mul i64 [[A1]], [[B1]] +; SLM-NEXT: [[R2:%.*]] = mul i64 [[A2]], [[B2]] +; SLM-NEXT: [[R3:%.*]] = mul i64 [[A3]], [[B3]] +; SLM-NEXT: [[R4:%.*]] = mul i64 [[A4]], [[B4]] +; SLM-NEXT: [[R5:%.*]] = mul i64 [[A5]], [[B5]] +; SLM-NEXT: [[R6:%.*]] = mul i64 [[A6]], [[B6]] +; SLM-NEXT: [[R7:%.*]] = mul i64 [[A7]], [[B7]] +; SLM-NEXT: store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8 +; SLM-NEXT: store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8 +; SLM-NEXT: store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8 +; SLM-NEXT: store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8 +; SLM-NEXT: store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8 +; SLM-NEXT: store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8 +; SLM-NEXT: store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8 +; SLM-NEXT: store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8 +; SLM-NEXT: ret void +; ; AVX1-LABEL: @mul_v8i64( ; AVX1-NEXT: [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8 ; AVX1-NEXT: [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8 @@ -162,6 +198,25 @@ define void @mul_v16i32() { ; SSE-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 ; SSE-NEXT: ret void ; +; SLM-LABEL: @mul_v16i32( +; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]] +; SLM-NEXT: [[TMP10:%.*]] = mul <4 x i32> [[TMP2]], [[TMP6]] +; SLM-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP3]], [[TMP7]] +; SLM-NEXT: [[TMP12:%.*]] = mul <4 x i32> [[TMP4]], [[TMP8]] +; SLM-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4 +; SLM-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4 +; SLM-NEXT: ret void +; ; AVX-LABEL: @mul_v16i32( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 @@ -267,6 +322,25 @@ define void @mul_v32i16() { ; SSE-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2 ; SSE-NEXT: ret void ; +; SLM-LABEL: @mul_v32i16( +; SLM-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: [[TMP9:%.*]] = mul <8 x i16> [[TMP1]], [[TMP5]] +; SLM-NEXT: [[TMP10:%.*]] = mul <8 x i16> [[TMP2]], [[TMP6]] +; SLM-NEXT: [[TMP11:%.*]] = mul <8 x i16> [[TMP3]], [[TMP7]] +; SLM-NEXT: [[TMP12:%.*]] = mul <8 x i16> [[TMP4]], [[TMP8]] +; SLM-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP10]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2 +; SLM-NEXT: store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2 +; SLM-NEXT: ret void +; ; AVX-LABEL: @mul_v32i16( ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2 |