summary refs log tree commit diff
path: root/test/Analysis
diff options
context:
space:
mode:
Diffstat (limited to 'test/Analysis')
-rw-r--r-- test/Analysis/CostModel/AArch64/bswap.ll 70
-rw-r--r-- test/Analysis/CostModel/AArch64/falkor.ll 26
-rw-r--r-- test/Analysis/CostModel/AArch64/gep.ll 66
-rw-r--r-- test/Analysis/CostModel/X86/arith.ll 4
-rw-r--r-- test/Analysis/CostModel/X86/shuffle-broadcast.ll 140
-rw-r--r-- test/Analysis/CostModel/X86/vdiv-cost.ll 66
-rw-r--r-- test/Analysis/CostModel/X86/vshift-ashr-cost.ll 256
-rw-r--r-- test/Analysis/CostModel/X86/vshift-lshr-cost.ll 259
-rw-r--r-- test/Analysis/CostModel/X86/vshift-shl-cost.ll 261
9 files changed, 1075 insertions, 73 deletions
diff --git a/test/Analysis/CostModel/AArch64/bswap.ll b/test/Analysis/CostModel/AArch64/bswap.ll
new file mode 100644
index 000000000000..a97127a631d8
--- /dev/null
+++ b/test/Analysis/CostModel/AArch64/bswap.ll
@@ -0,0 +1,70 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu < %s | FileCheck %s
+
+; Verify the cost of bswap instructions.
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
+declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
+
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+
+define i16 @bswap_i16(i16 %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_i16':
+; CHECK: Found an estimated cost of 1 for instruction: %bswap
+ %bswap = tail call i16 @llvm.bswap.i16(i16 %a)
+ ret i16 %bswap
+}
+
+define i32 @bswap_i32(i32 %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_i32':
+; CHECK: Found an estimated cost of 1 for instruction: %bswap
+ %bswap = tail call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %bswap
+}
+
+define i64 @bswap_i64(i64 %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_i64':
+; CHECK: Found an estimated cost of 1 for instruction: %bswap
+ %bswap = tail call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %bswap
+}
+
+define <2 x i32> @bswap_v2i32(<2 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_v2i32':
+; CHECK: Found an estimated cost of 8 for instruction: %bswap
+ %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+ ret <2 x i32> %bswap
+}
+
+define <4 x i16> @bswap_v4i16(<4 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_v4i16':
+; CHECK: Found an estimated cost of 22 for instruction: %bswap
+ %bswap = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a)
+ ret <4 x i16> %bswap
+}
+
+define <2 x i64> @bswap_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_v2i64':
+; CHECK: Found an estimated cost of 8 for instruction: %bswap
+ %bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
+ ret <2 x i64> %bswap
+}
+
+define <4 x i32> @bswap_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_v4i32':
+; CHECK: Found an estimated cost of 22 for instruction: %bswap
+ %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
+ ret <4 x i32> %bswap
+}
+
+define <8 x i16> @bswap_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'bswap_v8i16':
+; CHECK: Found an estimated cost of 50 for instruction: %bswap
+ %bswap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
+ ret <8 x i16> %bswap
+}
diff --git a/test/Analysis/CostModel/AArch64/falkor.ll b/test/Analysis/CostModel/AArch64/falkor.ll
new file mode 100644
index 000000000000..e9563191f077
--- /dev/null
+++ b/test/Analysis/CostModel/AArch64/falkor.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -cost-model -analyze -mcpu=falkor | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: vectorInstrCost
+define void @vectorInstrCost() {
+
+ ; Vector extracts - extracting the first element should have a zero cost;
+ ; all other elements should have a cost of two.
+ ;
+ ; CHECK: cost of 0 {{.*}} extractelement <2 x i64> undef, i32 0
+ ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 1
+ %t1 = extractelement <2 x i64> undef, i32 0
+ %t2 = extractelement <2 x i64> undef, i32 1
+
+ ; Vector inserts - inserting the first element should have a zero cost; all
+ ; other elements should have a cost of two.
+ ;
+ ; CHECK: cost of 0 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 0
+ ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 1
+ %t3 = insertelement <2 x i64> undef, i64 undef, i32 0
+ %t4 = insertelement <2 x i64> undef, i64 undef, i32 1
+
+ ret void
+}
diff --git a/test/Analysis/CostModel/AArch64/gep.ll b/test/Analysis/CostModel/AArch64/gep.ll
index f3d83c133027..08bfc3d21238 100644
--- a/test/Analysis/CostModel/AArch64/gep.ll
+++ b/test/Analysis/CostModel/AArch64/gep.ll
@@ -1,9 +1,9 @@
-; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mcpu=kryo < %s | FileCheck %s
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
-define i8 @test1(i8* %p, i32 %i) {
+define i8 @test1(i8* %p) {
; CHECK-LABEL: test1
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 0
@@ -11,7 +11,7 @@ define i8 @test1(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test2(i16* %p, i32 %i) {
+define i16 @test2(i16* %p) {
; CHECK-LABEL: test2
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 0
@@ -19,7 +19,7 @@ define i16 @test2(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test3(i32* %p, i32 %i) {
+define i32 @test3(i32* %p) {
; CHECK-LABEL: test3
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 0
@@ -27,7 +27,7 @@ define i32 @test3(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test4(i64* %p, i32 %i) {
+define i64 @test4(i64* %p) {
; CHECK-LABEL: test4
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 0
@@ -35,7 +35,7 @@ define i64 @test4(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test5(i8* %p, i32 %i) {
+define i8 @test5(i8* %p) {
; CHECK-LABEL: test5
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 1024
@@ -43,7 +43,7 @@ define i8 @test5(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test6(i16* %p, i32 %i) {
+define i16 @test6(i16* %p) {
; CHECK-LABEL: test6
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 1024
@@ -51,7 +51,7 @@ define i16 @test6(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test7(i32* %p, i32 %i) {
+define i32 @test7(i32* %p) {
; CHECK-LABEL: test7
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 1024
@@ -59,7 +59,7 @@ define i32 @test7(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test8(i64* %p, i32 %i) {
+define i64 @test8(i64* %p) {
; CHECK-LABEL: test8
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 1024
@@ -67,7 +67,7 @@ define i64 @test8(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test9(i8* %p, i32 %i) {
+define i8 @test9(i8* %p) {
; CHECK-LABEL: test9
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 4096
@@ -75,7 +75,7 @@ define i8 @test9(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test10(i16* %p, i32 %i) {
+define i16 @test10(i16* %p) {
; CHECK-LABEL: test10
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 4096
@@ -83,7 +83,7 @@ define i16 @test10(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test11(i32* %p, i32 %i) {
+define i32 @test11(i32* %p) {
; CHECK-LABEL: test11
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 4096
@@ -91,7 +91,7 @@ define i32 @test11(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test12(i64* %p, i32 %i) {
+define i64 @test12(i64* %p) {
; CHECK-LABEL: test12
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 4096
@@ -99,7 +99,7 @@ define i64 @test12(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test13(i8* %p, i32 %i) {
+define i8 @test13(i8* %p) {
; CHECK-LABEL: test13
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 -64
@@ -107,7 +107,7 @@ define i8 @test13(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test14(i16* %p, i32 %i) {
+define i16 @test14(i16* %p) {
; CHECK-LABEL: test14
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 -64
@@ -115,7 +115,7 @@ define i16 @test14(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test15(i32* %p, i32 %i) {
+define i32 @test15(i32* %p) {
; CHECK-LABEL: test15
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 -64
@@ -123,7 +123,7 @@ define i32 @test15(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test16(i64* %p, i32 %i) {
+define i64 @test16(i64* %p) {
; CHECK-LABEL: test16
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 -64
@@ -131,7 +131,7 @@ define i64 @test16(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test17(i8* %p, i32 %i) {
+define i8 @test17(i8* %p) {
; CHECK-LABEL: test17
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 -1024
@@ -139,7 +139,7 @@ define i8 @test17(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test18(i16* %p, i32 %i) {
+define i16 @test18(i16* %p) {
; CHECK-LABEL: test18
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 -1024
@@ -147,7 +147,7 @@ define i16 @test18(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test19(i32* %p, i32 %i) {
+define i32 @test19(i32* %p) {
; CHECK-LABEL: test19
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 -1024
@@ -155,7 +155,7 @@ define i32 @test19(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test20(i64* %p, i32 %i) {
+define i64 @test20(i64* %p) {
; CHECK-LABEL: test20
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 -1024
@@ -195,7 +195,7 @@ define i64 @test24(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test25(i8* %p, i32 %i) {
+define i8 @test25(i8* %p) {
; CHECK-LABEL: test25
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 -128
@@ -203,7 +203,7 @@ define i8 @test25(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test26(i16* %p, i32 %i) {
+define i16 @test26(i16* %p) {
; CHECK-LABEL: test26
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 -128
@@ -211,7 +211,7 @@ define i16 @test26(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test27(i32* %p, i32 %i) {
+define i32 @test27(i32* %p) {
; CHECK-LABEL: test27
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 -128
@@ -219,7 +219,7 @@ define i32 @test27(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test28(i64* %p, i32 %i) {
+define i64 @test28(i64* %p) {
; CHECK-LABEL: test28
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 -128
@@ -227,7 +227,7 @@ define i64 @test28(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test29(i8* %p, i32 %i) {
+define i8 @test29(i8* %p) {
; CHECK-LABEL: test29
; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 -256
@@ -235,7 +235,7 @@ define i8 @test29(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test30(i16* %p, i32 %i) {
+define i16 @test30(i16* %p) {
; CHECK-LABEL: test30
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 -256
@@ -243,7 +243,7 @@ define i16 @test30(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test31(i32* %p, i32 %i) {
+define i32 @test31(i32* %p) {
; CHECK-LABEL: test31
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 -256
@@ -251,7 +251,7 @@ define i32 @test31(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test32(i64* %p, i32 %i) {
+define i64 @test32(i64* %p) {
; CHECK-LABEL: test32
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 -256
@@ -259,7 +259,7 @@ define i64 @test32(i64* %p, i32 %i) {
ret i64 %v
}
-define i8 @test33(i8* %p, i32 %i) {
+define i8 @test33(i8* %p) {
; CHECK-LABEL: test33
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a = getelementptr inbounds i8, i8* %p, i32 -512
@@ -267,7 +267,7 @@ define i8 @test33(i8* %p, i32 %i) {
ret i8 %v
}
-define i16 @test34(i16* %p, i32 %i) {
+define i16 @test34(i16* %p) {
; CHECK-LABEL: test34
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a = getelementptr inbounds i16, i16* %p, i32 -512
@@ -275,7 +275,7 @@ define i16 @test34(i16* %p, i32 %i) {
ret i16 %v
}
-define i32 @test35(i32* %p, i32 %i) {
+define i32 @test35(i32* %p) {
; CHECK-LABEL: test35
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a = getelementptr inbounds i32, i32* %p, i32 -512
@@ -283,7 +283,7 @@ define i32 @test35(i32* %p, i32 %i) {
ret i32 %v
}
-define i64 @test36(i64* %p, i32 %i) {
+define i64 @test36(i64* %p) {
; CHECK-LABEL: test36
; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a = getelementptr inbounds i64, i64* %p, i32 -512
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll
index 7319efb413d6..b7a615f55cde 100644
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -436,7 +436,7 @@ define i32 @mul(i32 %arg) {
%A = mul <2 x i64> undef, undef
; SSSE3: cost of 16 {{.*}} %B = mul
; SSE42: cost of 16 {{.*}} %B = mul
- ; AVX: cost of 16 {{.*}} %B = mul
+ ; AVX: cost of 18 {{.*}} %B = mul
; AVX2: cost of 8 {{.*}} %B = mul
; AVX512F: cost of 8 {{.*}} %B = mul
; AVX512BW: cost of 8 {{.*}} %B = mul
@@ -444,7 +444,7 @@ define i32 @mul(i32 %arg) {
%B = mul <4 x i64> undef, undef
; SSSE3: cost of 32 {{.*}} %C = mul
; SSE42: cost of 32 {{.*}} %C = mul
- ; AVX: cost of 32 {{.*}} %C = mul
+ ; AVX: cost of 36 {{.*}} %C = mul
; AVX2: cost of 16 {{.*}} %C = mul
; AVX512F: cost of 8 {{.*}} %C = mul
; AVX512BW: cost of 8 {{.*}} %C = mul
diff --git a/test/Analysis/CostModel/X86/shuffle-broadcast.ll b/test/Analysis/CostModel/X86/shuffle-broadcast.ll
index a829a47f89f2..86cf7569a728 100644
--- a/test/Analysis/CostModel/X86/shuffle-broadcast.ll
+++ b/test/Analysis/CostModel/X86/shuffle-broadcast.ll
@@ -18,14 +18,150 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
%V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
; SSE: cost of 1 {{.*}} %V256 = shufflevector
- ; AVX: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
%V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
; SSE: cost of 1 {{.*}} %V512 = shufflevector
- ; AVX: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
%V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
ret void
}
+
+; CHECK-LABEL: 'test_vXi64'
+define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) {
+ ; SSE: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXf32'
+define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) {
+ ; SSE: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V64 = shufflevector
+ %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXi32'
+define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) {
+ ; SSE: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V64 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V64 = shufflevector
+ %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
+
+ ; SSE: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXi16'
+define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) {
+ ; SSE2: cost of 2 {{.*}} %V128 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+
+ ; SSE2: cost of 2 {{.*}} %V256 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V256 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 3 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+
+ ; SSE2: cost of 2 {{.*}} %V512 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V512 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 3 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512F: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+
+ ret void
+}
+
+; CHECK-LABEL: 'test_vXi8'
+define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) {
+ ; SSE2: cost of 3 {{.*}} %V128 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX: cost of 1 {{.*}} %V128 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
+ %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+
+ ; SSE2: cost of 3 {{.*}} %V256 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V256 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
+ %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+
+ ; SSE2: cost of 3 {{.*}} %V512 = shufflevector
+ ; SSSE3: cost of 1 {{.*}} %V512 = shufflevector
+ ; SSE42: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX1: cost of 2 {{.*}} %V512 = shufflevector
+ ; AVX2: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512F: cost of 1 {{.*}} %V512 = shufflevector
+ ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
+ %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+
+ ret void
+}
diff --git a/test/Analysis/CostModel/X86/vdiv-cost.ll b/test/Analysis/CostModel/X86/vdiv-cost.ll
index c8e4557cbefd..a45bb4b3d0d5 100644
--- a/test/Analysis/CostModel/X86/vdiv-cost.ll
+++ b/test/Analysis/CostModel/X86/vdiv-cost.ll
@@ -1,13 +1,20 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ
define <4 x i32> @test1(<4 x i32> %a) {
%div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
ret <4 x i32> %div
; CHECK: 'Cost Model Analysis' for function 'test1':
-; SSE2: Found an estimated cost of 15 for instruction: %div
-; AVX2: Found an estimated cost of 15 for instruction: %div
+; SSE: Found an estimated cost of 15 for instruction: %div
+; AVX: Found an estimated cost of 15 for instruction: %div
+; AVX512: Found an estimated cost of 15 for instruction: %div
}
define <8 x i32> @test2(<8 x i32> %a) {
@@ -15,8 +22,10 @@ define <8 x i32> @test2(<8 x i32> %a) {
ret <8 x i32> %div
; CHECK: 'Cost Model Analysis' for function 'test2':
-; SSE2: Found an estimated cost of 30 for instruction: %div
+; SSE: Found an estimated cost of 30 for instruction: %div
+; AVX1: Found an estimated cost of 30 for instruction: %div
; AVX2: Found an estimated cost of 15 for instruction: %div
+; AVX512: Found an estimated cost of 15 for instruction: %div
}
define <8 x i16> @test3(<8 x i16> %a) {
@@ -24,8 +33,9 @@ define <8 x i16> @test3(<8 x i16> %a) {
ret <8 x i16> %div
; CHECK: 'Cost Model Analysis' for function 'test3':
-; SSE2: Found an estimated cost of 6 for instruction: %div
-; AVX2: Found an estimated cost of 6 for instruction: %div
+; SSE: Found an estimated cost of 6 for instruction: %div
+; AVX: Found an estimated cost of 6 for instruction: %div
+; AVX512: Found an estimated cost of 6 for instruction: %div
}
define <16 x i16> @test4(<16 x i16> %a) {
@@ -33,8 +43,10 @@ define <16 x i16> @test4(<16 x i16> %a) {
ret <16 x i16> %div
; CHECK: 'Cost Model Analysis' for function 'test4':
-; SSE2: Found an estimated cost of 12 for instruction: %div
+; SSE: Found an estimated cost of 12 for instruction: %div
+; AVX1: Found an estimated cost of 12 for instruction: %div
; AVX2: Found an estimated cost of 6 for instruction: %div
+; AVX512: Found an estimated cost of 6 for instruction: %div
}
define <8 x i16> @test5(<8 x i16> %a) {
@@ -42,8 +54,9 @@ define <8 x i16> @test5(<8 x i16> %a) {
ret <8 x i16> %div
; CHECK: 'Cost Model Analysis' for function 'test5':
-; SSE2: Found an estimated cost of 6 for instruction: %div
-; AVX2: Found an estimated cost of 6 for instruction: %div
+; SSE: Found an estimated cost of 6 for instruction: %div
+; AVX: Found an estimated cost of 6 for instruction: %div
+; AVX512: Found an estimated cost of 6 for instruction: %div
}
define <16 x i16> @test6(<16 x i16> %a) {
@@ -51,8 +64,10 @@ define <16 x i16> @test6(<16 x i16> %a) {
ret <16 x i16> %div
; CHECK: 'Cost Model Analysis' for function 'test6':
-; SSE2: Found an estimated cost of 12 for instruction: %div
+; SSE: Found an estimated cost of 12 for instruction: %div
+; AVX1: Found an estimated cost of 12 for instruction: %div
; AVX2: Found an estimated cost of 6 for instruction: %div
+; AVX512: Found an estimated cost of 6 for instruction: %div
}
define <16 x i8> @test7(<16 x i8> %a) {
@@ -60,8 +75,9 @@ define <16 x i8> @test7(<16 x i8> %a) {
ret <16 x i8> %div
; CHECK: 'Cost Model Analysis' for function 'test7':
-; SSE2: Found an estimated cost of 320 for instruction: %div
-; AVX2: Found an estimated cost of 320 for instruction: %div
+; SSE: Found an estimated cost of 320 for instruction: %div
+; AVX: Found an estimated cost of 320 for instruction: %div
+; AVX512: Found an estimated cost of 320 for instruction: %div
}
define <4 x i32> @test8(<4 x i32> %a) {
@@ -69,8 +85,9 @@ define <4 x i32> @test8(<4 x i32> %a) {
ret <4 x i32> %div
; CHECK: 'Cost Model Analysis' for function 'test8':
-; SSE2: Found an estimated cost of 19 for instruction: %div
-; AVX2: Found an estimated cost of 15 for instruction: %div
+; SSE: Found an estimated cost of 19 for instruction: %div
+; AVX: Found an estimated cost of 15 for instruction: %div
+; AVX512: Found an estimated cost of 15 for instruction: %div
}
define <8 x i32> @test9(<8 x i32> %a) {
@@ -78,8 +95,10 @@ define <8 x i32> @test9(<8 x i32> %a) {
ret <8 x i32> %div
; CHECK: 'Cost Model Analysis' for function 'test9':
-; SSE2: Found an estimated cost of 38 for instruction: %div
+; SSE: Found an estimated cost of 38 for instruction: %div
+; AVX1: Found an estimated cost of 38 for instruction: %div
; AVX2: Found an estimated cost of 15 for instruction: %div
+; AVX512: Found an estimated cost of 15 for instruction: %div
}
define <8 x i32> @test10(<8 x i32> %a) {
@@ -87,6 +106,17 @@ define <8 x i32> @test10(<8 x i32> %a) {
ret <8 x i32> %div
; CHECK: 'Cost Model Analysis' for function 'test10':
-; SSE2: Found an estimated cost of 160 for instruction: %div
-; AVX2: Found an estimated cost of 160 for instruction: %div
+; SSE: Found an estimated cost of 160 for instruction: %div
+; AVX: Found an estimated cost of 160 for instruction: %div
+; AVX512: Found an estimated cost of 160 for instruction: %div
+}
+
+define <16 x i32> @test11(<16 x i32> %a) {
+ %div = sdiv <16 x i32> %a, <i32 8, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7, i32 8, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+ ret <16 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test11':
+; SSE: Found an estimated cost of 320 for instruction: %div
+; AVX: Found an estimated cost of 320 for instruction: %div
+; AVX512: Found an estimated cost of 320 for instruction: %div
}
diff --git a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index e53e40b57e1d..888164df75f5 100644
--- a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -1,9 +1,12 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; Verify the cost of vector arithmetic shift right instructions.
@@ -17,6 +20,7 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <2 x i64> %a, %b
ret <2 x i64> %shift
@@ -28,17 +32,31 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <4 x i64> %a, %b
ret <4 x i64> %shift
}
+; Variable arithmetic shift right of <8 x i64>: the shift amount %b is a full
+; vector, so each lane is shifted by its own amount. Of the prefixes checked
+; here only AVX512 expects a cost of 1 for the ashr.
+define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64':
+; SSE2: Found an estimated cost of 48 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 8 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <8 x i64> %a, %b
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <4 x i32> %a, %b
@@ -51,18 +69,33 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <8 x i32> %a, %b
ret <8 x i32> %shift
}
+; Variable arithmetic shift right of <16 x i32> by the per-lane amounts in %b.
+; The two XOP invocations expect different costs (8 vs 2), so the test uses the
+; XOPAVX and XOPAVX2 prefixes here instead of the shared XOP prefix.
+define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <16 x i32> %a, %b
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -74,17 +107,32 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, %b
ret <16 x i16> %shift
}
+; Variable arithmetic shift right of <32 x i16> by the per-lane amounts in %b.
+; The AVX-512 expectation is split by feature set: the AVX512F prefix expects
+; the same cost as AVX2 (20), while the AVX512BW prefix expects 2.
+define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <32 x i16> %a, %b
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -96,11 +144,26 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 48 for instruction: %shift
; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512F: Found an estimated cost of 24 for instruction: %shift
+; AVX512BW: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, %b
ret <32 x i8> %shift
}
+; Variable arithmetic shift right of <64 x i8> by the per-lane amounts in %b.
+; The AVX512F prefix expects the same cost as AVX2 (48); the AVX512BW prefix
+; expects 2.
+define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8':
+; SSE2: Found an estimated cost of 216 for instruction: %shift
+; SSE41: Found an estimated cost of 96 for instruction: %shift
+; AVX: Found an estimated cost of 96 for instruction: %shift
+; AVX2: Found an estimated cost of 48 for instruction: %shift
+; AVX512F: Found an estimated cost of 48 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <64 x i8> %a, %b
+ ret <64 x i8> %shift
+}
+
;
; Uniform Variable Shifts
;
@@ -111,6 +174,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
%shift = ashr <2 x i64> %a, %splat
@@ -123,18 +187,33 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = ashr <4 x i64> %a, %splat
ret <4 x i64> %shift
}
+; Uniform variable arithmetic shift right of <8 x i64>: the all-zero
+; shufflevector mask broadcasts lane 0 of %b, so every lane is shifted by the
+; same non-constant amount. Only the ashr (%shift) has cost expectations; the
+; shufflevector itself is not checked.
+define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64':
+; SSE2: Found an estimated cost of 48 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 8 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = ashr <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -148,6 +227,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -155,12 +235,27 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
ret <8 x i32> %shift
}
+; Uniform variable arithmetic shift right of <16 x i32>: lane 0 of %b is
+; broadcast with an all-zero shufflevector mask and used as the shift amount
+; for every lane. Only the ashr (%shift) has cost expectations.
+define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = ashr <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i16> %a, %splat
@@ -173,18 +268,34 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i16> %a, %splat
ret <16 x i16> %shift
}
+; Uniform variable arithmetic shift right of <32 x i16>: lane 0 of %b is
+; broadcast with an all-zero shufflevector mask and used as the shift amount
+; for every lane. The AVX512F prefix expects the AVX2 cost (20); the AVX512BW
+; prefix expects 2.
+define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = ashr <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i8> %a, %splat
@@ -197,12 +308,27 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 48 for instruction: %shift
; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = ashr <32 x i8> %a, %splat
ret <32 x i8> %shift
}
+; Uniform variable arithmetic shift right of <64 x i8>: lane 0 of %b is
+; broadcast with an all-zero shufflevector mask and used as the shift amount
+; for every lane. The AVX512F prefix expects the AVX2 cost (48); the AVX512BW
+; prefix expects 2.
+define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8':
+; SSE2: Found an estimated cost of 216 for instruction: %shift
+; SSE41: Found an estimated cost of 96 for instruction: %shift
+; AVX: Found an estimated cost of 96 for instruction: %shift
+; AVX2: Found an estimated cost of 48 for instruction: %shift
+; AVX512F: Found an estimated cost of 48 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = ashr <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
;
; Constant Shifts
;
@@ -213,6 +339,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <2 x i64> %a, <i64 1, i64 7>
ret <2 x i64> %shift
@@ -224,17 +351,31 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
ret <4 x i64> %shift
}
+; Arithmetic shift right of <8 x i64> by a non-uniform constant vector (the
+; amounts 1, 7, 15, 31 repeated twice). The expected costs are the same values
+; that var_shift_v8i64 expects for a non-constant amount.
+define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64':
+; SSE2: Found an estimated cost of 48 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 8 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
@@ -247,18 +388,33 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
ret <8 x i32> %shift
}
+; Arithmetic shift right of <16 x i32> by a non-uniform constant vector (the
+; amounts 4, 5, 6, 7, 0, 1, 2, 3 repeated twice). XOPAVX and XOPAVX2 are
+; checked separately because the two XOP invocations expect different costs.
+define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
@@ -270,17 +426,32 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <16 x i16> %shift
}
+; Arithmetic shift right of <32 x i16> by a non-uniform constant vector (the
+; amounts 0 through 7 repeated four times). The AVX512F prefix expects the
+; AVX2 cost (20); the AVX512BW prefix expects 2.
+define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -292,11 +463,25 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 48 for instruction: %shift
; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <32 x i8> %shift
}
+; Arithmetic shift right of <64 x i8> by a non-uniform constant vector (the
+; 16-element pattern 0..7 followed by 7..0, repeated four times). The AVX512F
+; prefix expects the AVX2 cost (48); the AVX512BW prefix expects 2.
+define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8':
+; SSE2: Found an estimated cost of 216 for instruction: %shift
+; SSE41: Found an estimated cost of 96 for instruction: %shift
+; AVX: Found an estimated cost of 96 for instruction: %shift
+; AVX2: Found an estimated cost of 48 for instruction: %shift
+; AVX512F: Found an estimated cost of 48 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <64 x i8> %shift
+}
+
;
; Uniform Constant Shifts
;
@@ -307,6 +492,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <2 x i64> %a, <i64 7, i64 7>
ret <2 x i64> %shift
@@ -318,17 +504,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
ret <4 x i64> %shift
}
+; Arithmetic shift right of <8 x i64> by a uniform constant (every lane is
+; shifted by 7). Only the AVX512 prefix expects a cost of 1.
+define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 8 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -341,18 +541,33 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x i32> %shift
}
+; Arithmetic shift right of <16 x i32> by a uniform constant (every lane is
+; shifted by 5). XOPAVX and XOPAVX2 are checked separately because the two XOP
+; invocations expect different costs (8 vs 2).
+define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
@@ -364,17 +579,32 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
+; Arithmetic shift right of <32 x i16> by a uniform constant (every lane is
+; shifted by 3). The AVX512F prefix expects the AVX2 cost (20); the AVX512BW
+; prefix expects 2.
+; NOTE(review) - the AVX2/AVX512F expectation (20) is higher than the
+; SSE2/SSE41/AVX one (4). This presumably records what the cost model
+; currently reports rather than an intended cost; confirm before relying on it.
+define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512: Found an estimated cost of 4 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
@@ -386,7 +616,21 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
+; AVX512: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
+
+define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 48 for instruction: %shift
+; AVX512F: Found an estimated cost of 48 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <64 x i8> %shift
+}
diff --git a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index 6d028268ea55..b3382253739f 100644
--- a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -1,9 +1,12 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+;
+; Prefix sharing used by the checks in this file
+;  - XOP is enabled for both the +xop,+avx and +xop,+avx2 invocations; XOPAVX
+;    and XOPAVX2 tell them apart where the expected cost differs.
+;  - AVX512 is enabled for all three AVX-512 invocations. The +avx512f and
+;    +avx512f,+avx512dq invocations both use the AVX512F prefix, so they are
+;    checked against identical expectations; only +avx512bw gets AVX512BW.
; Verify the cost of vector logical shift right instructions.
@@ -17,6 +20,7 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <2 x i64> %a, %b
@@ -29,18 +33,33 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i64> %a, %b
ret <4 x i64> %shift
}
+define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <8 x i64> %a, %b
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i32> %a, %b
@@ -53,18 +72,33 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <8 x i32> %a, %b
ret <8 x i32> %shift
}
+define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <16 x i32> %a, %b
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -76,17 +110,32 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, %b
ret <16 x i16> %shift
}
+define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <32 x i16> %a, %b
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 12 for instruction: %shift
+; AVX512: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -98,11 +147,25 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, %b
ret <32 x i8> %shift
}
+define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <64 x i8> %a, %b
+ ret <64 x i8> %shift
+}
+
;
; Uniform Variable Shifts
;
@@ -113,6 +176,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -126,6 +190,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -133,12 +198,27 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
ret <4 x i64> %shift
}
+define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = lshr <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -152,6 +232,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -159,12 +240,27 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
ret <8 x i32> %shift
}
+define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = lshr <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i16> %a, %splat
@@ -177,18 +273,34 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i16> %a, %splat
ret <16 x i16> %shift
}
+define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = lshr <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 12 for instruction: %shift
+; AVX512: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i8> %a, %splat
@@ -201,12 +313,27 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = lshr <32 x i8> %a, %splat
ret <32 x i8> %shift
}
+define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = lshr <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
;
; Constant Shifts
;
@@ -217,6 +344,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <2 x i64> %a, <i64 1, i64 7>
@@ -229,18 +357,33 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
ret <4 x i64> %shift
}
+define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
@@ -253,18 +396,33 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
ret <8 x i32> %shift
}
+define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
@@ -276,17 +434,32 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <16 x i16> %shift
}
+define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 12 for instruction: %shift
; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 12 for instruction: %shift
+; AVX512: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -298,11 +471,25 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 24 for instruction: %shift
; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <32 x i8> %shift
}
+define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <64 x i8> %shift
+}
+
;
; Uniform Constant Shifts
;
@@ -313,6 +500,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <2 x i64> %a, <i64 7, i64 7>
@@ -325,18 +513,33 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
ret <4 x i64> %shift
}
+define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -349,18 +552,33 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x i32> %shift
}
+define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
@@ -372,17 +590,32 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
+define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
@@ -394,7 +627,21 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
+
+define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 8 for instruction: %shift
+ %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <64 x i8> %shift
+}
diff --git a/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 60ba3adea42a..804c5a76c319 100644
--- a/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -1,9 +1,12 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; Verify the cost of vector shift left instructions.
@@ -18,6 +21,7 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <2 x i64> %a, %b
@@ -30,18 +34,33 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i64> %a, %b
ret <4 x i64> %shift
}
+; Cost of a per-element variable shift-left on <8 x i64> (8 x 64 = 512 bits).
+; Each lane of %a is shifted by the matching lane of %b.
+; Expected costs below: 16 for the SSE2/SSE41/AVX prefixes, 2 for AVX2 and
+; XOPAVX2, 4 for XOPAVX, and 1 for the AVX512 prefix.
+define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <8 x i64> %a, %b
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 10 for instruction: %shift
; SSE41: Found an estimated cost of 10 for instruction: %shift
; AVX: Found an estimated cost of 10 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i32> %a, %b
@@ -54,18 +73,33 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 20 for instruction: %shift
; AVX: Found an estimated cost of 20 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i32> %a, %b
ret <8 x i32> %shift
}
+; Cost of a per-element variable shift-left on <16 x i32> (16 x 32 = 512 bits).
+; Each lane of %a is shifted by the matching lane of %b.
+; Expected costs below: 40 for the SSE2/SSE41/AVX prefixes, 2 for AVX2 and
+; XOPAVX2, 4 for XOPAVX, and 1 for the AVX512 prefix.
+define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32':
+; SSE2: Found an estimated cost of 40 for instruction: %shift
+; SSE41: Found an estimated cost of 40 for instruction: %shift
+; AVX: Found an estimated cost of 40 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <16 x i32> %a, %b
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -77,17 +111,32 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <16 x i16> %a, %b
ret <16 x i16> %shift
}
+; Cost of a per-element variable shift-left on <32 x i16> (32 x 16 = 512 bits).
+; Each lane of %a is shifted by the matching lane of %b.
+; The two AVX-512 runs are checked separately here: the AVX512F prefix expects
+; 20 (the same value as AVX2) whereas the AVX512BW prefix expects 2.
+; The shared XOP prefix covers both XOP runs with a single expectation of 4.
+define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = shl <32 x i16> %a, %b
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -99,11 +148,25 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, %b
ret <32 x i8> %shift
}
+; Cost of a per-element variable shift-left on <64 x i8> (64 x 8 = 512 bits).
+; Each lane of %a is shifted by the matching lane of %b.
+; The two AVX-512 runs are checked separately here: the AVX512F prefix expects
+; 22 (the same value as AVX2) whereas the AVX512BW prefix expects 2.
+; The shared XOP prefix covers both XOP runs with a single expectation of 4.
+define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = shl <64 x i8> %a, %b
+ ret <64 x i8> %shift
+}
+
;
; Uniform Variable Shifts
;
@@ -114,6 +177,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -127,6 +191,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -134,12 +199,27 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
ret <4 x i64> %shift
}
+; Cost of a uniform variable shift-left on <8 x i64>: every lane of %a is
+; shifted by the same run-time amount, taken from lane 0 of %b.
+; Only the shl (%shift) is cost-checked; the splat shuffle is not.
+; Expected costs below: 16 for the SSE2/SSE41/AVX prefixes, 2 for AVX2 and
+; XOPAVX2, 4 for XOPAVX, and 1 for the AVX512 prefix.
+define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
+ %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = shl <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 10 for instruction: %shift
; SSE41: Found an estimated cost of 10 for instruction: %shift
; AVX: Found an estimated cost of 10 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -153,6 +233,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE41: Found an estimated cost of 20 for instruction: %shift
; AVX: Found an estimated cost of 20 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -160,12 +241,27 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
ret <8 x i32> %shift
}
+; Cost of a uniform variable shift-left on <16 x i32>: every lane of %a is
+; shifted by the same run-time amount, taken from lane 0 of %b.
+; Only the shl (%shift) is cost-checked; the splat shuffle is not.
+; Expected costs below: 40 for the SSE2/SSE41/AVX prefixes, 2 for AVX2 and
+; XOPAVX2, 4 for XOPAVX, and 1 for the AVX512 prefix.
+define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32':
+; SSE2: Found an estimated cost of 40 for instruction: %shift
+; SSE41: Found an estimated cost of 40 for instruction: %shift
+; AVX: Found an estimated cost of 40 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
+ %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = shl <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 14 for instruction: %shift
; AVX: Found an estimated cost of 14 for instruction: %shift
; AVX2: Found an estimated cost of 14 for instruction: %shift
+; AVX512: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i16> %a, %splat
@@ -178,18 +274,34 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE41: Found an estimated cost of 28 for instruction: %shift
; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
+; AVX512: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i16> %a, %splat
ret <16 x i16> %shift
}
+; Cost of a uniform variable shift-left on <32 x i16>: every lane of %a is
+; shifted by the same run-time amount, taken from lane 0 of %b.
+; Only the shl (%shift) is cost-checked; the splat shuffle is not.
+; The AVX512F prefix expects 20 (the same value as AVX2) whereas the AVX512BW
+; prefix expects 2; the shared XOP prefix expects 4 for both XOP runs.
+define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16':
+; SSE2: Found an estimated cost of 128 for instruction: %shift
+; SSE41: Found an estimated cost of 56 for instruction: %shift
+; AVX: Found an estimated cost of 56 for instruction: %shift
+; AVX2: Found an estimated cost of 20 for instruction: %shift
+; AVX512F: Found an estimated cost of 20 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
+ %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = shl <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i8> %a, %splat
@@ -202,12 +314,27 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = shl <32 x i8> %a, %splat
ret <32 x i8> %shift
}
+; Cost of a uniform variable shift-left on <64 x i8>: every lane of %a is
+; shifted by the same run-time amount, taken from lane 0 of %b.
+; Only the shl (%shift) is cost-checked; the splat shuffle is not.
+; The AVX512F prefix expects 22 (the same value as AVX2) whereas the AVX512BW
+; prefix expects 2; the shared XOP prefix expects 4 for both XOP runs.
+define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
+ %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = shl <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
;
; Constant Shifts
;
@@ -218,6 +345,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <2 x i64> %a, <i64 1, i64 7>
@@ -230,18 +358,33 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
ret <4 x i64> %shift
}
+; Cost of a shift-left on <8 x i64> by a non-uniform constant vector.
+; The shift amounts are the pattern 1, 7, 15, 31 repeated twice.
+; Expected costs below: 16 for the SSE2/SSE41/AVX prefixes, 2 for AVX2 and
+; XOPAVX2, 4 for XOPAVX, and 1 for the AVX512 prefix.
+define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 6 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
@@ -254,18 +397,33 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
ret <8 x i32> %shift
}
+; Cost of a shift-left on <16 x i32> by a non-uniform constant vector.
+; The shift amounts are the pattern 4, 5, 6, 7, 0, 1, 2, 3 repeated twice.
+; Unlike the variable-shift case, the SSE2, SSE41 and AVX prefixes each have a
+; different expectation here (24, 4 and 8 respectively).
+define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32':
+; SSE2: Found an estimated cost of 24 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
@@ -277,18 +435,34 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <16 x i16> %shift
}
+; Cost of a shift-left on <32 x i16> by a non-uniform constant vector.
+; The shift amounts are the pattern 0, 1, ..., 7 repeated four times.
+; Both AVX-512 prefixes (AVX512F and AVX512BW) expect the same cost of 2 here,
+; matching AVX2; the two XOP runs are checked separately (4 and 2).
+define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512F: Found an estimated cost of 2 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 11 for instruction: %shift
; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -300,11 +474,25 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 22 for instruction: %shift
; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <32 x i8> %shift
}
+; Cost of a shift-left on <64 x i8> by a non-uniform constant vector.
+; The shift amounts are the 16-element pattern 0..7 followed by 7..0,
+; repeated four times.
+; The expectations below are identical to those of var_shift_v64i8 for every
+; prefix; the AVX512F prefix expects 22 and the AVX512BW prefix expects 2.
+define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8':
+; SSE2: Found an estimated cost of 104 for instruction: %shift
+; SSE41: Found an estimated cost of 44 for instruction: %shift
+; AVX: Found an estimated cost of 44 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <64 x i8> %shift
+}
+
;
; Uniform Constant Shifts
;
@@ -315,6 +503,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <2 x i64> %a, <i64 7, i64 7>
@@ -327,18 +516,33 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
ret <4 x i64> %shift
}
+; Cost of a shift-left on <8 x i64> by a uniform constant: every lane of %a
+; is shifted by 7.
+; Expected costs below: 4 for the SSE2/SSE41/AVX and XOPAVX prefixes, 2 for
+; AVX2 and XOPAVX2, and 1 for the AVX512 prefix.
+define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+ ret <8 x i64> %shift
+}
+
define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -351,18 +555,33 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x i32> %shift
}
+; Cost of a shift-left on <16 x i32> by a uniform constant: every lane of %a
+; is shifted by 5.
+; Expected costs below: 4 for the SSE2/SSE41/AVX and XOPAVX prefixes, 2 for
+; AVX2 and XOPAVX2, and 1 for the AVX512 prefix.
+define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i32> %shift
+}
+
define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
@@ -374,18 +593,34 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
+; Cost of a shift-left on <32 x i16> by a uniform constant: every lane of %a
+; is shifted by 3.
+; Both AVX-512 prefixes (AVX512F and AVX512BW) expect the same cost of 2 here,
+; matching AVX2; the two XOP runs are checked separately (4 and 2).
+define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512F: Found an estimated cost of 2 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <32 x i16> %shift
+}
+
define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
@@ -397,11 +632,25 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
+; AVX512: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
+define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 22 for instruction: %shift
+; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; AVX512BW: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <64 x i8> %shift
+}
+
;
; Special Cases
;