summaryrefslogtreecommitdiff
path: root/test/Analysis
diff options
context:
space:
mode:
Diffstat (limited to 'test/Analysis')
-rw-r--r--test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll4
-rw-r--r--test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll2
-rw-r--r--test/Analysis/BasicAA/bug.23540.ll2
-rw-r--r--test/Analysis/BasicAA/bug.23626.ll14
-rw-r--r--test/Analysis/BasicAA/constant-over-index.ll2
-rw-r--r--test/Analysis/BasicAA/fallback-mayalias.ll23
-rw-r--r--test/Analysis/BasicAA/q.bad.ll14
-rw-r--r--test/Analysis/BasicAA/returned.ll16
-rw-r--r--test/Analysis/BasicAA/sequential-gep.ll6
-rw-r--r--test/Analysis/BasicAA/struct-geps.ll78
-rw-r--r--test/Analysis/BasicAA/zext.ll8
-rw-r--r--test/Analysis/CostModel/X86/arith.ll1002
-rw-r--r--test/Analysis/CostModel/X86/interleaved-load-i8.ll98
-rw-r--r--test/Analysis/CostModel/X86/interleaved-store-i8.ll85
-rw-r--r--test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll87
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll19
16 files changed, 901 insertions, 559 deletions
diff --git a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
index 8388d6c97adfe..200e24f428ef3 100644
--- a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
+++ b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll
@@ -3,9 +3,9 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: Function: foo
-; CHECK: PartialAlias: i32* %Ipointer, i32* %Jpointer
+; CHECK: MayAlias: i32* %Ipointer, i32* %Jpointer
; CHECK: 9 no alias responses
-; CHECK: 6 partial alias responses
+; CHECK: 6 may alias responses
define void @foo(i32* noalias %p, i32* noalias %q, i32 %i, i32 %j) {
%Ipointer = getelementptr i32, i32* %p, i32 %i
diff --git a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
index b2e7a60047bd3..79421824e6ea2 100644
--- a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
+++ b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
@@ -3,7 +3,7 @@
target datalayout = "e-p:32:32:32"
-; CHECK: 1 partial alias response
+; CHECK: 1 may alias responses
define i32 @test(i32* %tab, i32 %indvar) nounwind {
%tmp31 = mul i32 %indvar, -2
diff --git a/test/Analysis/BasicAA/bug.23540.ll b/test/Analysis/BasicAA/bug.23540.ll
index f693bcf73cd63..6a00abdce3a10 100644
--- a/test/Analysis/BasicAA/bug.23540.ll
+++ b/test/Analysis/BasicAA/bug.23540.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu"
@c = external global i32
; CHECK-LABEL: f
-; CHECK: PartialAlias: i32* %arrayidx, i32* %arrayidx6
+; CHECK: MayAlias: i32* %arrayidx, i32* %arrayidx6
define void @f() {
%idxprom = zext i32 undef to i64
%add4 = add i32 0, 1
diff --git a/test/Analysis/BasicAA/bug.23626.ll b/test/Analysis/BasicAA/bug.23626.ll
index 6a1478c65cefd..7d5b5ad06698d 100644
--- a/test/Analysis/BasicAA/bug.23626.ll
+++ b/test/Analysis/BasicAA/bug.23626.ll
@@ -3,12 +3,12 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin13.4.0"
; CHECK-LABEL: compute1
-; CHECK: PartialAlias: i32* %arrayidx8, i32* %out
-; CHECK: PartialAlias: i32* %arrayidx11, i32* %out
-; CHECK: PartialAlias: i32* %arrayidx11, i32* %arrayidx8
-; CHECK: PartialAlias: i32* %arrayidx14, i32* %out
-; CHECK: PartialAlias: i32* %arrayidx14, i32* %arrayidx8
-; CHECK: PartialAlias: i32* %arrayidx11, i32* %arrayidx14
+; CHECK: MayAlias: i32* %arrayidx8, i32* %out
+; CHECK: MayAlias: i32* %arrayidx11, i32* %out
+; CHECK: MayAlias: i32* %arrayidx11, i32* %arrayidx8
+; CHECK: MayAlias: i32* %arrayidx14, i32* %out
+; CHECK: MayAlias: i32* %arrayidx14, i32* %arrayidx8
+; CHECK: MayAlias: i32* %arrayidx11, i32* %arrayidx14
define void @compute1(i32 %num.0.lcssa, i32* %out) {
%idxprom = zext i32 %num.0.lcssa to i64
%arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %idxprom
@@ -22,7 +22,7 @@ define void @compute1(i32 %num.0.lcssa, i32* %out) {
}
; CHECK-LABEL: compute2
-; CHECK: PartialAlias: i32* %arrayidx11, i32* %out.addr
+; CHECK: MayAlias: i32* %arrayidx11, i32* %out.addr
define void @compute2(i32 %num, i32* %out.addr) {
%add9 = add i32 %num, 1
%idxprom10 = zext i32 %add9 to i64
diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll
index f5e2c7c13617c..f77156305c260 100644
--- a/test/Analysis/BasicAA/constant-over-index.ll
+++ b/test/Analysis/BasicAA/constant-over-index.ll
@@ -3,7 +3,7 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-; CHECK: PartialAlias: double* %p.0.i.0, double* %p3
+; CHECK: MayAlias: double* %p.0.i.0, double* %p3
; %p3 is equal to %p.0.i.0 on the second iteration of the loop,
; so MayAlias is needed. In practice, basicaa returns PartialAlias
diff --git a/test/Analysis/BasicAA/fallback-mayalias.ll b/test/Analysis/BasicAA/fallback-mayalias.ll
new file mode 100644
index 0000000000000..a1e4b12d20ade
--- /dev/null
+++ b/test/Analysis/BasicAA/fallback-mayalias.ll
@@ -0,0 +1,23 @@
+; RUN: opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
+
+; Check that BasicAA falls back to MayAlias (instead of PartialAlias) when none
+; of its little tricks are applicable.
+
+; CHECK: MayAlias: float* %arrayidxA, float* %arrayidxB
+
+define void @fallback_mayalias(float* noalias nocapture %C, i64 %i, i64 %j) local_unnamed_addr {
+entry:
+ %shl = shl i64 %i, 3
+ %mul = shl nsw i64 %j, 4
+ %addA = add nsw i64 %mul, %shl
+ %orB = or i64 %shl, 1
+ %addB = add nsw i64 %mul, %orB
+
+ %arrayidxA = getelementptr inbounds float, float* %C, i64 %addA
+ store float undef, float* %arrayidxA, align 4
+
+ %arrayidxB = getelementptr inbounds float, float* %C, i64 %addB
+ store float undef, float* %arrayidxB, align 4
+
+ ret void
+}
diff --git a/test/Analysis/BasicAA/q.bad.ll b/test/Analysis/BasicAA/q.bad.ll
index f2de6a76c5e01..2c7bc1d8591ee 100644
--- a/test/Analysis/BasicAA/q.bad.ll
+++ b/test/Analysis/BasicAA/q.bad.ll
@@ -15,7 +15,7 @@ define void @test_zext_sext_amounts255(i8* %mem) {
}
; CHECK-LABEL: test_zext_sext_amounts
-; CHECK: PartialAlias: i8* %a, i8* %b
+; CHECK: MayAlias: i8* %a, i8* %b
; %a and %b only PartialAlias as, although they're both zext(sext(%num)) they'll extend the sign by a different
; number of bits before zext-ing the remainder.
define void @test_zext_sext_amounts(i8* %mem, i8 %num) {
@@ -44,9 +44,9 @@ define void @based_on_pr18068(i32 %loaded, i8* %mem) {
}
; CHECK-LABEL: test_path_dependence
-; CHECK: PartialAlias: i8* %a, i8* %b
+; CHECK: MayAlias: i8* %a, i8* %b
; CHECK: MustAlias: i8* %a, i8* %c
-; CHECK: PartialAlias: i8* %a, i8* %d
+; CHECK: MayAlias: i8* %a, i8* %d
define void @test_path_dependence(i32 %p, i8* %mem) {
%p.minus1 = add i32 %p, -1 ; this will always unsigned-wrap, unless %p == 0
%p.minus1.64 = zext i32 %p.minus1 to i64
@@ -83,7 +83,7 @@ define void @test_zext_sext_255(i8* %mem) {
}
; CHECK-LABEL: test_zext_sext_num
-; CHECK: PartialAlias: i8* %a, i8* %b
+; CHECK: MayAlias: i8* %a, i8* %b
; %a and %b NoAlias if %num == 255 (see @test_zext_sext_255), but %a and %b NoAlias for other values of %num (e.g. 0)
define void @test_zext_sext_num(i8* %mem, i8 %num) {
%zext.num = zext i8 %num to i16
@@ -142,9 +142,9 @@ define void @constantOffsetHeuristic_i8_i32(i32* %mem, i8 %val) {
}
; CHECK-LABEL: constantOffsetHeuristic_i3_i8
-; CHECK: PartialAlias: i32* %a, i32* %b
+; CHECK: MayAlias: i32* %a, i32* %b
; CHECK: NoAlias: i32* %a, i32* %c
-; CHECK: PartialAlias: i32* %b, i32* %c
+; CHECK: MayAlias: i32* %b, i32* %c
define void @constantOffsetHeuristic_i3_i8(i8* %mem, i3 %val) {
%zext.plus.7 = add nsw i3 %val, 7
%zext.plus.4 = add nsw i3 %val, 4
@@ -161,7 +161,7 @@ define void @constantOffsetHeuristic_i3_i8(i8* %mem, i3 %val) {
}
; CHECK-LABEL: constantOffsetHeuristic_i8_i8
-; CHECK: PartialAlias: i32* %a, i32* %b
+; CHECK: MayAlias: i32* %a, i32* %b
; CHECK: NoAlias: i32* %a, i32* %c
; CHECK: NoAlias: i32* %b, i32* %c
define void @constantOffsetHeuristic_i8_i8(i8* %mem, i8 %val) {
diff --git a/test/Analysis/BasicAA/returned.ll b/test/Analysis/BasicAA/returned.ll
index c6ef6806140df..f0d0f1ec1fe9b 100644
--- a/test/Analysis/BasicAA/returned.ll
+++ b/test/Analysis/BasicAA/returned.ll
@@ -8,20 +8,20 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-DAG: MustAlias: %struct* %st, %struct* %sta
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %x
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %y
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %z
+; CHECK-DAG: MayAlias: %struct* %st, i32* %x
+; CHECK-DAG: MayAlias: %struct* %st, i32* %y
+; CHECK-DAG: MayAlias: %struct* %st, i32* %z
; CHECK-DAG: NoAlias: i32* %x, i32* %y
; CHECK-DAG: NoAlias: i32* %x, i32* %z
; CHECK-DAG: NoAlias: i32* %y, i32* %z
-; CHECK-DAG: PartialAlias: %struct* %st, %struct* %y_12
-; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x
-; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10
+; CHECK-DAG: MayAlias: %struct* %st, %struct* %y_12
+; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x
+; CHECK-DAG: MayAlias: i32* %x, i80* %y_10
-; CHECK-DAG: PartialAlias: %struct* %st, i64* %y_8
-; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8
+; CHECK-DAG: MayAlias: %struct* %st, i64* %y_8
+; CHECK-DAG: MayAlias: i32* %z, i64* %y_8
; CHECK-DAG: NoAlias: i32* %x, i64* %y_8
; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y
diff --git a/test/Analysis/BasicAA/sequential-gep.ll b/test/Analysis/BasicAA/sequential-gep.ll
index c17a782aa04b6..5bedab61e17dc 100644
--- a/test/Analysis/BasicAA/sequential-gep.ll
+++ b/test/Analysis/BasicAA/sequential-gep.ll
@@ -11,7 +11,7 @@ define void @t1([8 x i32]* %p, i32 %addend, i32* %q) {
}
; CHECK: Function: t2
-; CHECK: PartialAlias: i32* %gep1, i32* %gep2
+; CHECK: MayAlias: i32* %gep1, i32* %gep2
define void @t2([8 x i32]* %p, i32 %addend, i32* %q) {
%knownnonzero = load i32, i32* %q, !range !0
%add = add nsw nuw i32 %addend, %knownnonzero
@@ -31,7 +31,7 @@ define void @t3([8 x i32]* %p, i32 %addend, i32* %q) {
}
; CHECK: Function: t4
-; CHECK: PartialAlias: i32* %gep1, i32* %gep2
+; CHECK: MayAlias: i32* %gep1, i32* %gep2
define void @t4([8 x i32]* %p, i32 %addend, i32* %q) {
%knownnonzero = load i32, i32* %q, !range !0
%add = add nsw nuw i32 %addend, %knownnonzero
@@ -41,7 +41,7 @@ define void @t4([8 x i32]* %p, i32 %addend, i32* %q) {
}
; CHECK: Function: t5
-; CHECK: PartialAlias: i32* %gep2, i64* %bc
+; CHECK: MayAlias: i32* %gep2, i64* %bc
define void @t5([8 x i32]* %p, i32 %addend, i32* %q) {
%knownnonzero = load i32, i32* %q, !range !0
%add = add nsw nuw i32 %addend, %knownnonzero
diff --git a/test/Analysis/BasicAA/struct-geps.ll b/test/Analysis/BasicAA/struct-geps.ll
index 2d85e1dd0173d..e048baf4c64a9 100644
--- a/test/Analysis/BasicAA/struct-geps.ll
+++ b/test/Analysis/BasicAA/struct-geps.ll
@@ -6,20 +6,20 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-LABEL: test_simple
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %x
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %y
-; CHECK-DAG: PartialAlias: %struct* %st, i32* %z
+; CHECK-DAG: MayAlias: %struct* %st, i32* %x
+; CHECK-DAG: MayAlias: %struct* %st, i32* %y
+; CHECK-DAG: MayAlias: %struct* %st, i32* %z
; CHECK-DAG: NoAlias: i32* %x, i32* %y
; CHECK-DAG: NoAlias: i32* %x, i32* %z
; CHECK-DAG: NoAlias: i32* %y, i32* %z
-; CHECK-DAG: PartialAlias: %struct* %st, %struct* %y_12
-; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x
-; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10
+; CHECK-DAG: MayAlias: %struct* %st, %struct* %y_12
+; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x
+; CHECK-DAG: MayAlias: i32* %x, i80* %y_10
-; CHECK-DAG: PartialAlias: %struct* %st, i64* %y_8
-; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8
+; CHECK-DAG: MayAlias: %struct* %st, i64* %y_8
+; CHECK-DAG: MayAlias: i32* %z, i64* %y_8
; CHECK-DAG: NoAlias: i32* %x, i64* %y_8
; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y
@@ -38,20 +38,20 @@ define void @test_simple(%struct* %st, i64 %i, i64 %j, i64 %k) {
; CHECK-LABEL: test_in_array
-; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i32* %x
-; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i32* %y
-; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i32* %z
+; CHECK-DAG: MayAlias: [1 x %struct]* %st, i32* %x
+; CHECK-DAG: MayAlias: [1 x %struct]* %st, i32* %y
+; CHECK-DAG: MayAlias: [1 x %struct]* %st, i32* %z
; CHECK-DAG: NoAlias: i32* %x, i32* %y
; CHECK-DAG: NoAlias: i32* %x, i32* %z
; CHECK-DAG: NoAlias: i32* %y, i32* %z
-; CHECK-DAG: PartialAlias: %struct* %y_12, [1 x %struct]* %st
-; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x
-; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10
+; CHECK-DAG: MayAlias: %struct* %y_12, [1 x %struct]* %st
+; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x
+; CHECK-DAG: MayAlias: i32* %x, i80* %y_10
-; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i64* %y_8
-; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8
+; CHECK-DAG: MayAlias: [1 x %struct]* %st, i64* %y_8
+; CHECK-DAG: MayAlias: i32* %z, i64* %y_8
; CHECK-DAG: NoAlias: i32* %x, i64* %y_8
; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y
@@ -70,20 +70,20 @@ define void @test_in_array([1 x %struct]* %st, i64 %i, i64 %j, i64 %k, i64 %i1,
; CHECK-LABEL: test_in_3d_array
-; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %x
-; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %y
-; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %z
+; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %x
+; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %y
+; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %z
; CHECK-DAG: NoAlias: i32* %x, i32* %y
; CHECK-DAG: NoAlias: i32* %x, i32* %z
; CHECK-DAG: NoAlias: i32* %y, i32* %z
-; CHECK-DAG: PartialAlias: %struct* %y_12, [1 x [1 x [1 x %struct]]]* %st
-; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x
-; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10
+; CHECK-DAG: MayAlias: %struct* %y_12, [1 x [1 x [1 x %struct]]]* %st
+; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x
+; CHECK-DAG: MayAlias: i32* %x, i80* %y_10
-; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i64* %y_8
-; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8
+; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i64* %y_8
+; CHECK-DAG: MayAlias: i32* %z, i64* %y_8
; CHECK-DAG: NoAlias: i32* %x, i64* %y_8
; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y
@@ -106,14 +106,14 @@ define void @test_in_3d_array([1 x [1 x [1 x %struct]]]* %st, i64 %i, i64 %j, i6
; CHECK-DAG: NoAlias: i32* %y, i32* %y2
; CHECK-DAG: NoAlias: i32* %z, i32* %z2
-; CHECK-DAG: PartialAlias: i32* %x, i32* %y2
-; CHECK-DAG: PartialAlias: i32* %x, i32* %z2
+; CHECK-DAG: MayAlias: i32* %x, i32* %y2
+; CHECK-DAG: MayAlias: i32* %x, i32* %z2
-; CHECK-DAG: PartialAlias: i32* %x2, i32* %y
-; CHECK-DAG: PartialAlias: i32* %y, i32* %z2
+; CHECK-DAG: MayAlias: i32* %x2, i32* %y
+; CHECK-DAG: MayAlias: i32* %y, i32* %z2
-; CHECK-DAG: PartialAlias: i32* %x2, i32* %z
-; CHECK-DAG: PartialAlias: i32* %y2, i32* %z
+; CHECK-DAG: MayAlias: i32* %x2, i32* %z
+; CHECK-DAG: MayAlias: i32* %y2, i32* %z
define void @test_same_underlying_object_same_indices(%struct* %st, i64 %i, i64 %j, i64 %k) {
%st2 = getelementptr %struct, %struct* %st, i32 10
@@ -128,18 +128,18 @@ define void @test_same_underlying_object_same_indices(%struct* %st, i64 %i, i64
; CHECK-LABEL: test_same_underlying_object_different_indices
-; CHECK-DAG: PartialAlias: i32* %x, i32* %x2
-; CHECK-DAG: PartialAlias: i32* %y, i32* %y2
-; CHECK-DAG: PartialAlias: i32* %z, i32* %z2
+; CHECK-DAG: MayAlias: i32* %x, i32* %x2
+; CHECK-DAG: MayAlias: i32* %y, i32* %y2
+; CHECK-DAG: MayAlias: i32* %z, i32* %z2
-; CHECK-DAG: PartialAlias: i32* %x, i32* %y2
-; CHECK-DAG: PartialAlias: i32* %x, i32* %z2
+; CHECK-DAG: MayAlias: i32* %x, i32* %y2
+; CHECK-DAG: MayAlias: i32* %x, i32* %z2
-; CHECK-DAG: PartialAlias: i32* %x2, i32* %y
-; CHECK-DAG: PartialAlias: i32* %y, i32* %z2
+; CHECK-DAG: MayAlias: i32* %x2, i32* %y
+; CHECK-DAG: MayAlias: i32* %y, i32* %z2
-; CHECK-DAG: PartialAlias: i32* %x2, i32* %z
-; CHECK-DAG: PartialAlias: i32* %y2, i32* %z
+; CHECK-DAG: MayAlias: i32* %x2, i32* %z
+; CHECK-DAG: MayAlias: i32* %y2, i32* %z
define void @test_same_underlying_object_different_indices(%struct* %st, i64 %i1, i64 %j1, i64 %k1, i64 %i2, i64 %k2, i64 %j2) {
%st2 = getelementptr %struct, %struct* %st, i32 10
diff --git a/test/Analysis/BasicAA/zext.ll b/test/Analysis/BasicAA/zext.ll
index 685d45be61512..f8f02353db2ec 100644
--- a/test/Analysis/BasicAA/zext.ll
+++ b/test/Analysis/BasicAA/zext.ll
@@ -69,7 +69,7 @@ for.loop.exit:
}
; CHECK-LABEL: test_sign_extension
-; CHECK: PartialAlias: i64* %b.i64, i8* %a
+; CHECK: MayAlias: i64* %b.i64, i8* %a
define void @test_sign_extension(i32 %p) {
%1 = tail call i8* @malloc(i64 120)
@@ -83,7 +83,7 @@ define void @test_sign_extension(i32 %p) {
}
; CHECK-LABEL: test_fe_tools
-; CHECK: PartialAlias: i32* %a, i32* %b
+; CHECK: MayAlias: i32* %a, i32* %b
define void @test_fe_tools([8 x i32]* %values) {
br label %reorder
@@ -108,7 +108,7 @@ for.loop.exit:
@d = global i32 0, align 4
; CHECK-LABEL: test_spec2006
-; CHECK: PartialAlias: i32** %x, i32** %y
+; CHECK: MayAlias: i32** %x, i32** %y
define void @test_spec2006() {
%h = alloca [1 x [2 x i32*]], align 16
@@ -164,7 +164,7 @@ for.loop.exit:
}
; CHECK-LABEL: test_modulo_analysis_with_global
-; CHECK: PartialAlias: i32** %x, i32** %y
+; CHECK: MayAlias: i32** %x, i32** %y
define void @test_modulo_analysis_with_global() {
%h = alloca [1 x [2 x i32*]], align 16
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll
index b7a615f55cdef..d9e06a3e7b411 100644
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -1,516 +1,564 @@
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; CHECK-LABEL: 'add'
define i32 @add(i32 %arg) {
- ; SSSE3: cost of 1 {{.*}} %A = add
- ; SSE42: cost of 1 {{.*}} %A = add
- ; AVX: cost of 1 {{.*}} %A = add
- ; AVX2: cost of 1 {{.*}} %A = add
- ; AVX512: cost of 1 {{.*}} %A = add
- %A = add <2 x i64> undef, undef
- ; SSSE3: cost of 2 {{.*}} %B = add
- ; SSE42: cost of 2 {{.*}} %B = add
- ; AVX: cost of 4 {{.*}} %B = add
- ; AVX2: cost of 1 {{.*}} %B = add
- ; AVX512: cost of 1 {{.*}} %B = add
- %B = add <4 x i64> undef, undef
- ; SSSE3: cost of 4 {{.*}} %C = add
- ; SSE42: cost of 4 {{.*}} %C = add
- ; AVX: cost of 8 {{.*}} %C = add
- ; AVX2: cost of 2 {{.*}} %C = add
- ; AVX512: cost of 1 {{.*}} %C = add
- %C = add <8 x i64> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %D = add
- ; SSE42: cost of 1 {{.*}} %D = add
- ; AVX: cost of 1 {{.*}} %D = add
- ; AVX2: cost of 1 {{.*}} %D = add
- ; AVX512: cost of 1 {{.*}} %D = add
- %D = add <4 x i32> undef, undef
- ; SSSE3: cost of 2 {{.*}} %E = add
- ; SSE42: cost of 2 {{.*}} %E = add
- ; AVX: cost of 4 {{.*}} %E = add
- ; AVX2: cost of 1 {{.*}} %E = add
- ; AVX512: cost of 1 {{.*}} %E = add
- %E = add <8 x i32> undef, undef
- ; SSSE3: cost of 4 {{.*}} %F = add
- ; SSE42: cost of 4 {{.*}} %F = add
- ; AVX: cost of 8 {{.*}} %F = add
- ; AVX2: cost of 2 {{.*}} %F = add
- ; AVX512: cost of 1 {{.*}} %F = add
- %F = add <16 x i32> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %G = add
- ; SSE42: cost of 1 {{.*}} %G = add
- ; AVX: cost of 1 {{.*}} %G = add
- ; AVX2: cost of 1 {{.*}} %G = add
- ; AVX512: cost of 1 {{.*}} %G = add
- %G = add <8 x i16> undef, undef
- ; SSSE3: cost of 2 {{.*}} %H = add
- ; SSE42: cost of 2 {{.*}} %H = add
- ; AVX: cost of 4 {{.*}} %H = add
- ; AVX2: cost of 1 {{.*}} %H = add
- ; AVX512: cost of 1 {{.*}} %H = add
- %H = add <16 x i16> undef, undef
- ; SSSE3: cost of 4 {{.*}} %I = add
- ; SSE42: cost of 4 {{.*}} %I = add
- ; AVX: cost of 8 {{.*}} %I = add
- ; AVX2: cost of 2 {{.*}} %I = add
- ; AVX512F: cost of 2 {{.*}} %I = add
- ; AVX512BW: cost of 1 {{.*}} %I = add
- %I = add <32 x i16> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %J = add
- ; SSE42: cost of 1 {{.*}} %J = add
- ; AVX: cost of 1 {{.*}} %J = add
- ; AVX2: cost of 1 {{.*}} %J = add
- ; AVX512: cost of 1 {{.*}} %J = add
- %J = add <16 x i8> undef, undef
- ; SSSE3: cost of 2 {{.*}} %K = add
- ; SSE42: cost of 2 {{.*}} %K = add
- ; AVX: cost of 4 {{.*}} %K = add
- ; AVX2: cost of 1 {{.*}} %K = add
- ; AVX512: cost of 1 {{.*}} %K = add
- %K = add <32 x i8> undef, undef
- ; SSSE3: cost of 4 {{.*}} %L = add
- ; SSE42: cost of 4 {{.*}} %L = add
- ; AVX: cost of 8 {{.*}} %L = add
- ; AVX2: cost of 2 {{.*}} %L = add
- ; AVX512F: cost of 2 {{.*}} %L = add
- ; AVX512BW: cost of 1 {{.*}} %L = add
- %L = add <64 x i8> undef, undef
+ ; CHECK: cost of 1 {{.*}} %I64 = add
+ %I64 = add i64 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V2I64 = add
+ ; SSE42: cost of 1 {{.*}} %V2I64 = add
+ ; AVX: cost of 1 {{.*}} %V2I64 = add
+ ; AVX2: cost of 1 {{.*}} %V2I64 = add
+ ; AVX512: cost of 1 {{.*}} %V2I64 = add
+ %V2I64 = add <2 x i64> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V4I64 = add
+ ; SSE42: cost of 2 {{.*}} %V4I64 = add
+ ; AVX: cost of 4 {{.*}} %V4I64 = add
+ ; AVX2: cost of 1 {{.*}} %V4I64 = add
+ ; AVX512: cost of 1 {{.*}} %V4I64 = add
+ %V4I64 = add <4 x i64> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V8I64 = add
+ ; SSE42: cost of 4 {{.*}} %V8I64 = add
+ ; AVX: cost of 8 {{.*}} %V8I64 = add
+ ; AVX2: cost of 2 {{.*}} %V8I64 = add
+ ; AVX512: cost of 1 {{.*}} %V8I64 = add
+ %V8I64 = add <8 x i64> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I32 = add
+ %I32 = add i32 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V4I32 = add
+ ; SSE42: cost of 1 {{.*}} %V4I32 = add
+ ; AVX: cost of 1 {{.*}} %V4I32 = add
+ ; AVX2: cost of 1 {{.*}} %V4I32 = add
+ ; AVX512: cost of 1 {{.*}} %V4I32 = add
+ %V4I32 = add <4 x i32> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V8I32 = add
+ ; SSE42: cost of 2 {{.*}} %V8I32 = add
+ ; AVX: cost of 4 {{.*}} %V8I32 = add
+ ; AVX2: cost of 1 {{.*}} %V8I32 = add
+ ; AVX512: cost of 1 {{.*}} %V8I32 = add
+ %V8I32 = add <8 x i32> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V16I32 = add
+ ; SSE42: cost of 4 {{.*}} %V16I32 = add
+ ; AVX: cost of 8 {{.*}} %V16I32 = add
+ ; AVX2: cost of 2 {{.*}} %V16I32 = add
+ ; AVX512: cost of 1 {{.*}} %V16I32 = add
+ %V16I32 = add <16 x i32> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I16 = add
+ %I16 = add i16 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V8I16 = add
+ ; SSE42: cost of 1 {{.*}} %V8I16 = add
+ ; AVX: cost of 1 {{.*}} %V8I16 = add
+ ; AVX2: cost of 1 {{.*}} %V8I16 = add
+ ; AVX512: cost of 1 {{.*}} %V8I16 = add
+ %V8I16 = add <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V16I16 = add
+ ; SSE42: cost of 2 {{.*}} %V16I16 = add
+ ; AVX: cost of 4 {{.*}} %V16I16 = add
+ ; AVX2: cost of 1 {{.*}} %V16I16 = add
+ ; AVX512: cost of 1 {{.*}} %V16I16 = add
+ %V16I16 = add <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V32I16 = add
+ ; SSE42: cost of 4 {{.*}} %V32I16 = add
+ ; AVX: cost of 8 {{.*}} %V32I16 = add
+ ; AVX2: cost of 2 {{.*}} %V32I16 = add
+ ; AVX512F: cost of 2 {{.*}} %V32I16 = add
+ ; AVX512BW: cost of 1 {{.*}} %V32I16 = add
+ %V32I16 = add <32 x i16> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I8 = add
+ %I8 = add i8 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V16I8 = add
+ ; SSE42: cost of 1 {{.*}} %V16I8 = add
+ ; AVX: cost of 1 {{.*}} %V16I8 = add
+ ; AVX2: cost of 1 {{.*}} %V16I8 = add
+ ; AVX512: cost of 1 {{.*}} %V16I8 = add
+ %V16I8 = add <16 x i8> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V32I8 = add
+ ; SSE42: cost of 2 {{.*}} %V32I8 = add
+ ; AVX: cost of 4 {{.*}} %V32I8 = add
+ ; AVX2: cost of 1 {{.*}} %V32I8 = add
+ ; AVX512: cost of 1 {{.*}} %V32I8 = add
+ %V32I8 = add <32 x i8> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V64I8 = add
+ ; SSE42: cost of 4 {{.*}} %V64I8 = add
+ ; AVX: cost of 8 {{.*}} %V64I8 = add
+ ; AVX2: cost of 2 {{.*}} %V64I8 = add
+ ; AVX512F: cost of 2 {{.*}} %V64I8 = add
+ ; AVX512BW: cost of 1 {{.*}} %V64I8 = add
+ %V64I8 = add <64 x i8> undef, undef
ret i32 undef
}
; CHECK-LABEL: 'sub'
define i32 @sub(i32 %arg) {
- ; SSSE3: cost of 1 {{.*}} %A = sub
- ; SSE42: cost of 1 {{.*}} %A = sub
- ; AVX: cost of 1 {{.*}} %A = sub
- ; AVX2: cost of 1 {{.*}} %A = sub
- ; AVX512: cost of 1 {{.*}} %A = sub
- %A = sub <2 x i64> undef, undef
- ; SSSE3: cost of 2 {{.*}} %B = sub
- ; SSE42: cost of 2 {{.*}} %B = sub
- ; AVX: cost of 4 {{.*}} %B = sub
- ; AVX2: cost of 1 {{.*}} %B = sub
- ; AVX512: cost of 1 {{.*}} %B = sub
- %B = sub <4 x i64> undef, undef
- ; SSSE3: cost of 4 {{.*}} %C = sub
- ; SSE42: cost of 4 {{.*}} %C = sub
- ; AVX: cost of 8 {{.*}} %C = sub
- ; AVX2: cost of 2 {{.*}} %C = sub
- ; AVX512: cost of 1 {{.*}} %C = sub
- %C = sub <8 x i64> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %D = sub
- ; SSE42: cost of 1 {{.*}} %D = sub
- ; AVX: cost of 1 {{.*}} %D = sub
- ; AVX2: cost of 1 {{.*}} %D = sub
- ; AVX512: cost of 1 {{.*}} %D = sub
- %D = sub <4 x i32> undef, undef
- ; SSSE3: cost of 2 {{.*}} %E = sub
- ; SSE42: cost of 2 {{.*}} %E = sub
- ; AVX: cost of 4 {{.*}} %E = sub
- ; AVX2: cost of 1 {{.*}} %E = sub
- ; AVX512: cost of 1 {{.*}} %E = sub
- %E = sub <8 x i32> undef, undef
- ; SSSE3: cost of 4 {{.*}} %F = sub
- ; SSE42: cost of 4 {{.*}} %F = sub
- ; AVX: cost of 8 {{.*}} %F = sub
- ; AVX2: cost of 2 {{.*}} %F = sub
- ; AVX512: cost of 1 {{.*}} %F = sub
- %F = sub <16 x i32> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %G = sub
- ; SSE42: cost of 1 {{.*}} %G = sub
- ; AVX: cost of 1 {{.*}} %G = sub
- ; AVX2: cost of 1 {{.*}} %G = sub
- ; AVX512: cost of 1 {{.*}} %G = sub
- %G = sub <8 x i16> undef, undef
- ; SSSE3: cost of 2 {{.*}} %H = sub
- ; SSE42: cost of 2 {{.*}} %H = sub
- ; AVX: cost of 4 {{.*}} %H = sub
- ; AVX2: cost of 1 {{.*}} %H = sub
- ; AVX512: cost of 1 {{.*}} %H = sub
- %H = sub <16 x i16> undef, undef
- ; SSSE3: cost of 4 {{.*}} %I = sub
- ; SSE42: cost of 4 {{.*}} %I = sub
- ; AVX: cost of 8 {{.*}} %I = sub
- ; AVX2: cost of 2 {{.*}} %I = sub
- ; AVX512F: cost of 2 {{.*}} %I = sub
- ; AVX512BW: cost of 1 {{.*}} %I = sub
- %I = sub <32 x i16> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %J = sub
- ; SSE42: cost of 1 {{.*}} %J = sub
- ; AVX: cost of 1 {{.*}} %J = sub
- ; AVX2: cost of 1 {{.*}} %J = sub
- ; AVX512: cost of 1 {{.*}} %J = sub
- %J = sub <16 x i8> undef, undef
- ; SSSE3: cost of 2 {{.*}} %K = sub
- ; SSE42: cost of 2 {{.*}} %K = sub
- ; AVX: cost of 4 {{.*}} %K = sub
- ; AVX2: cost of 1 {{.*}} %K = sub
- ; AVX512: cost of 1 {{.*}} %K = sub
- %K = sub <32 x i8> undef, undef
- ; SSSE3: cost of 4 {{.*}} %L = sub
- ; SSE42: cost of 4 {{.*}} %L = sub
- ; AVX: cost of 8 {{.*}} %L = sub
- ; AVX2: cost of 2 {{.*}} %L = sub
- ; AVX512F: cost of 2 {{.*}} %L = sub
- ; AVX512BW: cost of 1 {{.*}} %L = sub
- %L = sub <64 x i8> undef, undef
+ ; CHECK: cost of 1 {{.*}} %I64 = sub
+ %I64 = sub i64 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V2I64 = sub
+ ; SSE42: cost of 1 {{.*}} %V2I64 = sub
+ ; AVX: cost of 1 {{.*}} %V2I64 = sub
+ ; AVX2: cost of 1 {{.*}} %V2I64 = sub
+ ; AVX512: cost of 1 {{.*}} %V2I64 = sub
+ %V2I64 = sub <2 x i64> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V4I64 = sub
+ ; SSE42: cost of 2 {{.*}} %V4I64 = sub
+ ; AVX: cost of 4 {{.*}} %V4I64 = sub
+ ; AVX2: cost of 1 {{.*}} %V4I64 = sub
+ ; AVX512: cost of 1 {{.*}} %V4I64 = sub
+ %V4I64 = sub <4 x i64> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V8I64 = sub
+ ; SSE42: cost of 4 {{.*}} %V8I64 = sub
+ ; AVX: cost of 8 {{.*}} %V8I64 = sub
+ ; AVX2: cost of 2 {{.*}} %V8I64 = sub
+ ; AVX512: cost of 1 {{.*}} %V8I64 = sub
+ %V8I64 = sub <8 x i64> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I32 = sub
+ %I32 = sub i32 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V4I32 = sub
+ ; SSE42: cost of 1 {{.*}} %V4I32 = sub
+ ; AVX: cost of 1 {{.*}} %V4I32 = sub
+ ; AVX2: cost of 1 {{.*}} %V4I32 = sub
+ ; AVX512: cost of 1 {{.*}} %V4I32 = sub
+ %V4I32 = sub <4 x i32> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V8I32 = sub
+ ; SSE42: cost of 2 {{.*}} %V8I32 = sub
+ ; AVX: cost of 4 {{.*}} %V8I32 = sub
+ ; AVX2: cost of 1 {{.*}} %V8I32 = sub
+ ; AVX512: cost of 1 {{.*}} %V8I32 = sub
+ %V8I32 = sub <8 x i32> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V16I32 = sub
+ ; SSE42: cost of 4 {{.*}} %V16I32 = sub
+ ; AVX: cost of 8 {{.*}} %V16I32 = sub
+ ; AVX2: cost of 2 {{.*}} %V16I32 = sub
+ ; AVX512: cost of 1 {{.*}} %V16I32 = sub
+ %V16I32 = sub <16 x i32> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I16 = sub
+ %I16 = sub i16 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V8I16 = sub
+ ; SSE42: cost of 1 {{.*}} %V8I16 = sub
+ ; AVX: cost of 1 {{.*}} %V8I16 = sub
+ ; AVX2: cost of 1 {{.*}} %V8I16 = sub
+ ; AVX512: cost of 1 {{.*}} %V8I16 = sub
+ %V8I16 = sub <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V16I16 = sub
+ ; SSE42: cost of 2 {{.*}} %V16I16 = sub
+ ; AVX: cost of 4 {{.*}} %V16I16 = sub
+ ; AVX2: cost of 1 {{.*}} %V16I16 = sub
+ ; AVX512: cost of 1 {{.*}} %V16I16 = sub
+ %V16I16 = sub <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V32I16 = sub
+ ; SSE42: cost of 4 {{.*}} %V32I16 = sub
+ ; AVX: cost of 8 {{.*}} %V32I16 = sub
+ ; AVX2: cost of 2 {{.*}} %V32I16 = sub
+ ; AVX512F: cost of 2 {{.*}} %V32I16 = sub
+ ; AVX512BW: cost of 1 {{.*}} %V32I16 = sub
+ %V32I16 = sub <32 x i16> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I8 = sub
+ %I8 = sub i8 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V16I8 = sub
+ ; SSE42: cost of 1 {{.*}} %V16I8 = sub
+ ; AVX: cost of 1 {{.*}} %V16I8 = sub
+ ; AVX2: cost of 1 {{.*}} %V16I8 = sub
+ ; AVX512: cost of 1 {{.*}} %V16I8 = sub
+ %V16I8 = sub <16 x i8> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V32I8 = sub
+ ; SSE42: cost of 2 {{.*}} %V32I8 = sub
+ ; AVX: cost of 4 {{.*}} %V32I8 = sub
+ ; AVX2: cost of 1 {{.*}} %V32I8 = sub
+ ; AVX512: cost of 1 {{.*}} %V32I8 = sub
+ %V32I8 = sub <32 x i8> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V64I8 = sub
+ ; SSE42: cost of 4 {{.*}} %V64I8 = sub
+ ; AVX: cost of 8 {{.*}} %V64I8 = sub
+ ; AVX2: cost of 2 {{.*}} %V64I8 = sub
+ ; AVX512F: cost of 2 {{.*}} %V64I8 = sub
+ ; AVX512BW: cost of 1 {{.*}} %V64I8 = sub
+ %V64I8 = sub <64 x i8> undef, undef
ret i32 undef
}
; CHECK-LABEL: 'or'
define i32 @or(i32 %arg) {
- ; SSSE3: cost of 1 {{.*}} %A = or
- ; SSE42: cost of 1 {{.*}} %A = or
- ; AVX: cost of 1 {{.*}} %A = or
- ; AVX2: cost of 1 {{.*}} %A = or
- ; AVX512: cost of 1 {{.*}} %A = or
- %A = or <2 x i64> undef, undef
- ; SSSE3: cost of 2 {{.*}} %B = or
- ; SSE42: cost of 2 {{.*}} %B = or
- ; AVX: cost of 1 {{.*}} %B = or
- ; AVX2: cost of 1 {{.*}} %B = or
- ; AVX512: cost of 1 {{.*}} %B = or
- %B = or <4 x i64> undef, undef
- ; SSSE3: cost of 4 {{.*}} %C = or
- ; SSE42: cost of 4 {{.*}} %C = or
- ; AVX: cost of 2 {{.*}} %C = or
- ; AVX2: cost of 2 {{.*}} %C = or
- ; AVX512: cost of 1 {{.*}} %C = or
- %C = or <8 x i64> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %D = or
- ; SSE42: cost of 1 {{.*}} %D = or
- ; AVX: cost of 1 {{.*}} %D = or
- ; AVX2: cost of 1 {{.*}} %D = or
- ; AVX512: cost of 1 {{.*}} %D = or
- %D = or <4 x i32> undef, undef
- ; SSSE3: cost of 2 {{.*}} %E = or
- ; SSE42: cost of 2 {{.*}} %E = or
- ; AVX: cost of 1 {{.*}} %E = or
- ; AVX2: cost of 1 {{.*}} %E = or
- ; AVX512: cost of 1 {{.*}} %E = or
- %E = or <8 x i32> undef, undef
- ; SSSE3: cost of 4 {{.*}} %F = or
- ; SSE42: cost of 4 {{.*}} %F = or
- ; AVX: cost of 2 {{.*}} %F = or
- ; AVX2: cost of 2 {{.*}} %F = or
- ; AVX512: cost of 1 {{.*}} %F = or
- %F = or <16 x i32> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %G = or
- ; SSE42: cost of 1 {{.*}} %G = or
- ; AVX: cost of 1 {{.*}} %G = or
- ; AVX2: cost of 1 {{.*}} %G = or
- ; AVX512: cost of 1 {{.*}} %G = or
- %G = or <8 x i16> undef, undef
- ; SSSE3: cost of 2 {{.*}} %H = or
- ; SSE42: cost of 2 {{.*}} %H = or
- ; AVX: cost of 1 {{.*}} %H = or
- ; AVX2: cost of 1 {{.*}} %H = or
- ; AVX512: cost of 1 {{.*}} %H = or
- %H = or <16 x i16> undef, undef
- ; SSSE3: cost of 4 {{.*}} %I = or
- ; SSE42: cost of 4 {{.*}} %I = or
- ; AVX: cost of 2 {{.*}} %I = or
- ; AVX2: cost of 2 {{.*}} %I = or
- ; AVX512F: cost of 2 {{.*}} %I = or
- ; AVX512BW: cost of 1 {{.*}} %I = or
- %I = or <32 x i16> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %J = or
- ; SSE42: cost of 1 {{.*}} %J = or
- ; AVX: cost of 1 {{.*}} %J = or
- ; AVX2: cost of 1 {{.*}} %J = or
- ; AVX512: cost of 1 {{.*}} %J = or
- %J = or <16 x i8> undef, undef
- ; SSSE3: cost of 2 {{.*}} %K = or
- ; SSE42: cost of 2 {{.*}} %K = or
- ; AVX: cost of 1 {{.*}} %K = or
- ; AVX2: cost of 1 {{.*}} %K = or
- ; AVX512: cost of 1 {{.*}} %K = or
- %K = or <32 x i8> undef, undef
- ; SSSE3: cost of 4 {{.*}} %L = or
- ; SSE42: cost of 4 {{.*}} %L = or
- ; AVX: cost of 2 {{.*}} %L = or
- ; AVX2: cost of 2 {{.*}} %L = or
- ; AVX512F: cost of 2 {{.*}} %L = or
- ; AVX512BW: cost of 1 {{.*}} %L = or
- %L = or <64 x i8> undef, undef
+ ; CHECK: cost of 1 {{.*}} %I64 = or
+ %I64 = or i64 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V2I64 = or
+ ; SSE42: cost of 1 {{.*}} %V2I64 = or
+ ; AVX: cost of 1 {{.*}} %V2I64 = or
+ ; AVX2: cost of 1 {{.*}} %V2I64 = or
+ ; AVX512: cost of 1 {{.*}} %V2I64 = or
+ %V2I64 = or <2 x i64> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V4I64 = or
+ ; SSE42: cost of 2 {{.*}} %V4I64 = or
+ ; AVX: cost of 1 {{.*}} %V4I64 = or
+ ; AVX2: cost of 1 {{.*}} %V4I64 = or
+ ; AVX512: cost of 1 {{.*}} %V4I64 = or
+ %V4I64 = or <4 x i64> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V8I64 = or
+ ; SSE42: cost of 4 {{.*}} %V8I64 = or
+ ; AVX: cost of 2 {{.*}} %V8I64 = or
+ ; AVX2: cost of 2 {{.*}} %V8I64 = or
+ ; AVX512: cost of 1 {{.*}} %V8I64 = or
+ %V8I64 = or <8 x i64> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I32 = or
+ %I32 = or i32 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V4I32 = or
+ ; SSE42: cost of 1 {{.*}} %V4I32 = or
+ ; AVX: cost of 1 {{.*}} %V4I32 = or
+ ; AVX2: cost of 1 {{.*}} %V4I32 = or
+ ; AVX512: cost of 1 {{.*}} %V4I32 = or
+ %V4I32 = or <4 x i32> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V8I32 = or
+ ; SSE42: cost of 2 {{.*}} %V8I32 = or
+ ; AVX: cost of 1 {{.*}} %V8I32 = or
+ ; AVX2: cost of 1 {{.*}} %V8I32 = or
+ ; AVX512: cost of 1 {{.*}} %V8I32 = or
+ %V8I32 = or <8 x i32> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V16I32 = or
+ ; SSE42: cost of 4 {{.*}} %V16I32 = or
+ ; AVX: cost of 2 {{.*}} %V16I32 = or
+ ; AVX2: cost of 2 {{.*}} %V16I32 = or
+ ; AVX512: cost of 1 {{.*}} %V16I32 = or
+ %V16I32 = or <16 x i32> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I16 = or
+ %I16 = or i16 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V8I16 = or
+ ; SSE42: cost of 1 {{.*}} %V8I16 = or
+ ; AVX: cost of 1 {{.*}} %V8I16 = or
+ ; AVX2: cost of 1 {{.*}} %V8I16 = or
+ ; AVX512: cost of 1 {{.*}} %V8I16 = or
+ %V8I16 = or <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V16I16 = or
+ ; SSE42: cost of 2 {{.*}} %V16I16 = or
+ ; AVX: cost of 1 {{.*}} %V16I16 = or
+ ; AVX2: cost of 1 {{.*}} %V16I16 = or
+ ; AVX512: cost of 1 {{.*}} %V16I16 = or
+ %V16I16 = or <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V32I16 = or
+ ; SSE42: cost of 4 {{.*}} %V32I16 = or
+ ; AVX: cost of 2 {{.*}} %V32I16 = or
+ ; AVX2: cost of 2 {{.*}} %V32I16 = or
+ ; AVX512F: cost of 2 {{.*}} %V32I16 = or
+ ; AVX512BW: cost of 1 {{.*}} %V32I16 = or
+ %V32I16 = or <32 x i16> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I8 = or
+ %I8 = or i8 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V16I8 = or
+ ; SSE42: cost of 1 {{.*}} %V16I8 = or
+ ; AVX: cost of 1 {{.*}} %V16I8 = or
+ ; AVX2: cost of 1 {{.*}} %V16I8 = or
+ ; AVX512: cost of 1 {{.*}} %V16I8 = or
+ %V16I8 = or <16 x i8> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V32I8 = or
+ ; SSE42: cost of 2 {{.*}} %V32I8 = or
+ ; AVX: cost of 1 {{.*}} %V32I8 = or
+ ; AVX2: cost of 1 {{.*}} %V32I8 = or
+ ; AVX512: cost of 1 {{.*}} %V32I8 = or
+ %V32I8 = or <32 x i8> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V64I8 = or
+ ; SSE42: cost of 4 {{.*}} %V64I8 = or
+ ; AVX: cost of 2 {{.*}} %V64I8 = or
+ ; AVX2: cost of 2 {{.*}} %V64I8 = or
+ ; AVX512F: cost of 2 {{.*}} %V64I8 = or
+ ; AVX512BW: cost of 1 {{.*}} %V64I8 = or
+ %V64I8 = or <64 x i8> undef, undef
ret i32 undef
}
; CHECK-LABEL: 'xor'
define i32 @xor(i32 %arg) {
- ; SSSE3: cost of 1 {{.*}} %A = xor
- ; SSE42: cost of 1 {{.*}} %A = xor
- ; AVX: cost of 1 {{.*}} %A = xor
- ; AVX2: cost of 1 {{.*}} %A = xor
- ; AVX512: cost of 1 {{.*}} %A = xor
- %A = xor <2 x i64> undef, undef
- ; SSSE3: cost of 2 {{.*}} %B = xor
- ; SSE42: cost of 2 {{.*}} %B = xor
- ; AVX: cost of 1 {{.*}} %B = xor
- ; AVX2: cost of 1 {{.*}} %B = xor
- ; AVX512: cost of 1 {{.*}} %B = xor
- %B = xor <4 x i64> undef, undef
- ; SSSE3: cost of 4 {{.*}} %C = xor
- ; SSE42: cost of 4 {{.*}} %C = xor
- ; AVX: cost of 2 {{.*}} %C = xor
- ; AVX2: cost of 2 {{.*}} %C = xor
- ; AVX512: cost of 1 {{.*}} %C = xor
- %C = xor <8 x i64> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %D = xor
- ; SSE42: cost of 1 {{.*}} %D = xor
- ; AVX: cost of 1 {{.*}} %D = xor
- ; AVX2: cost of 1 {{.*}} %D = xor
- ; AVX512: cost of 1 {{.*}} %D = xor
- %D = xor <4 x i32> undef, undef
- ; SSSE3: cost of 2 {{.*}} %E = xor
- ; SSE42: cost of 2 {{.*}} %E = xor
- ; AVX: cost of 1 {{.*}} %E = xor
- ; AVX2: cost of 1 {{.*}} %E = xor
- ; AVX512: cost of 1 {{.*}} %E = xor
- %E = xor <8 x i32> undef, undef
- ; SSSE3: cost of 4 {{.*}} %F = xor
- ; SSE42: cost of 4 {{.*}} %F = xor
- ; AVX: cost of 2 {{.*}} %F = xor
- ; AVX2: cost of 2 {{.*}} %F = xor
- ; AVX512: cost of 1 {{.*}} %F = xor
- %F = xor <16 x i32> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %G = xor
- ; SSE42: cost of 1 {{.*}} %G = xor
- ; AVX: cost of 1 {{.*}} %G = xor
- ; AVX2: cost of 1 {{.*}} %G = xor
- ; AVX512: cost of 1 {{.*}} %G = xor
- %G = xor <8 x i16> undef, undef
- ; SSSE3: cost of 2 {{.*}} %H = xor
- ; SSE42: cost of 2 {{.*}} %H = xor
- ; AVX: cost of 1 {{.*}} %H = xor
- ; AVX2: cost of 1 {{.*}} %H = xor
- ; AVX512: cost of 1 {{.*}} %H = xor
- %H = xor <16 x i16> undef, undef
- ; SSSE3: cost of 4 {{.*}} %I = xor
- ; SSE42: cost of 4 {{.*}} %I = xor
- ; AVX: cost of 2 {{.*}} %I = xor
- ; AVX2: cost of 2 {{.*}} %I = xor
- ; AVX512F: cost of 2 {{.*}} %I = xor
- ; AVX512BW: cost of 1 {{.*}} %I = xor
- %I = xor <32 x i16> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %J = xor
- ; SSE42: cost of 1 {{.*}} %J = xor
- ; AVX: cost of 1 {{.*}} %J = xor
- ; AVX2: cost of 1 {{.*}} %J = xor
- ; AVX512: cost of 1 {{.*}} %J = xor
- %J = xor <16 x i8> undef, undef
- ; SSSE3: cost of 2 {{.*}} %K = xor
- ; SSE42: cost of 2 {{.*}} %K = xor
- ; AVX: cost of 1 {{.*}} %K = xor
- ; AVX2: cost of 1 {{.*}} %K = xor
- ; AVX512: cost of 1 {{.*}} %K = xor
- %K = xor <32 x i8> undef, undef
- ; SSSE3: cost of 4 {{.*}} %L = xor
- ; SSE42: cost of 4 {{.*}} %L = xor
- ; AVX: cost of 2 {{.*}} %L = xor
- ; AVX2: cost of 2 {{.*}} %L = xor
- ; AVX512F: cost of 2 {{.*}} %L = xor
- ; AVX512BW: cost of 1 {{.*}} %L = xor
- %L = xor <64 x i8> undef, undef
+ ; CHECK: cost of 1 {{.*}} %I64 = xor
+ %I64 = xor i64 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V2I64 = xor
+ ; SSE42: cost of 1 {{.*}} %V2I64 = xor
+ ; AVX: cost of 1 {{.*}} %V2I64 = xor
+ ; AVX2: cost of 1 {{.*}} %V2I64 = xor
+ ; AVX512: cost of 1 {{.*}} %V2I64 = xor
+ %V2I64 = xor <2 x i64> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V4I64 = xor
+ ; SSE42: cost of 2 {{.*}} %V4I64 = xor
+ ; AVX: cost of 1 {{.*}} %V4I64 = xor
+ ; AVX2: cost of 1 {{.*}} %V4I64 = xor
+ ; AVX512: cost of 1 {{.*}} %V4I64 = xor
+ %V4I64 = xor <4 x i64> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V8I64 = xor
+ ; SSE42: cost of 4 {{.*}} %V8I64 = xor
+ ; AVX: cost of 2 {{.*}} %V8I64 = xor
+ ; AVX2: cost of 2 {{.*}} %V8I64 = xor
+ ; AVX512: cost of 1 {{.*}} %V8I64 = xor
+ %V8I64 = xor <8 x i64> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I32 = xor
+ %I32 = xor i32 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V4I32 = xor
+ ; SSE42: cost of 1 {{.*}} %V4I32 = xor
+ ; AVX: cost of 1 {{.*}} %V4I32 = xor
+ ; AVX2: cost of 1 {{.*}} %V4I32 = xor
+ ; AVX512: cost of 1 {{.*}} %V4I32 = xor
+ %V4I32 = xor <4 x i32> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V8I32 = xor
+ ; SSE42: cost of 2 {{.*}} %V8I32 = xor
+ ; AVX: cost of 1 {{.*}} %V8I32 = xor
+ ; AVX2: cost of 1 {{.*}} %V8I32 = xor
+ ; AVX512: cost of 1 {{.*}} %V8I32 = xor
+ %V8I32 = xor <8 x i32> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V16I32 = xor
+ ; SSE42: cost of 4 {{.*}} %V16I32 = xor
+ ; AVX: cost of 2 {{.*}} %V16I32 = xor
+ ; AVX2: cost of 2 {{.*}} %V16I32 = xor
+ ; AVX512: cost of 1 {{.*}} %V16I32 = xor
+ %V16I32 = xor <16 x i32> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I16 = xor
+ %I16 = xor i16 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V8I16 = xor
+ ; SSE42: cost of 1 {{.*}} %V8I16 = xor
+ ; AVX: cost of 1 {{.*}} %V8I16 = xor
+ ; AVX2: cost of 1 {{.*}} %V8I16 = xor
+ ; AVX512: cost of 1 {{.*}} %V8I16 = xor
+ %V8I16 = xor <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V16I16 = xor
+ ; SSE42: cost of 2 {{.*}} %V16I16 = xor
+ ; AVX: cost of 1 {{.*}} %V16I16 = xor
+ ; AVX2: cost of 1 {{.*}} %V16I16 = xor
+ ; AVX512: cost of 1 {{.*}} %V16I16 = xor
+ %V16I16 = xor <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V32I16 = xor
+ ; SSE42: cost of 4 {{.*}} %V32I16 = xor
+ ; AVX: cost of 2 {{.*}} %V32I16 = xor
+ ; AVX2: cost of 2 {{.*}} %V32I16 = xor
+ ; AVX512F: cost of 2 {{.*}} %V32I16 = xor
+ ; AVX512BW: cost of 1 {{.*}} %V32I16 = xor
+ %V32I16 = xor <32 x i16> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I8 = xor
+ %I8 = xor i8 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V16I8 = xor
+ ; SSE42: cost of 1 {{.*}} %V16I8 = xor
+ ; AVX: cost of 1 {{.*}} %V16I8 = xor
+ ; AVX2: cost of 1 {{.*}} %V16I8 = xor
+ ; AVX512: cost of 1 {{.*}} %V16I8 = xor
+ %V16I8 = xor <16 x i8> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V32I8 = xor
+ ; SSE42: cost of 2 {{.*}} %V32I8 = xor
+ ; AVX: cost of 1 {{.*}} %V32I8 = xor
+ ; AVX2: cost of 1 {{.*}} %V32I8 = xor
+ ; AVX512: cost of 1 {{.*}} %V32I8 = xor
+ %V32I8 = xor <32 x i8> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V64I8 = xor
+ ; SSE42: cost of 4 {{.*}} %V64I8 = xor
+ ; AVX: cost of 2 {{.*}} %V64I8 = xor
+ ; AVX2: cost of 2 {{.*}} %V64I8 = xor
+ ; AVX512F: cost of 2 {{.*}} %V64I8 = xor
+ ; AVX512BW: cost of 1 {{.*}} %V64I8 = xor
+ %V64I8 = xor <64 x i8> undef, undef
ret i32 undef
}
; CHECK-LABEL: 'and'
define i32 @and(i32 %arg) {
- ; SSSE3: cost of 1 {{.*}} %A = and
- ; SSE42: cost of 1 {{.*}} %A = and
- ; AVX: cost of 1 {{.*}} %A = and
- ; AVX2: cost of 1 {{.*}} %A = and
- ; AVX512: cost of 1 {{.*}} %A = and
- %A = and <2 x i64> undef, undef
- ; SSSE3: cost of 2 {{.*}} %B = and
- ; SSE42: cost of 2 {{.*}} %B = and
- ; AVX: cost of 1 {{.*}} %B = and
- ; AVX2: cost of 1 {{.*}} %B = and
- ; AVX512: cost of 1 {{.*}} %B = and
- %B = and <4 x i64> undef, undef
- ; SSSE3: cost of 4 {{.*}} %C = and
- ; SSE42: cost of 4 {{.*}} %C = and
- ; AVX: cost of 2 {{.*}} %C = and
- ; AVX2: cost of 2 {{.*}} %C = and
- ; AVX512: cost of 1 {{.*}} %C = and
- %C = and <8 x i64> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %D = and
- ; SSE42: cost of 1 {{.*}} %D = and
- ; AVX: cost of 1 {{.*}} %D = and
- ; AVX2: cost of 1 {{.*}} %D = and
- ; AVX512: cost of 1 {{.*}} %D = and
- %D = and <4 x i32> undef, undef
- ; SSSE3: cost of 2 {{.*}} %E = and
- ; SSE42: cost of 2 {{.*}} %E = and
- ; AVX: cost of 1 {{.*}} %E = and
- ; AVX2: cost of 1 {{.*}} %E = and
- ; AVX512: cost of 1 {{.*}} %E = and
- %E = and <8 x i32> undef, undef
- ; SSSE3: cost of 4 {{.*}} %F = and
- ; SSE42: cost of 4 {{.*}} %F = and
- ; AVX: cost of 2 {{.*}} %F = and
- ; AVX2: cost of 2 {{.*}} %F = and
- ; AVX512: cost of 1 {{.*}} %F = and
- %F = and <16 x i32> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %G = and
- ; SSE42: cost of 1 {{.*}} %G = and
- ; AVX: cost of 1 {{.*}} %G = and
- ; AVX2: cost of 1 {{.*}} %G = and
- ; AVX512: cost of 1 {{.*}} %G = and
- %G = and <8 x i16> undef, undef
- ; SSSE3: cost of 2 {{.*}} %H = and
- ; SSE42: cost of 2 {{.*}} %H = and
- ; AVX: cost of 1 {{.*}} %H = and
- ; AVX2: cost of 1 {{.*}} %H = and
- ; AVX512: cost of 1 {{.*}} %H = and
- %H = and <16 x i16> undef, undef
- ; SSSE3: cost of 4 {{.*}} %I = and
- ; SSE42: cost of 4 {{.*}} %I = and
- ; AVX: cost of 2 {{.*}} %I = and
- ; AVX2: cost of 2 {{.*}} %I = and
- ; AVX512F: cost of 2 {{.*}} %I = and
- ; AVX512BW: cost of 1 {{.*}} %I = and
- %I = and <32 x i16> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %J = and
- ; SSE42: cost of 1 {{.*}} %J = and
- ; AVX: cost of 1 {{.*}} %J = and
- ; AVX2: cost of 1 {{.*}} %J = and
- ; AVX512: cost of 1 {{.*}} %J = and
- %J = and <16 x i8> undef, undef
- ; SSSE3: cost of 2 {{.*}} %K = and
- ; SSE42: cost of 2 {{.*}} %K = and
- ; AVX: cost of 1 {{.*}} %K = and
- ; AVX2: cost of 1 {{.*}} %K = and
- ; AVX512: cost of 1 {{.*}} %K = and
- %K = and <32 x i8> undef, undef
- ; SSSE3: cost of 4 {{.*}} %L = and
- ; SSE42: cost of 4 {{.*}} %L = and
- ; AVX: cost of 2 {{.*}} %L = and
- ; AVX2: cost of 2 {{.*}} %L = and
- ; AVX512F: cost of 2 {{.*}} %L = and
- ; AVX512BW: cost of 1 {{.*}} %L = and
- %L = and <64 x i8> undef, undef
+ ; CHECK: cost of 1 {{.*}} %I64 = and
+ %I64 = and i64 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V2I64 = and
+ ; SSE42: cost of 1 {{.*}} %V2I64 = and
+ ; AVX: cost of 1 {{.*}} %V2I64 = and
+ ; AVX2: cost of 1 {{.*}} %V2I64 = and
+ ; AVX512: cost of 1 {{.*}} %V2I64 = and
+ %V2I64 = and <2 x i64> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V4I64 = and
+ ; SSE42: cost of 2 {{.*}} %V4I64 = and
+ ; AVX: cost of 1 {{.*}} %V4I64 = and
+ ; AVX2: cost of 1 {{.*}} %V4I64 = and
+ ; AVX512: cost of 1 {{.*}} %V4I64 = and
+ %V4I64 = and <4 x i64> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V8I64 = and
+ ; SSE42: cost of 4 {{.*}} %V8I64 = and
+ ; AVX: cost of 2 {{.*}} %V8I64 = and
+ ; AVX2: cost of 2 {{.*}} %V8I64 = and
+ ; AVX512: cost of 1 {{.*}} %V8I64 = and
+ %V8I64 = and <8 x i64> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I32 = and
+ %I32 = and i32 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V4I32 = and
+ ; SSE42: cost of 1 {{.*}} %V4I32 = and
+ ; AVX: cost of 1 {{.*}} %V4I32 = and
+ ; AVX2: cost of 1 {{.*}} %V4I32 = and
+ ; AVX512: cost of 1 {{.*}} %V4I32 = and
+ %V4I32 = and <4 x i32> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V8I32 = and
+ ; SSE42: cost of 2 {{.*}} %V8I32 = and
+ ; AVX: cost of 1 {{.*}} %V8I32 = and
+ ; AVX2: cost of 1 {{.*}} %V8I32 = and
+ ; AVX512: cost of 1 {{.*}} %V8I32 = and
+ %V8I32 = and <8 x i32> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V16I32 = and
+ ; SSE42: cost of 4 {{.*}} %V16I32 = and
+ ; AVX: cost of 2 {{.*}} %V16I32 = and
+ ; AVX2: cost of 2 {{.*}} %V16I32 = and
+ ; AVX512: cost of 1 {{.*}} %V16I32 = and
+ %V16I32 = and <16 x i32> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I16 = and
+ %I16 = and i16 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V8I16 = and
+ ; SSE42: cost of 1 {{.*}} %V8I16 = and
+ ; AVX: cost of 1 {{.*}} %V8I16 = and
+ ; AVX2: cost of 1 {{.*}} %V8I16 = and
+ ; AVX512: cost of 1 {{.*}} %V8I16 = and
+ %V8I16 = and <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V16I16 = and
+ ; SSE42: cost of 2 {{.*}} %V16I16 = and
+ ; AVX: cost of 1 {{.*}} %V16I16 = and
+ ; AVX2: cost of 1 {{.*}} %V16I16 = and
+ ; AVX512: cost of 1 {{.*}} %V16I16 = and
+ %V16I16 = and <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V32I16 = and
+ ; SSE42: cost of 4 {{.*}} %V32I16 = and
+ ; AVX: cost of 2 {{.*}} %V32I16 = and
+ ; AVX2: cost of 2 {{.*}} %V32I16 = and
+ ; AVX512F: cost of 2 {{.*}} %V32I16 = and
+ ; AVX512BW: cost of 1 {{.*}} %V32I16 = and
+ %V32I16 = and <32 x i16> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I8 = and
+ %I8 = and i8 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V16I8 = and
+ ; SSE42: cost of 1 {{.*}} %V16I8 = and
+ ; AVX: cost of 1 {{.*}} %V16I8 = and
+ ; AVX2: cost of 1 {{.*}} %V16I8 = and
+ ; AVX512: cost of 1 {{.*}} %V16I8 = and
+ %V16I8 = and <16 x i8> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V32I8 = and
+ ; SSE42: cost of 2 {{.*}} %V32I8 = and
+ ; AVX: cost of 1 {{.*}} %V32I8 = and
+ ; AVX2: cost of 1 {{.*}} %V32I8 = and
+ ; AVX512: cost of 1 {{.*}} %V32I8 = and
+ %V32I8 = and <32 x i8> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V64I8 = and
+ ; SSE42: cost of 4 {{.*}} %V64I8 = and
+ ; AVX: cost of 2 {{.*}} %V64I8 = and
+ ; AVX2: cost of 2 {{.*}} %V64I8 = and
+ ; AVX512F: cost of 2 {{.*}} %V64I8 = and
+ ; AVX512BW: cost of 1 {{.*}} %V64I8 = and
+ %V64I8 = and <64 x i8> undef, undef
ret i32 undef
}
; CHECK-LABEL: 'mul'
define i32 @mul(i32 %arg) {
- ; SSSE3: cost of 8 {{.*}} %A = mul
- ; SSE42: cost of 8 {{.*}} %A = mul
- ; AVX: cost of 8 {{.*}} %A = mul
- ; AVX2: cost of 8 {{.*}} %A = mul
- ; AVX512F: cost of 8 {{.*}} %A = mul
- ; AVX512BW: cost of 8 {{.*}} %A = mul
- ; AVX512DQ: cost of 1 {{.*}} %A = mul
- %A = mul <2 x i64> undef, undef
- ; SSSE3: cost of 16 {{.*}} %B = mul
- ; SSE42: cost of 16 {{.*}} %B = mul
- ; AVX: cost of 18 {{.*}} %B = mul
- ; AVX2: cost of 8 {{.*}} %B = mul
- ; AVX512F: cost of 8 {{.*}} %B = mul
- ; AVX512BW: cost of 8 {{.*}} %B = mul
- ; AVX512DQ: cost of 1 {{.*}} %B = mul
- %B = mul <4 x i64> undef, undef
- ; SSSE3: cost of 32 {{.*}} %C = mul
- ; SSE42: cost of 32 {{.*}} %C = mul
- ; AVX: cost of 36 {{.*}} %C = mul
- ; AVX2: cost of 16 {{.*}} %C = mul
- ; AVX512F: cost of 8 {{.*}} %C = mul
- ; AVX512BW: cost of 8 {{.*}} %C = mul
- ; AVX512DQ: cost of 1 {{.*}} %C = mul
- %C = mul <8 x i64> undef, undef
-
- ; SSSE3: cost of 6 {{.*}} %D = mul
- ; SSE42: cost of 1 {{.*}} %D = mul
- ; AVX: cost of 1 {{.*}} %D = mul
- ; AVX2: cost of 1 {{.*}} %D = mul
- ; AVX512: cost of 1 {{.*}} %D = mul
- %D = mul <4 x i32> undef, undef
- ; SSSE3: cost of 12 {{.*}} %E = mul
- ; SSE42: cost of 2 {{.*}} %E = mul
- ; AVX: cost of 4 {{.*}} %E = mul
- ; AVX2: cost of 1 {{.*}} %E = mul
- ; AVX512: cost of 1 {{.*}} %E = mul
- %E = mul <8 x i32> undef, undef
- ; SSSE3: cost of 24 {{.*}} %F = mul
- ; SSE42: cost of 4 {{.*}} %F = mul
- ; AVX: cost of 8 {{.*}} %F = mul
- ; AVX2: cost of 2 {{.*}} %F = mul
- ; AVX512: cost of 1 {{.*}} %F = mul
- %F = mul <16 x i32> undef, undef
-
- ; SSSE3: cost of 1 {{.*}} %G = mul
- ; SSE42: cost of 1 {{.*}} %G = mul
- ; AVX: cost of 1 {{.*}} %G = mul
- ; AVX2: cost of 1 {{.*}} %G = mul
- ; AVX512: cost of 1 {{.*}} %G = mul
- %G = mul <8 x i16> undef, undef
- ; SSSE3: cost of 2 {{.*}} %H = mul
- ; SSE42: cost of 2 {{.*}} %H = mul
- ; AVX: cost of 4 {{.*}} %H = mul
- ; AVX2: cost of 1 {{.*}} %H = mul
- ; AVX512: cost of 1 {{.*}} %H = mul
- %H = mul <16 x i16> undef, undef
- ; SSSE3: cost of 4 {{.*}} %I = mul
- ; SSE42: cost of 4 {{.*}} %I = mul
- ; AVX: cost of 8 {{.*}} %I = mul
- ; AVX2: cost of 2 {{.*}} %I = mul
- ; AVX512F: cost of 2 {{.*}} %I = mul
- ; AVX512BW: cost of 1 {{.*}} %I = mul
- %I = mul <32 x i16> undef, undef
-
- ; SSSE3: cost of 12 {{.*}} %J = mul
- ; SSE42: cost of 12 {{.*}} %J = mul
- ; AVX: cost of 12 {{.*}} %J = mul
- ; AVX2: cost of 7 {{.*}} %J = mul
- ; AVX512F: cost of 5 {{.*}} %J = mul
- ; AVX512BW: cost of 4 {{.*}} %J = mul
- %J = mul <16 x i8> undef, undef
- ; SSSE3: cost of 24 {{.*}} %K = mul
- ; SSE42: cost of 24 {{.*}} %K = mul
- ; AVX: cost of 26 {{.*}} %K = mul
- ; AVX2: cost of 17 {{.*}} %K = mul
- ; AVX512F: cost of 13 {{.*}} %K = mul
- ; AVX512BW: cost of 4 {{.*}} %K = mul
- %K = mul <32 x i8> undef, undef
- ; SSSE3: cost of 48 {{.*}} %L = mul
- ; SSE42: cost of 48 {{.*}} %L = mul
- ; AVX: cost of 52 {{.*}} %L = mul
- ; AVX2: cost of 34 {{.*}} %L = mul
- ; AVX512F: cost of 26 {{.*}} %L = mul
- ; AVX512BW: cost of 11 {{.*}} %L = mul
- %L = mul <64 x i8> undef, undef
+ ; CHECK: cost of 1 {{.*}} %I64 = mul
+ %I64 = mul i64 undef, undef
+ ; SSSE3: cost of 8 {{.*}} %V2I64 = mul
+ ; SSE42: cost of 8 {{.*}} %V2I64 = mul
+ ; AVX: cost of 8 {{.*}} %V2I64 = mul
+ ; AVX2: cost of 8 {{.*}} %V2I64 = mul
+ ; AVX512F: cost of 8 {{.*}} %V2I64 = mul
+ ; AVX512BW: cost of 8 {{.*}} %V2I64 = mul
+ ; AVX512DQ: cost of 1 {{.*}} %V2I64 = mul
+ %V2I64 = mul <2 x i64> undef, undef
+ ; SSSE3: cost of 16 {{.*}} %V4I64 = mul
+ ; SSE42: cost of 16 {{.*}} %V4I64 = mul
+ ; AVX: cost of 18 {{.*}} %V4I64 = mul
+ ; AVX2: cost of 8 {{.*}} %V4I64 = mul
+ ; AVX512F: cost of 8 {{.*}} %V4I64 = mul
+ ; AVX512BW: cost of 8 {{.*}} %V4I64 = mul
+ ; AVX512DQ: cost of 1 {{.*}} %V4I64 = mul
+ %V4I64 = mul <4 x i64> undef, undef
+ ; SSSE3: cost of 32 {{.*}} %V8I64 = mul
+ ; SSE42: cost of 32 {{.*}} %V8I64 = mul
+ ; AVX: cost of 36 {{.*}} %V8I64 = mul
+ ; AVX2: cost of 16 {{.*}} %V8I64 = mul
+ ; AVX512F: cost of 8 {{.*}} %V8I64 = mul
+ ; AVX512BW: cost of 8 {{.*}} %V8I64 = mul
+ ; AVX512DQ: cost of 1 {{.*}} %V8I64 = mul
+ %V8I64 = mul <8 x i64> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I32 = mul
+ %I32 = mul i32 undef, undef
+ ; SSSE3: cost of 6 {{.*}} %V4I32 = mul
+ ; SSE42: cost of 1 {{.*}} %V4I32 = mul
+ ; AVX: cost of 1 {{.*}} %V4I32 = mul
+ ; AVX2: cost of 1 {{.*}} %V4I32 = mul
+ ; AVX512: cost of 1 {{.*}} %V4I32 = mul
+ %V4I32 = mul <4 x i32> undef, undef
+ ; SSSE3: cost of 12 {{.*}} %V8I32 = mul
+ ; SSE42: cost of 2 {{.*}} %V8I32 = mul
+ ; AVX: cost of 4 {{.*}} %V8I32 = mul
+ ; AVX2: cost of 1 {{.*}} %V8I32 = mul
+ ; AVX512: cost of 1 {{.*}} %V8I32 = mul
+ %V8I32 = mul <8 x i32> undef, undef
+ ; SSSE3: cost of 24 {{.*}} %V16I32 = mul
+ ; SSE42: cost of 4 {{.*}} %V16I32 = mul
+ ; AVX: cost of 8 {{.*}} %V16I32 = mul
+ ; AVX2: cost of 2 {{.*}} %V16I32 = mul
+ ; AVX512: cost of 1 {{.*}} %V16I32 = mul
+ %V16I32 = mul <16 x i32> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I16 = mul
+ %I16 = mul i16 undef, undef
+ ; SSSE3: cost of 1 {{.*}} %V8I16 = mul
+ ; SSE42: cost of 1 {{.*}} %V8I16 = mul
+ ; AVX: cost of 1 {{.*}} %V8I16 = mul
+ ; AVX2: cost of 1 {{.*}} %V8I16 = mul
+ ; AVX512: cost of 1 {{.*}} %V8I16 = mul
+ %V8I16 = mul <8 x i16> undef, undef
+ ; SSSE3: cost of 2 {{.*}} %V16I16 = mul
+ ; SSE42: cost of 2 {{.*}} %V16I16 = mul
+ ; AVX: cost of 4 {{.*}} %V16I16 = mul
+ ; AVX2: cost of 1 {{.*}} %V16I16 = mul
+ ; AVX512: cost of 1 {{.*}} %V16I16 = mul
+ %V16I16 = mul <16 x i16> undef, undef
+ ; SSSE3: cost of 4 {{.*}} %V32I16 = mul
+ ; SSE42: cost of 4 {{.*}} %V32I16 = mul
+ ; AVX: cost of 8 {{.*}} %V32I16 = mul
+ ; AVX2: cost of 2 {{.*}} %V32I16 = mul
+ ; AVX512F: cost of 2 {{.*}} %V32I16 = mul
+ ; AVX512BW: cost of 1 {{.*}} %V32I16 = mul
+ %V32I16 = mul <32 x i16> undef, undef
+
+ ; CHECK: cost of 1 {{.*}} %I8 = mul
+ %I8 = mul i8 undef, undef
+ ; SSSE3: cost of 12 {{.*}} %V16I8 = mul
+ ; SSE42: cost of 12 {{.*}} %V16I8 = mul
+ ; AVX: cost of 12 {{.*}} %V16I8 = mul
+ ; AVX2: cost of 7 {{.*}} %V16I8 = mul
+ ; AVX512F: cost of 5 {{.*}} %V16I8 = mul
+ ; AVX512BW: cost of 4 {{.*}} %V16I8 = mul
+ %V16I8 = mul <16 x i8> undef, undef
+ ; SSSE3: cost of 24 {{.*}} %V32I8 = mul
+ ; SSE42: cost of 24 {{.*}} %V32I8 = mul
+ ; AVX: cost of 26 {{.*}} %V32I8 = mul
+ ; AVX2: cost of 17 {{.*}} %V32I8 = mul
+ ; AVX512F: cost of 13 {{.*}} %V32I8 = mul
+ ; AVX512BW: cost of 4 {{.*}} %V32I8 = mul
+ %V32I8 = mul <32 x i8> undef, undef
+ ; SSSE3: cost of 48 {{.*}} %V64I8 = mul
+ ; SSE42: cost of 48 {{.*}} %V64I8 = mul
+ ; AVX: cost of 52 {{.*}} %V64I8 = mul
+ ; AVX2: cost of 34 {{.*}} %V64I8 = mul
+ ; AVX512F: cost of 26 {{.*}} %V64I8 = mul
+ ; AVX512BW: cost of 11 {{.*}} %V64I8 = mul
+ %V64I8 = mul <64 x i8> undef, undef
ret i32 undef
}
diff --git a/test/Analysis/CostModel/X86/interleaved-load-i8.ll b/test/Analysis/CostModel/X86/interleaved-load-i8.ll
new file mode 100644
index 0000000000000..382e5e5301d69
--- /dev/null
+++ b/test/Analysis/CostModel/X86/interleaved-load-i8.ll
@@ -0,0 +1,98 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -S -mcpu=core-avx2 --debug-only=loop-vectorize -vectorizer-maximize-bandwidth < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind readonly uwtable
+define i32 @doit_stride3(i8* nocapture readonly %Ptr, i32 %Nels) {
+;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8
+;CHECK: LV: Found an estimated cost of 11 for VF 2 For instruction: %0 = load i8
+;CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction: %0 = load i8
+;CHECK: LV: Found an estimated cost of 10 for VF 8 For instruction: %0 = load i8
+;CHECK: LV: Found an estimated cost of 20 for VF 16 For instruction: %0 = load i8
+;CHECK: LV: Found an estimated cost of 45 for VF 32 For instruction: %0 = load i8
+entry:
+ %cmp13 = icmp sgt i32 %Nels, 0
+ br i1 %cmp13, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %Ptr.addr.016 = phi i8* [ %incdec.ptr2, %for.body ], [ %Ptr, %for.body.preheader ]
+ %i.015 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %s.014 = phi i32 [ %add6, %for.body ], [ 0, %for.body.preheader ]
+ %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.016, i64 1
+ %0 = load i8, i8* %Ptr.addr.016, align 1
+ %incdec.ptr1 = getelementptr inbounds i8, i8* %Ptr.addr.016, i64 2
+ %1 = load i8, i8* %incdec.ptr, align 1
+ %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.016, i64 3
+ %2 = load i8, i8* %incdec.ptr1, align 1
+ %conv = zext i8 %0 to i32
+ %conv3 = zext i8 %1 to i32
+ %conv4 = zext i8 %2 to i32
+ %add = add i32 %s.014, %conv
+ %add5 = add i32 %add, %conv3
+ %add6 = add i32 %add5, %conv4
+ %inc = add nuw nsw i32 %i.015, 1
+ %exitcond = icmp eq i32 %inc, %Nels
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ %add6.lcssa = phi i32 [ %add6, %for.body ]
+ br label %for.end
+
+for.end:
+ %s.0.lcssa = phi i32 [ 0, %entry ], [ %add6.lcssa, %for.end.loopexit ]
+ ret i32 %s.0.lcssa
+}
+
+; Function Attrs: norecurse nounwind readonly uwtable
+define i32 @doit_stride4(i8* nocapture readonly %Ptr, i32 %Nels) local_unnamed_addr {
+;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8
+;CHECK: LV: Found an estimated cost of 13 for VF 2 For instruction: %0 = load i8
+;CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction: %0 = load i8
+;CHECK: LV: Found an estimated cost of 21 for VF 8 For instruction: %0 = load i8
+;CHECK: LV: Found an estimated cost of 41 for VF 16 For instruction: %0 = load i8
+;CHECK: LV: Found an estimated cost of 84 for VF 32 For instruction: %0 = load i8
+entry:
+ %cmp59 = icmp sgt i32 %Nels, 0
+ br i1 %cmp59, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %Ptr.addr.062 = phi i8* [ %incdec.ptr3, %for.body ], [ %Ptr, %for.body.preheader ]
+ %i.061 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %s.060 = phi i32 [ %cond39, %for.body ], [ 0, %for.body.preheader ]
+ %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 1
+ %0 = load i8, i8* %Ptr.addr.062, align 1
+ %incdec.ptr1 = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 2
+ %1 = load i8, i8* %incdec.ptr, align 1
+ %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 3
+ %2 = load i8, i8* %incdec.ptr1, align 1
+ %incdec.ptr3 = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 4
+ %3 = load i8, i8* %incdec.ptr2, align 1
+ %cmp5 = icmp ult i8 %0, %1
+ %.sink = select i1 %cmp5, i8 %0, i8 %1
+ %cmp12 = icmp ult i8 %.sink, %2
+ %.sink40 = select i1 %cmp12, i8 %.sink, i8 %2
+ %cmp23 = icmp ult i8 %.sink40, %3
+ %.sink41 = select i1 %cmp23, i8 %.sink40, i8 %3
+ %conv28 = zext i8 %.sink41 to i32
+ %cmp33 = icmp slt i32 %s.060, %conv28
+ %cond39 = select i1 %cmp33, i32 %s.060, i32 %conv28
+ %inc = add nuw nsw i32 %i.061, 1
+ %exitcond = icmp eq i32 %inc, %Nels
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ %cond39.lcssa = phi i32 [ %cond39, %for.body ]
+ br label %for.end
+
+for.end:
+ %s.0.lcssa = phi i32 [ 0, %entry ], [ %cond39.lcssa, %for.end.loopexit ]
+ ret i32 %s.0.lcssa
+}
diff --git a/test/Analysis/CostModel/X86/interleaved-store-i8.ll b/test/Analysis/CostModel/X86/interleaved-store-i8.ll
new file mode 100644
index 0000000000000..d8408c1527633
--- /dev/null
+++ b/test/Analysis/CostModel/X86/interleaved-store-i8.ll
@@ -0,0 +1,85 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -S -mcpu=core-avx2 --debug-only=loop-vectorize -vectorizer-maximize-bandwidth < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit_stride3(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr {
+;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv4
+;CHECK: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %conv4
+;CHECK: LV: Found an estimated cost of 9 for VF 4 For instruction: store i8 %conv4
+;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction: store i8 %conv4
+;CHECK: LV: Found an estimated cost of 19 for VF 16 For instruction: store i8 %conv4
+;CHECK: LV: Found an estimated cost of 35 for VF 32 For instruction: store i8 %conv4
+entry:
+ %cmp14 = icmp sgt i32 %Nels, 0
+ br i1 %cmp14, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+ %conv = trunc i32 %Nels to i8
+ %conv1 = shl i8 %conv, 1
+ %conv4 = shl i8 %conv, 2
+ br label %for.body
+
+for.body:
+ %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %Ptr.addr.015 = phi i8* [ %Ptr, %for.body.lr.ph ], [ %incdec.ptr5, %for.body ]
+ %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.015, i64 1
+ store i8 %conv, i8* %Ptr.addr.015, align 1
+ %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.015, i64 2
+ store i8 %conv1, i8* %incdec.ptr, align 1
+ %incdec.ptr5 = getelementptr inbounds i8, i8* %Ptr.addr.015, i64 3
+ store i8 %conv4, i8* %incdec.ptr2, align 1
+ %inc = add nuw nsw i32 %i.016, 1
+ %exitcond = icmp eq i32 %inc, %Nels
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit_stride4(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr {
+;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv7
+;CHECK: LV: Found an estimated cost of 13 for VF 2 For instruction: store i8 %conv7
+;CHECK: LV: Found an estimated cost of 10 for VF 4 For instruction: store i8 %conv7
+;CHECK: LV: Found an estimated cost of 17 for VF 8 For instruction: store i8 %conv7
+;CHECK: LV: Found an estimated cost of 22 for VF 16 For instruction: store i8 %conv7
+;CHECK: LV: Found an estimated cost of 44 for VF 32 For instruction: store i8 %conv7
+entry:
+ %cmp19 = icmp sgt i32 %Nels, 0
+ br i1 %cmp19, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+ %conv = trunc i32 %Nels to i8
+ %conv1 = shl i8 %conv, 1
+ %conv4 = shl i8 %conv, 2
+ %mul6 = mul nsw i32 %Nels, 5
+ %conv7 = trunc i32 %mul6 to i8
+ br label %for.body
+
+for.body:
+ %i.021 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %Ptr.addr.020 = phi i8* [ %Ptr, %for.body.lr.ph ], [ %incdec.ptr8, %for.body ]
+ %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 1
+ store i8 %conv, i8* %Ptr.addr.020, align 1
+ %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 2
+ store i8 %conv1, i8* %incdec.ptr, align 1
+ %incdec.ptr5 = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 3
+ store i8 %conv4, i8* %incdec.ptr2, align 1
+ %incdec.ptr8 = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 4
+ store i8 %conv7, i8* %incdec.ptr5, align 1
+ %inc = add nuw nsw i32 %i.021, 1
+ %exitcond = icmp eq i32 %inc, %Nels
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
diff --git a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
index 3a0ab0f03b995..e797b377556e9 100644
--- a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
+++ b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
@@ -100,3 +100,90 @@ backedge:
exit:
ret i8 0
}
+
+; The cont block is merged with the do block. Make sure the LVI info for cont is not
+; mistakenly used as the LVI info for the beginning of the do block. LVI info for %i is
+; Range[0,1) at the beginning of cont, which would be wrong at the beginning of do.
+define i32 @test3(i32 %i, i1 %f, i32 %n) {
+; CHECK-LABEL: LVI for function 'test3':
+; CHECK-LABEL: entry
+; CHECK: ; LatticeVal for: 'i32 %i' is: overdefined
+; CHECK: %c = icmp ne i32 %i, -2134
+; CHECK: br i1 %c, label %cont, label %exit
+entry:
+ %c = icmp ne i32 %i, -2134
+ br i1 %c, label %do, label %exit ; do is entered only when %i != -2134
+
+exit:
+ %c1 = icmp ne i32 %i, -42
+ br i1 %c1, label %exit2, label %exit ; branches back to exit itself when %i == -42
+
+; CHECK-LABEL: cont:
+; Here cont has been merged with do, so %i can be any value except -2134;
+; it is not restricted to the single value zero.
+; CHECK-NOT: ; LatticeVal for: 'i32 %i' is: constantrange<0, 1>
+; CHECK: ; LatticeVal for: 'i32 %i' is: constantrange<-2133, -2134>
+; CHECK: ; LatticeVal for: ' %cond.0 = icmp sgt i32 %i, 0' in BB: '%cont' is: overdefined
+; CHECK: %cond.0 = icmp sgt i32 %i, 0
+; CHECK: %consume = call i32 @consume
+; CHECK: %cond = icmp eq i32 %i, 0
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond)
+; CHECK: %cond.3 = icmp sgt i32 %i, %n
+; CHECK: br i1 %cond.3, label %exit2, label %exit
+cont: ; in the input IR the only branch to cont is the one at the end of do
+ %cond.3 = icmp sgt i32 %i, %n
+ br i1 %cond.3, label %exit2, label %exit
+
+do: ; in the input IR the only branch to do is the one in entry
+ %cond.0 = icmp sgt i32 %i, 0
+ %consume = call i32 @consume(i1 %cond.0)
+ %cond = icmp eq i32 %i, 0
+ call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+ %cond.2 = icmp sgt i32 %i, 0 ; same comparison as %cond.0
+ br i1 %cond.2, label %exit, label %cont
+
+exit2: ; NOTE(review): the LatticeVal line below has no FileCheck prefix, so it is not verified - confirm intent
+; CHECK-LABEL: exit2:
+; LatticeVal for: 'i32 %i' is: constantrange<-2134, 1>
+ ret i32 30
+}
+
+; FIXME: We should be able to merge cont into do.
+; When we do so, the LVI info for cont must not be used for the merged do block.
+define i32 @test4(i32 %i, i1 %f, i32 %n) {
+; CHECK-LABEL: LVI for function 'test4':
+entry:
+ %c = icmp ne i32 %i, -2134
+ br i1 %c, label %do, label %exit
+
+exit: ; preds = %do, %exit, %entry
+ %c1 = icmp ne i32 %i, -42
+ br i1 %c1, label %exit2, label %exit
+
+cont: ; preds = %do
+; CHECK-LABEL: cont:
+; CHECK: ; LatticeVal for: 'i1 %f' is: constantrange<-1, 0>
+; CHECK: call void @dummy(i1 %f)
+ call void @dummy(i1 %f)
+ br label %exit2
+
+do: ; preds = %entry
+; CHECK-LABEL: do:
+; CHECK: ; LatticeVal for: 'i1 %f' is: overdefined
+; CHECK: call void @dummy(i1 %f)
+; CHECK: br i1 %cond, label %exit, label %cont
+ call void @dummy(i1 %f) ; this use of %f comes before the assume below
+ %consume = call i32 @exit()
+ call void @llvm.assume(i1 %f)
+ %cond = icmp eq i1 %f, false
+ br i1 %cond, label %exit, label %cont ; cont is the false edge, i.e. taken when %f is true
+
+exit2: ; preds = %cont, %exit
+ ret i32 30
+}
+
+declare i32 @exit() ; called in test4's do block
+declare i32 @consume(i1) ; called in test3's do block with %cond.0
+declare void @llvm.assume(i1) nounwind ; called in test4's do block on %f
+declare void @dummy(i1) nounwind ; called in test4's cont and do blocks on %f
+declare void @llvm.experimental.guard(i1, ...) ; called in test3's do block on %cond
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
index 455968d7a4017..8ac6301f93184 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
@@ -26,21 +26,21 @@ for.body: ; preds = %entry, %for.body
%idxprom = sext i32 %sub to i64
%half = bitcast %union.vector_t* %vb to [8 x i16]*
%arrayidx = getelementptr inbounds [8 x i16], [8 x i16]* %half, i64 0, i64 %idxprom
- %tmp4 = load i16, i16* %arrayidx, align 2, !tbaa !0
+ %tmp4 = load i16, i16* %arrayidx, align 2, !tbaa !10
%conv = zext i16 %tmp4 to i32
%and = and i32 %conv, 15
%sub6 = sub nsw i32 7, %i.01
%idxprom7 = sext i32 %sub6 to i64
%half9 = bitcast %union.vector_t* %va to [8 x i16]*
%arrayidx10 = getelementptr inbounds [8 x i16], [8 x i16]* %half9, i64 0, i64 %idxprom7
- %tmp11 = load i16, i16* %arrayidx10, align 2, !tbaa !0
+ %tmp11 = load i16, i16* %arrayidx10, align 2, !tbaa !10
%conv12 = zext i16 %tmp11 to i32
%shl = shl i32 %conv12, %and
%sub15 = sub nsw i32 7, %i.01
%idxprom16 = sext i32 %sub15 to i64
%half18 = bitcast %union.vector_t* %va to [8 x i16]*
%arrayidx19 = getelementptr inbounds [8 x i16], [8 x i16]* %half18, i64 0, i64 %idxprom16
- %tmp20 = load i16, i16* %arrayidx19, align 2, !tbaa !0
+ %tmp20 = load i16, i16* %arrayidx19, align 2, !tbaa !10
%conv21 = zext i16 %tmp20 to i32
%sub23 = sub nsw i32 16, %and
%shr = lshr i32 %conv21, %sub23
@@ -50,20 +50,20 @@ for.body: ; preds = %entry, %for.body
%idxprom27 = sext i32 %sub26 to i64
%half28 = bitcast %union.vector_t* %t to [8 x i16]*
%arrayidx29 = getelementptr inbounds [8 x i16], [8 x i16]* %half28, i64 0, i64 %idxprom27
- store i16 %conv24, i16* %arrayidx29, align 2, !tbaa !0
+ store i16 %conv24, i16* %arrayidx29, align 2, !tbaa !10
%inc = add nsw i32 %i.01, 1
%cmp = icmp slt i32 %inc, 8
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body
%arrayidx31 = getelementptr inbounds %union.vector_t, %union.vector_t* %t, i64 0, i32 0, i64 1
- %tmp32 = load i64, i64* %arrayidx31, align 8, !tbaa !3
+ %tmp32 = load i64, i64* %arrayidx31, align 8, !tbaa !10
%arrayidx35 = getelementptr inbounds %union.vector_t, %union.vector_t* %vd, i64 0, i32 0, i64 1
- store i64 %tmp32, i64* %arrayidx35, align 8, !tbaa !3
+ store i64 %tmp32, i64* %arrayidx35, align 8, !tbaa !10
%arrayidx37 = getelementptr inbounds %union.vector_t, %union.vector_t* %t, i64 0, i32 0, i64 0
- %tmp38 = load i64, i64* %arrayidx37, align 8, !tbaa !3
+ %tmp38 = load i64, i64* %arrayidx37, align 8, !tbaa !10
%arrayidx41 = getelementptr inbounds %union.vector_t, %union.vector_t* %vd, i64 0, i32 0, i64 0
- store i64 %tmp38, i64* %arrayidx41, align 8, !tbaa !3
+ store i64 %tmp38, i64* %arrayidx41, align 8, !tbaa !10
ret void
}
@@ -124,7 +124,7 @@ for.end: ; preds = %for.body
}
; CHECK: [[TAG]] = !{[[TYPE_LL:!.*]], [[TYPE_LL]], i64 0}
-; CHECK: [[TYPE_LL]] = !{!"long long", {{!.*}}}
+; CHECK: [[TYPE_LL]] = !{!"omnipotent char", {{!.*}}}
!0 = !{!6, !6, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
@@ -135,3 +135,4 @@ for.end: ; preds = %for.body
!7 = !{!"long long", !1}
!8 = !{!"int", !1}
!9 = !{!"float", !1}
+!10 = !{!1, !1, i64 0}