diff options
Diffstat (limited to 'test/Analysis')
-rw-r--r-- | test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll | 4 | ||||
-rw-r--r-- | test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll | 2 | ||||
-rw-r--r-- | test/Analysis/BasicAA/bug.23540.ll | 2 | ||||
-rw-r--r-- | test/Analysis/BasicAA/bug.23626.ll | 14 | ||||
-rw-r--r-- | test/Analysis/BasicAA/constant-over-index.ll | 2 | ||||
-rw-r--r-- | test/Analysis/BasicAA/fallback-mayalias.ll | 23 | ||||
-rw-r--r-- | test/Analysis/BasicAA/q.bad.ll | 14 | ||||
-rw-r--r-- | test/Analysis/BasicAA/returned.ll | 16 | ||||
-rw-r--r-- | test/Analysis/BasicAA/sequential-gep.ll | 6 | ||||
-rw-r--r-- | test/Analysis/BasicAA/struct-geps.ll | 78 | ||||
-rw-r--r-- | test/Analysis/BasicAA/zext.ll | 8 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/arith.ll | 1002 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/interleaved-load-i8.ll | 98 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/interleaved-store-i8.ll | 85 | ||||
-rw-r--r-- | test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll | 87 | ||||
-rw-r--r-- | test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll | 19 |
16 files changed, 901 insertions, 559 deletions
diff --git a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll index 8388d6c97adfe..200e24f428ef3 100644 --- a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll +++ b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll @@ -3,9 +3,9 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: Function: foo -; CHECK: PartialAlias: i32* %Ipointer, i32* %Jpointer +; CHECK: MayAlias: i32* %Ipointer, i32* %Jpointer ; CHECK: 9 no alias responses -; CHECK: 6 partial alias responses +; CHECK: 6 may alias responses define void @foo(i32* noalias %p, i32* noalias %q, i32 %i, i32 %j) { %Ipointer = getelementptr i32, i32* %p, i32 %i diff --git a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll index b2e7a60047bd3..79421824e6ea2 100644 --- a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll +++ b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:32:32:32" -; CHECK: 1 partial alias response +; CHECK: 1 may alias responses define i32 @test(i32* %tab, i32 %indvar) nounwind { %tmp31 = mul i32 %indvar, -2 diff --git a/test/Analysis/BasicAA/bug.23540.ll b/test/Analysis/BasicAA/bug.23540.ll index f693bcf73cd63..6a00abdce3a10 100644 --- a/test/Analysis/BasicAA/bug.23540.ll +++ b/test/Analysis/BasicAA/bug.23540.ll @@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu" @c = external global i32 ; CHECK-LABEL: f -; CHECK: PartialAlias: i32* %arrayidx, i32* %arrayidx6 +; CHECK: MayAlias: i32* %arrayidx, i32* %arrayidx6 define void @f() { %idxprom = zext i32 undef to i64 %add4 = add i32 0, 1 diff --git a/test/Analysis/BasicAA/bug.23626.ll b/test/Analysis/BasicAA/bug.23626.ll index 6a1478c65cefd..7d5b5ad06698d 100644 --- a/test/Analysis/BasicAA/bug.23626.ll +++ b/test/Analysis/BasicAA/bug.23626.ll @@ -3,12 +3,12 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-darwin13.4.0" ; CHECK-LABEL: compute1 -; CHECK: PartialAlias: i32* %arrayidx8, i32* %out -; CHECK: PartialAlias: i32* %arrayidx11, i32* %out -; CHECK: PartialAlias: i32* %arrayidx11, i32* %arrayidx8 -; CHECK: PartialAlias: i32* %arrayidx14, i32* %out -; CHECK: PartialAlias: i32* %arrayidx14, i32* %arrayidx8 -; CHECK: PartialAlias: i32* %arrayidx11, i32* %arrayidx14 +; CHECK: MayAlias: i32* %arrayidx8, i32* %out +; CHECK: MayAlias: i32* %arrayidx11, i32* %out +; CHECK: MayAlias: i32* %arrayidx11, i32* %arrayidx8 +; CHECK: MayAlias: i32* %arrayidx14, i32* %out +; CHECK: MayAlias: i32* %arrayidx14, i32* %arrayidx8 +; CHECK: MayAlias: i32* %arrayidx11, i32* %arrayidx14 define void @compute1(i32 %num.0.lcssa, i32* %out) { %idxprom = zext i32 %num.0.lcssa to i64 %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %idxprom @@ -22,7 +22,7 @@ define void @compute1(i32 %num.0.lcssa, i32* %out) { } ; CHECK-LABEL: compute2 -; CHECK: PartialAlias: i32* %arrayidx11, i32* %out.addr +; CHECK: MayAlias: i32* %arrayidx11, i32* %out.addr define void @compute2(i32 %num, i32* %out.addr) { %add9 = add i32 %num, 1 %idxprom10 = zext i32 %add9 to i64 diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll index f5e2c7c13617c..f77156305c260 100644 --- a/test/Analysis/BasicAA/constant-over-index.ll +++ b/test/Analysis/BasicAA/constant-over-index.ll @@ -3,7 +3,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -; CHECK: PartialAlias: double* %p.0.i.0, double* %p3 +; CHECK: MayAlias: double* %p.0.i.0, double* %p3 ; %p3 is equal to %p.0.i.0 on the second iteration of the loop, ; so MayAlias is needed. In practice, basicaa returns PartialAlias diff --git a/test/Analysis/BasicAA/fallback-mayalias.ll b/test/Analysis/BasicAA/fallback-mayalias.ll new file mode 100644 index 0000000000000..a1e4b12d20ade --- /dev/null +++ b/test/Analysis/BasicAA/fallback-mayalias.ll @@ -0,0 +1,23 @@ +; RUN: opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s + +; Check that BasicAA falls back to MayAlias (instead of PartialAlias) when none +; of its little tricks are applicable. + +; CHECK: MayAlias: float* %arrayidxA, float* %arrayidxB + +define void @fallback_mayalias(float* noalias nocapture %C, i64 %i, i64 %j) local_unnamed_addr { +entry: + %shl = shl i64 %i, 3 + %mul = shl nsw i64 %j, 4 + %addA = add nsw i64 %mul, %shl + %orB = or i64 %shl, 1 + %addB = add nsw i64 %mul, %orB + + %arrayidxA = getelementptr inbounds float, float* %C, i64 %addA + store float undef, float* %arrayidxA, align 4 + + %arrayidxB = getelementptr inbounds float, float* %C, i64 %addB + store float undef, float* %arrayidxB, align 4 + + ret void +} diff --git a/test/Analysis/BasicAA/q.bad.ll b/test/Analysis/BasicAA/q.bad.ll index f2de6a76c5e01..2c7bc1d8591ee 100644 --- a/test/Analysis/BasicAA/q.bad.ll +++ b/test/Analysis/BasicAA/q.bad.ll @@ -15,7 +15,7 @@ define void @test_zext_sext_amounts255(i8* %mem) { } ; CHECK-LABEL: test_zext_sext_amounts -; CHECK: PartialAlias: i8* %a, i8* %b +; CHECK: MayAlias: i8* %a, i8* %b ; %a and %b only PartialAlias as, although they're both zext(sext(%num)) they'll extend the sign by a different ; number of bits before zext-ing the remainder. define void @test_zext_sext_amounts(i8* %mem, i8 %num) { @@ -44,9 +44,9 @@ define void @based_on_pr18068(i32 %loaded, i8* %mem) { } ; CHECK-LABEL: test_path_dependence -; CHECK: PartialAlias: i8* %a, i8* %b +; CHECK: MayAlias: i8* %a, i8* %b ; CHECK: MustAlias: i8* %a, i8* %c -; CHECK: PartialAlias: i8* %a, i8* %d +; CHECK: MayAlias: i8* %a, i8* %d define void @test_path_dependence(i32 %p, i8* %mem) { %p.minus1 = add i32 %p, -1 ; this will always unsigned-wrap, unless %p == 0 %p.minus1.64 = zext i32 %p.minus1 to i64 @@ -83,7 +83,7 @@ define void @test_zext_sext_255(i8* %mem) { } ; CHECK-LABEL: test_zext_sext_num -; CHECK: PartialAlias: i8* %a, i8* %b +; CHECK: MayAlias: i8* %a, i8* %b ; %a and %b NoAlias if %num == 255 (see @test_zext_sext_255), but %a and %b NoAlias for other values of %num (e.g. 0) define void @test_zext_sext_num(i8* %mem, i8 %num) { %zext.num = zext i8 %num to i16 @@ -142,9 +142,9 @@ define void @constantOffsetHeuristic_i8_i32(i32* %mem, i8 %val) { } ; CHECK-LABEL: constantOffsetHeuristic_i3_i8 -; CHECK: PartialAlias: i32* %a, i32* %b +; CHECK: MayAlias: i32* %a, i32* %b ; CHECK: NoAlias: i32* %a, i32* %c -; CHECK: PartialAlias: i32* %b, i32* %c +; CHECK: MayAlias: i32* %b, i32* %c define void @constantOffsetHeuristic_i3_i8(i8* %mem, i3 %val) { %zext.plus.7 = add nsw i3 %val, 7 %zext.plus.4 = add nsw i3 %val, 4 @@ -161,7 +161,7 @@ define void @constantOffsetHeuristic_i3_i8(i8* %mem, i3 %val) { } ; CHECK-LABEL: constantOffsetHeuristic_i8_i8 -; CHECK: PartialAlias: i32* %a, i32* %b +; CHECK: MayAlias: i32* %a, i32* %b ; CHECK: NoAlias: i32* %a, i32* %c ; CHECK: NoAlias: i32* %b, i32* %c define void @constantOffsetHeuristic_i8_i8(i8* %mem, i8 %val) { diff --git a/test/Analysis/BasicAA/returned.ll b/test/Analysis/BasicAA/returned.ll index c6ef6806140df..f0d0f1ec1fe9b 100644 --- a/test/Analysis/BasicAA/returned.ll +++ b/test/Analysis/BasicAA/returned.ll @@ -8,20 +8,20 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-DAG: MustAlias: %struct* %st, %struct* %sta -; CHECK-DAG: PartialAlias: %struct* %st, i32* %x -; CHECK-DAG: PartialAlias: %struct* %st, i32* %y -; CHECK-DAG: PartialAlias: %struct* %st, i32* %z +; CHECK-DAG: MayAlias: %struct* %st, i32* %x +; CHECK-DAG: MayAlias: %struct* %st, i32* %y +; CHECK-DAG: MayAlias: %struct* %st, i32* %z ; CHECK-DAG: NoAlias: i32* %x, i32* %y ; CHECK-DAG: NoAlias: i32* %x, i32* %z ; CHECK-DAG: NoAlias: i32* %y, i32* %z -; CHECK-DAG: PartialAlias: %struct* %st, %struct* %y_12 -; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x -; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10 +; CHECK-DAG: MayAlias: %struct* %st, %struct* %y_12 +; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x +; CHECK-DAG: MayAlias: i32* %x, i80* %y_10 -; CHECK-DAG: PartialAlias: %struct* %st, i64* %y_8 -; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8 +; CHECK-DAG: MayAlias: %struct* %st, i64* %y_8 +; CHECK-DAG: MayAlias: i32* %z, i64* %y_8 ; CHECK-DAG: NoAlias: i32* %x, i64* %y_8 ; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y diff --git a/test/Analysis/BasicAA/sequential-gep.ll b/test/Analysis/BasicAA/sequential-gep.ll index c17a782aa04b6..5bedab61e17dc 100644 --- a/test/Analysis/BasicAA/sequential-gep.ll +++ b/test/Analysis/BasicAA/sequential-gep.ll @@ -11,7 +11,7 @@ define void @t1([8 x i32]* %p, i32 %addend, i32* %q) { } ; CHECK: Function: t2 -; CHECK: PartialAlias: i32* %gep1, i32* %gep2 +; CHECK: MayAlias: i32* %gep1, i32* %gep2 define void @t2([8 x i32]* %p, i32 %addend, i32* %q) { %knownnonzero = load i32, i32* %q, !range !0 %add = add nsw nuw i32 %addend, %knownnonzero @@ -31,7 +31,7 @@ define void @t3([8 x i32]* %p, i32 %addend, i32* %q) { } ; CHECK: Function: t4 -; CHECK: PartialAlias: i32* %gep1, i32* %gep2 +; CHECK: MayAlias: i32* %gep1, i32* %gep2 define void @t4([8 x i32]* %p, i32 %addend, i32* %q) { %knownnonzero = load i32, i32* %q, !range !0 %add = add nsw nuw i32 %addend, %knownnonzero @@ -41,7 +41,7 @@ define void @t4([8 x i32]* %p, i32 %addend, i32* %q) { } ; CHECK: Function: t5 -; CHECK: PartialAlias: i32* %gep2, i64* %bc +; CHECK: MayAlias: i32* %gep2, i64* %bc define void @t5([8 x i32]* %p, i32 %addend, i32* %q) { %knownnonzero = load i32, i32* %q, !range !0 %add = add nsw nuw i32 %addend, %knownnonzero diff --git a/test/Analysis/BasicAA/struct-geps.ll b/test/Analysis/BasicAA/struct-geps.ll index 2d85e1dd0173d..e048baf4c64a9 100644 --- a/test/Analysis/BasicAA/struct-geps.ll +++ b/test/Analysis/BasicAA/struct-geps.ll @@ -6,20 +6,20 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-LABEL: test_simple -; CHECK-DAG: PartialAlias: %struct* %st, i32* %x -; CHECK-DAG: PartialAlias: %struct* %st, i32* %y -; CHECK-DAG: PartialAlias: %struct* %st, i32* %z +; CHECK-DAG: MayAlias: %struct* %st, i32* %x +; CHECK-DAG: MayAlias: %struct* %st, i32* %y +; CHECK-DAG: MayAlias: %struct* %st, i32* %z ; CHECK-DAG: NoAlias: i32* %x, i32* %y ; CHECK-DAG: NoAlias: i32* %x, i32* %z ; CHECK-DAG: NoAlias: i32* %y, i32* %z -; CHECK-DAG: PartialAlias: %struct* %st, %struct* %y_12 -; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x -; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10 +; CHECK-DAG: MayAlias: %struct* %st, %struct* %y_12 +; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x +; CHECK-DAG: MayAlias: i32* %x, i80* %y_10 -; CHECK-DAG: PartialAlias: %struct* %st, i64* %y_8 -; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8 +; CHECK-DAG: MayAlias: %struct* %st, i64* %y_8 +; CHECK-DAG: MayAlias: i32* %z, i64* %y_8 ; CHECK-DAG: NoAlias: i32* %x, i64* %y_8 ; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y @@ -38,20 +38,20 @@ define void @test_simple(%struct* %st, i64 %i, i64 %j, i64 %k) { ; CHECK-LABEL: test_in_array -; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i32* %x -; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i32* %y -; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i32* %z +; CHECK-DAG: MayAlias: [1 x %struct]* %st, i32* %x +; CHECK-DAG: MayAlias: [1 x %struct]* %st, i32* %y +; CHECK-DAG: MayAlias: [1 x %struct]* %st, i32* %z ; CHECK-DAG: NoAlias: i32* %x, i32* %y ; CHECK-DAG: NoAlias: i32* %x, i32* %z ; CHECK-DAG: NoAlias: i32* %y, i32* %z -; CHECK-DAG: PartialAlias: %struct* %y_12, [1 x %struct]* %st -; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x -; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10 +; CHECK-DAG: MayAlias: %struct* %y_12, [1 x %struct]* %st +; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x +; CHECK-DAG: MayAlias: i32* %x, i80* %y_10 -; CHECK-DAG: PartialAlias: [1 x %struct]* %st, i64* %y_8 -; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8 +; CHECK-DAG: MayAlias: [1 x %struct]* %st, i64* %y_8 +; CHECK-DAG: MayAlias: i32* %z, i64* %y_8 ; CHECK-DAG: NoAlias: i32* %x, i64* %y_8 ; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y @@ -70,20 +70,20 @@ define void @test_in_array([1 x %struct]* %st, i64 %i, i64 %j, i64 %k, i64 %i1, ; CHECK-LABEL: test_in_3d_array -; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %x -; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %y -; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %z +; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %x +; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %y +; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i32* %z ; CHECK-DAG: NoAlias: i32* %x, i32* %y ; CHECK-DAG: NoAlias: i32* %x, i32* %z ; CHECK-DAG: NoAlias: i32* %y, i32* %z -; CHECK-DAG: PartialAlias: %struct* %y_12, [1 x [1 x [1 x %struct]]]* %st -; CHECK-DAG: PartialAlias: %struct* %y_12, i32* %x -; CHECK-DAG: PartialAlias: i32* %x, i80* %y_10 +; CHECK-DAG: MayAlias: %struct* %y_12, [1 x [1 x [1 x %struct]]]* %st +; CHECK-DAG: MayAlias: %struct* %y_12, i32* %x +; CHECK-DAG: MayAlias: i32* %x, i80* %y_10 -; CHECK-DAG: PartialAlias: [1 x [1 x [1 x %struct]]]* %st, i64* %y_8 -; CHECK-DAG: PartialAlias: i32* %z, i64* %y_8 +; CHECK-DAG: MayAlias: [1 x [1 x [1 x %struct]]]* %st, i64* %y_8 +; CHECK-DAG: MayAlias: i32* %z, i64* %y_8 ; CHECK-DAG: NoAlias: i32* %x, i64* %y_8 ; CHECK-DAG: MustAlias: %struct* %y_12, i32* %y @@ -106,14 +106,14 @@ define void @test_in_3d_array([1 x [1 x [1 x %struct]]]* %st, i64 %i, i64 %j, i6 ; CHECK-DAG: NoAlias: i32* %y, i32* %y2 ; CHECK-DAG: NoAlias: i32* %z, i32* %z2 -; CHECK-DAG: PartialAlias: i32* %x, i32* %y2 -; CHECK-DAG: PartialAlias: i32* %x, i32* %z2 +; CHECK-DAG: MayAlias: i32* %x, i32* %y2 +; CHECK-DAG: MayAlias: i32* %x, i32* %z2 -; CHECK-DAG: PartialAlias: i32* %x2, i32* %y -; CHECK-DAG: PartialAlias: i32* %y, i32* %z2 +; CHECK-DAG: MayAlias: i32* %x2, i32* %y +; CHECK-DAG: MayAlias: i32* %y, i32* %z2 -; CHECK-DAG: PartialAlias: i32* %x2, i32* %z -; CHECK-DAG: PartialAlias: i32* %y2, i32* %z +; CHECK-DAG: MayAlias: i32* %x2, i32* %z +; CHECK-DAG: MayAlias: i32* %y2, i32* %z define void @test_same_underlying_object_same_indices(%struct* %st, i64 %i, i64 %j, i64 %k) { %st2 = getelementptr %struct, %struct* %st, i32 10 @@ -128,18 +128,18 @@ define void @test_same_underlying_object_same_indices(%struct* %st, i64 %i, i64 ; CHECK-LABEL: test_same_underlying_object_different_indices -; CHECK-DAG: PartialAlias: i32* %x, i32* %x2 -; CHECK-DAG: PartialAlias: i32* %y, i32* %y2 -; CHECK-DAG: PartialAlias: i32* %z, i32* %z2 +; CHECK-DAG: MayAlias: i32* %x, i32* %x2 +; CHECK-DAG: MayAlias: i32* %y, i32* %y2 +; CHECK-DAG: MayAlias: i32* %z, i32* %z2 -; CHECK-DAG: PartialAlias: i32* %x, i32* %y2 -; CHECK-DAG: PartialAlias: i32* %x, i32* %z2 +; CHECK-DAG: MayAlias: i32* %x, i32* %y2 +; CHECK-DAG: MayAlias: i32* %x, i32* %z2 -; CHECK-DAG: PartialAlias: i32* %x2, i32* %y -; CHECK-DAG: PartialAlias: i32* %y, i32* %z2 +; CHECK-DAG: MayAlias: i32* %x2, i32* %y +; CHECK-DAG: MayAlias: i32* %y, i32* %z2 -; CHECK-DAG: PartialAlias: i32* %x2, i32* %z -; CHECK-DAG: PartialAlias: i32* %y2, i32* %z +; CHECK-DAG: MayAlias: i32* %x2, i32* %z +; CHECK-DAG: MayAlias: i32* %y2, i32* %z define void @test_same_underlying_object_different_indices(%struct* %st, i64 %i1, i64 %j1, i64 %k1, i64 %i2, i64 %k2, i64 %j2) { %st2 = getelementptr %struct, %struct* %st, i32 10 diff --git a/test/Analysis/BasicAA/zext.ll b/test/Analysis/BasicAA/zext.ll index 685d45be61512..f8f02353db2ec 100644 --- a/test/Analysis/BasicAA/zext.ll +++ b/test/Analysis/BasicAA/zext.ll @@ -69,7 +69,7 @@ for.loop.exit: } ; CHECK-LABEL: test_sign_extension -; CHECK: PartialAlias: i64* %b.i64, i8* %a +; CHECK: MayAlias: i64* %b.i64, i8* %a define void @test_sign_extension(i32 %p) { %1 = tail call i8* @malloc(i64 120) @@ -83,7 +83,7 @@ define void @test_sign_extension(i32 %p) { } ; CHECK-LABEL: test_fe_tools -; CHECK: PartialAlias: i32* %a, i32* %b +; CHECK: MayAlias: i32* %a, i32* %b define void @test_fe_tools([8 x i32]* %values) { br label %reorder @@ -108,7 +108,7 @@ for.loop.exit: @d = global i32 0, align 4 ; CHECK-LABEL: test_spec2006 -; CHECK: PartialAlias: i32** %x, i32** %y +; CHECK: MayAlias: i32** %x, i32** %y define void @test_spec2006() { %h = alloca [1 x [2 x i32*]], align 16 @@ -164,7 +164,7 @@ for.loop.exit: } ; CHECK-LABEL: test_modulo_analysis_with_global -; CHECK: PartialAlias: i32** %x, i32** %y +; CHECK: MayAlias: i32** %x, i32** %y define void @test_modulo_analysis_with_global() { %h = alloca [1 x [2 x i32*]], align 16 diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll index b7a615f55cdef..d9e06a3e7b411 100644 --- a/test/Analysis/CostModel/X86/arith.ll +++ b/test/Analysis/CostModel/X86/arith.ll @@ -1,516 +1,564 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" ; CHECK-LABEL: 'add' define i32 @add(i32 %arg) { - ; SSSE3: cost of 1 {{.*}} %A = add - ; SSE42: cost of 1 {{.*}} %A = add - ; AVX: cost of 1 {{.*}} %A = add - ; AVX2: cost of 1 {{.*}} %A = add - ; AVX512: cost of 1 {{.*}} %A = add - %A = add <2 x i64> undef, undef - ; SSSE3: cost of 2 {{.*}} %B = add - ; SSE42: cost of 2 {{.*}} %B = add - ; AVX: cost of 4 {{.*}} %B = add - ; AVX2: cost of 1 {{.*}} %B = add - ; AVX512: cost of 1 {{.*}} %B = add - %B = add <4 x i64> undef, undef - ; SSSE3: cost of 4 {{.*}} %C = add - ; SSE42: cost of 4 {{.*}} %C = add - ; AVX: cost of 8 {{.*}} %C = add - ; AVX2: cost of 2 {{.*}} %C = add - ; AVX512: cost of 1 {{.*}} %C = add - %C = add <8 x i64> undef, undef - - ; SSSE3: cost of 1 {{.*}} %D = add - ; SSE42: cost of 1 {{.*}} %D = add - ; AVX: cost of 1 {{.*}} %D = add - ; AVX2: cost of 1 {{.*}} %D = add - ; AVX512: cost of 1 {{.*}} %D = add - %D = add <4 x i32> undef, undef - ; SSSE3: cost of 2 {{.*}} %E = add - ; SSE42: cost of 2 {{.*}} %E = add - ; AVX: cost of 4 {{.*}} %E = add - ; AVX2: cost of 1 {{.*}} %E = add - ; AVX512: cost of 1 {{.*}} %E = add - %E = add <8 x i32> undef, undef - ; SSSE3: cost of 4 {{.*}} %F = add - ; SSE42: cost of 4 {{.*}} %F = add - ; AVX: cost of 8 {{.*}} %F = add - ; AVX2: cost of 2 {{.*}} %F = add - ; AVX512: cost of 1 {{.*}} %F = add - %F = add <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = add - ; SSE42: cost of 1 {{.*}} %G = add - ; AVX: cost of 1 {{.*}} %G = add - ; AVX2: cost of 1 {{.*}} %G = add - ; AVX512: cost of 1 {{.*}} %G = add - %G = add <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = add - ; SSE42: cost of 2 {{.*}} %H = add - ; AVX: cost of 4 {{.*}} %H = add - ; AVX2: cost of 1 {{.*}} %H = add - ; AVX512: cost of 1 {{.*}} %H = add - %H = add <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = add - ; SSE42: cost of 4 {{.*}} %I = add - ; AVX: cost of 8 {{.*}} %I = add - ; AVX2: cost of 2 {{.*}} %I = add - ; AVX512F: cost of 2 {{.*}} %I = add - ; AVX512BW: cost of 1 {{.*}} %I = add - %I = add <32 x i16> undef, undef - - ; SSSE3: cost of 1 {{.*}} %J = add - ; SSE42: cost of 1 {{.*}} %J = add - ; AVX: cost of 1 {{.*}} %J = add - ; AVX2: cost of 1 {{.*}} %J = add - ; AVX512: cost of 1 {{.*}} %J = add - %J = add <16 x i8> undef, undef - ; SSSE3: cost of 2 {{.*}} %K = add - ; SSE42: cost of 2 {{.*}} %K = add - ; AVX: cost of 4 {{.*}} %K = add - ; AVX2: cost of 1 {{.*}} %K = add - ; AVX512: cost of 1 {{.*}} %K = add - %K = add <32 x i8> undef, undef - ; SSSE3: cost of 4 {{.*}} %L = add - ; SSE42: cost of 4 {{.*}} %L = add - ; AVX: cost of 8 {{.*}} %L = add - ; AVX2: cost of 2 {{.*}} %L = add - ; AVX512F: cost of 2 {{.*}} %L = add - ; AVX512BW: cost of 1 {{.*}} %L = add - %L = add <64 x i8> undef, undef + ; CHECK: cost of 1 {{.*}} %I64 = add + %I64 = add i64 undef, undef + ; SSSE3: cost of 1 {{.*}} %V2I64 = add + ; SSE42: cost of 1 {{.*}} %V2I64 = add + ; AVX: cost of 1 {{.*}} %V2I64 = add + ; AVX2: cost of 1 {{.*}} %V2I64 = add + ; AVX512: cost of 1 {{.*}} %V2I64 = add + %V2I64 = add <2 x i64> undef, undef + ; SSSE3: cost of 2 {{.*}} %V4I64 = add + ; SSE42: cost of 2 {{.*}} %V4I64 = add + ; AVX: cost of 4 {{.*}} %V4I64 = add + ; AVX2: cost of 1 {{.*}} %V4I64 = add + ; AVX512: cost of 1 {{.*}} %V4I64 = add + %V4I64 = add <4 x i64> undef, undef + ; SSSE3: cost of 4 {{.*}} %V8I64 = add + ; SSE42: cost of 4 {{.*}} %V8I64 = add + ; AVX: cost of 8 {{.*}} %V8I64 = add + ; AVX2: cost of 2 {{.*}} %V8I64 = add + ; AVX512: cost of 1 {{.*}} %V8I64 = add + %V8I64 = add <8 x i64> undef, undef + + ; CHECK: cost of 1 {{.*}} %I32 = add + %I32 = add i32 undef, undef + ; SSSE3: cost of 1 {{.*}} %V4I32 = add + ; SSE42: cost of 1 {{.*}} %V4I32 = add + ; AVX: cost of 1 {{.*}} %V4I32 = add + ; AVX2: cost of 1 {{.*}} %V4I32 = add + ; AVX512: cost of 1 {{.*}} %V4I32 = add + %V4I32 = add <4 x i32> undef, undef + ; SSSE3: cost of 2 {{.*}} %V8I32 = add + ; SSE42: cost of 2 {{.*}} %V8I32 = add + ; AVX: cost of 4 {{.*}} %V8I32 = add + ; AVX2: cost of 1 {{.*}} %V8I32 = add + ; AVX512: cost of 1 {{.*}} %V8I32 = add + %V8I32 = add <8 x i32> undef, undef + ; SSSE3: cost of 4 {{.*}} %V16I32 = add + ; SSE42: cost of 4 {{.*}} %V16I32 = add + ; AVX: cost of 8 {{.*}} %V16I32 = add + ; AVX2: cost of 2 {{.*}} %V16I32 = add + ; AVX512: cost of 1 {{.*}} %V16I32 = add + %V16I32 = add <16 x i32> undef, undef + + ; CHECK: cost of 1 {{.*}} %I16 = add + %I16 = add i16 undef, undef + ; SSSE3: cost of 1 {{.*}} %V8I16 = add + ; SSE42: cost of 1 {{.*}} %V8I16 = add + ; AVX: cost of 1 {{.*}} %V8I16 = add + ; AVX2: cost of 1 {{.*}} %V8I16 = add + ; AVX512: cost of 1 {{.*}} %V8I16 = add + %V8I16 = add <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = add + ; SSE42: cost of 2 {{.*}} %V16I16 = add + ; AVX: cost of 4 {{.*}} %V16I16 = add + ; AVX2: cost of 1 {{.*}} %V16I16 = add + ; AVX512: cost of 1 {{.*}} %V16I16 = add + %V16I16 = add <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = add + ; SSE42: cost of 4 {{.*}} %V32I16 = add + ; AVX: cost of 8 {{.*}} %V32I16 = add + ; AVX2: cost of 2 {{.*}} %V32I16 = add + ; AVX512F: cost of 2 {{.*}} %V32I16 = add + ; AVX512BW: cost of 1 {{.*}} %V32I16 = add + %V32I16 = add <32 x i16> undef, undef + + ; CHECK: cost of 1 {{.*}} %I8 = add + %I8 = add i8 undef, undef + ; SSSE3: cost of 1 {{.*}} %V16I8 = add + ; SSE42: cost of 1 {{.*}} %V16I8 = add + ; AVX: cost of 1 {{.*}} %V16I8 = add + ; AVX2: cost of 1 {{.*}} %V16I8 = add + ; AVX512: cost of 1 {{.*}} %V16I8 = add + %V16I8 = add <16 x i8> undef, undef + ; SSSE3: cost of 2 {{.*}} %V32I8 = add + ; SSE42: cost of 2 {{.*}} %V32I8 = add + ; AVX: cost of 4 {{.*}} %V32I8 = add + ; AVX2: cost of 1 {{.*}} %V32I8 = add + ; AVX512: cost of 1 {{.*}} %V32I8 = add + %V32I8 = add <32 x i8> undef, undef + ; SSSE3: cost of 4 {{.*}} %V64I8 = add + ; SSE42: cost of 4 {{.*}} %V64I8 = add + ; AVX: cost of 8 {{.*}} %V64I8 = add + ; AVX2: cost of 2 {{.*}} %V64I8 = add + ; AVX512F: cost of 2 {{.*}} %V64I8 = add + ; AVX512BW: cost of 1 {{.*}} %V64I8 = add + %V64I8 = add <64 x i8> undef, undef ret i32 undef } ; CHECK-LABEL: 'sub' define i32 @sub(i32 %arg) { - ; SSSE3: cost of 1 {{.*}} %A = sub - ; SSE42: cost of 1 {{.*}} %A = sub - ; AVX: cost of 1 {{.*}} %A = sub - ; AVX2: cost of 1 {{.*}} %A = sub - ; AVX512: cost of 1 {{.*}} %A = sub - %A = sub <2 x i64> undef, undef - ; SSSE3: cost of 2 {{.*}} %B = sub - ; SSE42: cost of 2 {{.*}} %B = sub - ; AVX: cost of 4 {{.*}} %B = sub - ; AVX2: cost of 1 {{.*}} %B = sub - ; AVX512: cost of 1 {{.*}} %B = sub - %B = sub <4 x i64> undef, undef - ; SSSE3: cost of 4 {{.*}} %C = sub - ; SSE42: cost of 4 {{.*}} %C = sub - ; AVX: cost of 8 {{.*}} %C = sub - ; AVX2: cost of 2 {{.*}} %C = sub - ; AVX512: cost of 1 {{.*}} %C = sub - %C = sub <8 x i64> undef, undef - - ; SSSE3: cost of 1 {{.*}} %D = sub - ; SSE42: cost of 1 {{.*}} %D = sub - ; AVX: cost of 1 {{.*}} %D = sub - ; AVX2: cost of 1 {{.*}} %D = sub - ; AVX512: cost of 1 {{.*}} %D = sub - %D = sub <4 x i32> undef, undef - ; SSSE3: cost of 2 {{.*}} %E = sub - ; SSE42: cost of 2 {{.*}} %E = sub - ; AVX: cost of 4 {{.*}} %E = sub - ; AVX2: cost of 1 {{.*}} %E = sub - ; AVX512: cost of 1 {{.*}} %E = sub - %E = sub <8 x i32> undef, undef - ; SSSE3: cost of 4 {{.*}} %F = sub - ; SSE42: cost of 4 {{.*}} %F = sub - ; AVX: cost of 8 {{.*}} %F = sub - ; AVX2: cost of 2 {{.*}} %F = sub - ; AVX512: cost of 1 {{.*}} %F = sub - %F = sub <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = sub - ; SSE42: cost of 1 {{.*}} %G = sub - ; AVX: cost of 1 {{.*}} %G = sub - ; AVX2: cost of 1 {{.*}} %G = sub - ; AVX512: cost of 1 {{.*}} %G = sub - %G = sub <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = sub - ; SSE42: cost of 2 {{.*}} %H = sub - ; AVX: cost of 4 {{.*}} %H = sub - ; AVX2: cost of 1 {{.*}} %H = sub - ; AVX512: cost of 1 {{.*}} %H = sub - %H = sub <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = sub - ; SSE42: cost of 4 {{.*}} %I = sub - ; AVX: cost of 8 {{.*}} %I = sub - ; AVX2: cost of 2 {{.*}} %I = sub - ; AVX512F: cost of 2 {{.*}} %I = sub - ; AVX512BW: cost of 1 {{.*}} %I = sub - %I = sub <32 x i16> undef, undef - - ; SSSE3: cost of 1 {{.*}} %J = sub - ; SSE42: cost of 1 {{.*}} %J = sub - ; AVX: cost of 1 {{.*}} %J = sub - ; AVX2: cost of 1 {{.*}} %J = sub - ; AVX512: cost of 1 {{.*}} %J = sub - %J = sub <16 x i8> undef, undef - ; SSSE3: cost of 2 {{.*}} %K = sub - ; SSE42: cost of 2 {{.*}} %K = sub - ; AVX: cost of 4 {{.*}} %K = sub - ; AVX2: cost of 1 {{.*}} %K = sub - ; AVX512: cost of 1 {{.*}} %K = sub - %K = sub <32 x i8> undef, undef - ; SSSE3: cost of 4 {{.*}} %L = sub - ; SSE42: cost of 4 {{.*}} %L = sub - ; AVX: cost of 8 {{.*}} %L = sub - ; AVX2: cost of 2 {{.*}} %L = sub - ; AVX512F: cost of 2 {{.*}} %L = sub - ; AVX512BW: cost of 1 {{.*}} %L = sub - %L = sub <64 x i8> undef, undef + ; CHECK: cost of 1 {{.*}} %I64 = sub + %I64 = sub i64 undef, undef + ; SSSE3: cost of 1 {{.*}} %V2I64 = sub + ; SSE42: cost of 1 {{.*}} %V2I64 = sub + ; AVX: cost of 1 {{.*}} %V2I64 = sub + ; AVX2: cost of 1 {{.*}} %V2I64 = sub + ; AVX512: cost of 1 {{.*}} %V2I64 = sub + %V2I64 = sub <2 x i64> undef, undef + ; SSSE3: cost of 2 {{.*}} %V4I64 = sub + ; SSE42: cost of 2 {{.*}} %V4I64 = sub + ; AVX: cost of 4 {{.*}} %V4I64 = sub + ; AVX2: cost of 1 {{.*}} %V4I64 = sub + ; AVX512: cost of 1 {{.*}} %V4I64 = sub + %V4I64 = sub <4 x i64> undef, undef + ; SSSE3: cost of 4 {{.*}} %V8I64 = sub + ; SSE42: cost of 4 {{.*}} %V8I64 = sub + ; AVX: cost of 8 {{.*}} %V8I64 = sub + ; AVX2: cost of 2 {{.*}} %V8I64 = sub + ; AVX512: cost of 1 {{.*}} %V8I64 = sub + %V8I64 = sub <8 x i64> undef, undef + + ; CHECK: cost of 1 {{.*}} %I32 = sub + %I32 = sub i32 undef, undef + ; SSSE3: cost of 1 {{.*}} %V4I32 = sub + ; SSE42: cost of 1 {{.*}} %V4I32 = sub + ; AVX: cost of 1 {{.*}} %V4I32 = sub + ; AVX2: cost of 1 {{.*}} %V4I32 = sub + ; AVX512: cost of 1 {{.*}} %V4I32 = sub + %V4I32 = sub <4 x i32> undef, undef + ; SSSE3: cost of 2 {{.*}} %V8I32 = sub + ; SSE42: cost of 2 {{.*}} %V8I32 = sub + ; AVX: cost of 4 {{.*}} %V8I32 = sub + ; AVX2: cost of 1 {{.*}} %V8I32 = sub + ; AVX512: cost of 1 {{.*}} %V8I32 = sub + %V8I32 = sub <8 x i32> undef, undef + ; SSSE3: cost of 4 {{.*}} %V16I32 = sub + ; SSE42: cost of 4 {{.*}} %V16I32 = sub + ; AVX: cost of 8 {{.*}} %V16I32 = sub + ; AVX2: cost of 2 {{.*}} %V16I32 = sub + ; AVX512: cost of 1 {{.*}} %V16I32 = sub + %V16I32 = sub <16 x i32> undef, undef + + ; CHECK: cost of 1 {{.*}} %I16 = sub + %I16 = sub i16 undef, undef + ; SSSE3: cost of 1 {{.*}} %V8I16 = sub + ; SSE42: cost of 1 {{.*}} %V8I16 = sub + ; AVX: cost of 1 {{.*}} %V8I16 = sub + ; AVX2: cost of 1 {{.*}} %V8I16 = sub + ; AVX512: cost of 1 {{.*}} %V8I16 = sub + %V8I16 = sub <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = sub + ; SSE42: cost of 2 {{.*}} %V16I16 = sub + ; AVX: cost of 4 {{.*}} %V16I16 = sub + ; AVX2: cost of 1 {{.*}} %V16I16 = sub + ; AVX512: cost of 1 {{.*}} %V16I16 = sub + %V16I16 = sub <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = sub + ; SSE42: cost of 4 {{.*}} %V32I16 = sub + ; AVX: cost of 8 {{.*}} %V32I16 = sub + ; AVX2: cost of 2 {{.*}} %V32I16 = sub + ; AVX512F: cost of 2 {{.*}} %V32I16 = sub + ; AVX512BW: cost of 1 {{.*}} %V32I16 = sub + %V32I16 = sub <32 x i16> undef, undef + + ; CHECK: cost of 1 {{.*}} %I8 = sub + %I8 = sub i8 undef, undef + ; SSSE3: cost of 1 {{.*}} %V16I8 = sub + ; SSE42: cost of 1 {{.*}} %V16I8 = sub + ; AVX: cost of 1 {{.*}} %V16I8 = sub + ; AVX2: cost of 1 {{.*}} %V16I8 = sub + ; AVX512: cost of 1 {{.*}} %V16I8 = sub + %V16I8 = sub <16 x i8> undef, undef + ; SSSE3: cost of 2 {{.*}} %V32I8 = sub + ; SSE42: cost of 2 {{.*}} %V32I8 = sub + ; AVX: cost of 4 {{.*}} %V32I8 = sub + ; AVX2: cost of 1 {{.*}} %V32I8 = sub + ; AVX512: cost of 1 {{.*}} %V32I8 = sub + %V32I8 = sub <32 x i8> undef, undef + ; SSSE3: cost of 4 {{.*}} %V64I8 = sub + ; SSE42: cost of 4 {{.*}} %V64I8 = sub + ; AVX: cost of 8 {{.*}} %V64I8 = sub + ; AVX2: cost of 2 {{.*}} %V64I8 = sub + ; AVX512F: cost of 2 {{.*}} %V64I8 = sub + ; AVX512BW: cost of 1 {{.*}} %V64I8 = sub + %V64I8 = sub <64 x i8> undef, undef ret i32 undef } ; CHECK-LABEL: 'or' define i32 @or(i32 %arg) { - ; SSSE3: cost of 1 {{.*}} %A = or - ; SSE42: cost of 1 {{.*}} %A = or - ; AVX: cost of 1 {{.*}} %A = or - ; AVX2: cost of 1 {{.*}} %A = or - ; AVX512: cost of 1 {{.*}} %A = or - %A = or <2 x i64> undef, undef - ; SSSE3: cost of 2 {{.*}} %B = or - ; SSE42: cost of 2 {{.*}} %B = or - ; AVX: cost of 1 {{.*}} %B = or - ; AVX2: cost of 1 {{.*}} %B = or - ; AVX512: cost of 1 {{.*}} %B = or - %B = or <4 x i64> undef, undef - ; SSSE3: cost of 4 {{.*}} %C = or - ; SSE42: cost of 4 {{.*}} %C = or - ; AVX: cost of 2 {{.*}} %C = or - ; AVX2: cost of 2 {{.*}} %C = or - ; AVX512: cost of 1 {{.*}} %C = or - %C = or <8 x i64> undef, undef - - ; SSSE3: cost of 1 {{.*}} %D = or - ; SSE42: cost of 1 {{.*}} %D = or - ; AVX: cost of 1 {{.*}} %D = or - ; AVX2: cost of 1 {{.*}} %D = or - ; AVX512: cost of 1 {{.*}} %D = or - %D = or <4 x i32> undef, undef - ; SSSE3: cost of 2 {{.*}} %E = or - ; SSE42: cost of 2 {{.*}} %E = or - ; AVX: cost of 1 {{.*}} %E = or - ; AVX2: cost of 1 {{.*}} %E = or - ; AVX512: cost of 1 {{.*}} %E = or - %E = or <8 x i32> undef, undef - ; SSSE3: cost of 4 {{.*}} %F = or - ; SSE42: cost of 4 {{.*}} %F = or - ; AVX: cost of 2 {{.*}} %F = or - ; AVX2: cost of 2 {{.*}} %F = or - ; AVX512: cost of 1 {{.*}} %F = or - %F = or <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = or - ; SSE42: cost of 1 {{.*}} %G = or - ; AVX: cost of 1 {{.*}} %G = or - ; AVX2: cost of 1 {{.*}} %G = or - ; AVX512: cost of 1 {{.*}} %G = or - %G = or <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = or - ; SSE42: cost of 2 {{.*}} %H = or - ; AVX: cost of 1 {{.*}} %H = or - ; AVX2: cost of 1 {{.*}} %H = or - ; AVX512: cost of 1 {{.*}} %H = or - %H = or <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = or - ; SSE42: cost of 4 {{.*}} %I = or - ; AVX: cost of 2 {{.*}} %I = or - ; AVX2: cost of 2 {{.*}} %I = or - ; AVX512F: cost of 2 {{.*}} %I = or - ; AVX512BW: cost of 1 {{.*}} %I = or - %I = or <32 x i16> undef, undef - - ; SSSE3: cost of 1 {{.*}} %J = or - ; SSE42: cost of 1 {{.*}} %J = or - ; AVX: cost of 1 {{.*}} %J = or - ; AVX2: cost of 1 {{.*}} %J = or - ; AVX512: cost of 1 {{.*}} %J = or - %J = or <16 x i8> undef, undef - ; SSSE3: cost of 2 {{.*}} %K = or - ; SSE42: cost of 2 {{.*}} %K = or - ; AVX: cost of 1 {{.*}} %K = or - ; AVX2: cost of 1 {{.*}} %K = or - ; AVX512: cost of 1 {{.*}} %K = or - %K = or <32 x i8> undef, undef - ; SSSE3: cost of 4 {{.*}} %L = or - ; SSE42: cost of 4 {{.*}} %L = or - ; AVX: cost of 2 {{.*}} %L = or - ; AVX2: cost of 2 {{.*}} %L = or - ; AVX512F: cost of 2 {{.*}} %L = or - ; AVX512BW: cost of 1 {{.*}} %L = or - %L = or <64 x i8> undef, undef + ; CHECK: cost of 1 {{.*}} %I64 = or + %I64 = or i64 undef, undef + ; SSSE3: cost of 1 {{.*}} %V2I64 = or + ; SSE42: cost of 1 {{.*}} %V2I64 = or + ; AVX: cost of 1 {{.*}} %V2I64 = or + ; AVX2: cost of 1 {{.*}} %V2I64 = or + ; AVX512: cost of 1 {{.*}} %V2I64 = or + %V2I64 = or <2 x i64> undef, undef + ; SSSE3: cost of 2 {{.*}} %V4I64 = or + ; SSE42: cost of 2 {{.*}} %V4I64 = or + ; AVX: cost of 1 {{.*}} %V4I64 = or + ; AVX2: cost of 1 {{.*}} %V4I64 = or + ; AVX512: cost of 1 {{.*}} %V4I64 = or + %V4I64 = or <4 x i64> undef, undef + ; SSSE3: cost of 4 {{.*}} %V8I64 = or + ; SSE42: cost of 4 {{.*}} %V8I64 = or + ; AVX: cost of 2 {{.*}} %V8I64 = or + ; AVX2: cost of 2 {{.*}} %V8I64 = or + ; AVX512: cost of 1 {{.*}} %V8I64 = or + %V8I64 = or <8 x i64> undef, undef + + ; CHECK: cost of 1 {{.*}} %I32 = or + %I32 = or i32 undef, undef + ; SSSE3: cost of 1 {{.*}} %V4I32 = or + ; SSE42: cost of 1 {{.*}} %V4I32 = or + ; AVX: cost of 1 {{.*}} %V4I32 = or + ; AVX2: cost of 1 {{.*}} %V4I32 = or + ; AVX512: cost of 1 {{.*}} %V4I32 = or + %V4I32 = or <4 x i32> undef, undef + ; SSSE3: cost of 2 {{.*}} %V8I32 = or + ; SSE42: cost of 2 {{.*}} %V8I32 = or + ; AVX: cost of 1 {{.*}} %V8I32 = or + ; AVX2: cost of 1 {{.*}} %V8I32 = or + ; AVX512: cost of 1 {{.*}} %V8I32 = or + %V8I32 = or <8 x i32> undef, undef + ; SSSE3: cost of 4 {{.*}} %V16I32 = or + ; SSE42: cost of 4 {{.*}} %V16I32 = or + ; AVX: cost of 2 {{.*}} %V16I32 = or + ; AVX2: cost of 2 {{.*}} %V16I32 = or + ; AVX512: cost of 1 {{.*}} %V16I32 = or + %V16I32 = or <16 x i32> undef, undef + + ; CHECK: cost of 1 {{.*}} %I16 = or + %I16 = or i16 undef, undef + ; SSSE3: cost of 1 {{.*}} %V8I16 = or + ; SSE42: cost of 1 {{.*}} %V8I16 = or + ; AVX: cost of 1 {{.*}} %V8I16 = or + ; AVX2: cost of 1 {{.*}} %V8I16 = or + ; AVX512: cost of 1 {{.*}} %V8I16 = or + %V8I16 = or <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = or + ; SSE42: cost of 2 {{.*}} %V16I16 = or + ; AVX: cost of 1 {{.*}} %V16I16 = or + ; AVX2: cost of 1 {{.*}} %V16I16 = or + ; AVX512: cost of 1 {{.*}} %V16I16 = or + %V16I16 = or <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = or + ; SSE42: cost of 4 {{.*}} %V32I16 = or + ; AVX: cost of 2 {{.*}} %V32I16 = or + ; AVX2: cost of 2 {{.*}} %V32I16 = or + ; AVX512F: cost of 2 {{.*}} %V32I16 = or + ; AVX512BW: cost of 1 {{.*}} %V32I16 = or + %V32I16 = or <32 x i16> undef, undef + + ; CHECK: cost of 1 {{.*}} %I8 = or + %I8 = or i8 undef, undef + ; SSSE3: cost of 1 {{.*}} %V16I8 = or + ; SSE42: cost of 1 {{.*}} %V16I8 = or + ; AVX: cost of 1 {{.*}} %V16I8 = or + ; AVX2: cost of 1 {{.*}} %V16I8 = or + ; AVX512: cost of 1 {{.*}} %V16I8 = or + %V16I8 = or <16 x i8> undef, undef + ; SSSE3: cost of 2 {{.*}} %V32I8 = or + ; SSE42: cost of 2 {{.*}} %V32I8 = or + ; AVX: cost of 1 {{.*}} %V32I8 = or + ; AVX2: cost of 1 {{.*}} %V32I8 = or + ; AVX512: cost of 1 {{.*}} %V32I8 = or + %V32I8 = or <32 x i8> undef, undef + ; SSSE3: cost of 4 {{.*}} %V64I8 = or + ; SSE42: cost of 4 {{.*}} %V64I8 = or + ; AVX: cost of 2 {{.*}} %V64I8 = or + ; AVX2: cost of 2 {{.*}} %V64I8 = or + ; AVX512F: cost of 2 {{.*}} %V64I8 = or + ; AVX512BW: cost of 1 {{.*}} %V64I8 = or + %V64I8 = or <64 x i8> undef, undef ret i32 undef } ; CHECK-LABEL: 'xor' define i32 @xor(i32 %arg) { - ; SSSE3: cost of 1 {{.*}} %A = xor - ; SSE42: cost of 1 {{.*}} %A = xor - ; AVX: cost of 1 {{.*}} %A = xor - ; AVX2: cost of 1 {{.*}} %A = xor - ; AVX512: cost of 1 {{.*}} %A = xor - %A = xor <2 x i64> undef, undef - ; SSSE3: cost of 2 {{.*}} %B = xor - ; SSE42: cost of 2 {{.*}} %B = xor - ; AVX: cost of 1 {{.*}} %B = xor - ; AVX2: cost of 1 {{.*}} %B = xor - ; AVX512: cost of 1 {{.*}} %B = xor - %B = xor <4 x i64> undef, undef - ; SSSE3: cost of 4 {{.*}} %C = xor - ; SSE42: cost of 4 {{.*}} %C = xor - ; AVX: cost of 2 {{.*}} %C = xor - ; AVX2: cost of 2 {{.*}} %C = xor - ; AVX512: cost of 1 {{.*}} %C = xor - %C = xor <8 x i64> undef, undef - - ; SSSE3: cost of 1 {{.*}} %D = xor - ; SSE42: cost of 1 {{.*}} %D = xor - ; AVX: cost of 1 {{.*}} %D = xor - ; AVX2: cost of 1 {{.*}} %D = xor - ; AVX512: cost of 1 {{.*}} %D = xor - %D = xor <4 x i32> undef, undef - ; SSSE3: cost of 2 {{.*}} %E = xor - ; SSE42: cost of 2 {{.*}} %E = xor - ; AVX: cost of 1 {{.*}} %E = xor - ; AVX2: cost of 1 {{.*}} %E = xor - ; AVX512: cost of 1 {{.*}} %E = xor - %E = xor <8 x i32> undef, undef - ; SSSE3: cost of 4 {{.*}} %F = xor - ; SSE42: cost of 4 {{.*}} %F = xor - ; AVX: cost of 2 {{.*}} %F = xor - ; AVX2: cost of 2 {{.*}} %F = xor - ; AVX512: cost of 1 {{.*}} %F = xor - %F = xor <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = xor - ; SSE42: cost of 1 {{.*}} %G = xor - ; AVX: cost of 1 {{.*}} %G = xor - ; AVX2: cost of 1 {{.*}} %G = xor - ; AVX512: cost of 1 {{.*}} %G = xor - %G = xor <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = xor - ; SSE42: cost of 2 {{.*}} %H = xor - ; AVX: cost of 1 {{.*}} %H = xor - ; AVX2: cost of 1 {{.*}} %H = xor - ; AVX512: cost of 1 {{.*}} %H = xor - %H = xor <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = xor - ; SSE42: cost of 4 {{.*}} %I = xor - ; AVX: cost of 2 {{.*}} %I = xor - ; AVX2: cost of 2 {{.*}} %I = xor - ; AVX512F: cost of 2 {{.*}} %I = xor - ; AVX512BW: cost of 1 {{.*}} %I = xor - %I = xor <32 x i16> undef, undef - - ; SSSE3: cost of 1 {{.*}} %J = xor - ; SSE42: cost of 1 {{.*}} %J = xor - ; AVX: cost of 1 {{.*}} %J = xor - ; AVX2: cost of 1 {{.*}} %J = xor - ; AVX512: cost of 1 {{.*}} %J = xor - %J = xor <16 x i8> undef, undef - ; SSSE3: cost of 2 {{.*}} %K = xor - ; SSE42: cost of 2 {{.*}} %K = xor - ; AVX: cost of 1 {{.*}} %K = xor - ; AVX2: cost of 1 {{.*}} %K = xor - ; AVX512: cost of 1 {{.*}} %K = xor - %K = xor <32 x i8> undef, undef - ; SSSE3: cost of 4 {{.*}} %L = xor - ; SSE42: cost of 4 {{.*}} %L = xor - ; AVX: cost of 2 {{.*}} %L = xor - ; AVX2: cost of 2 {{.*}} %L = xor - ; AVX512F: cost of 2 {{.*}} %L = xor - ; AVX512BW: cost of 1 {{.*}} %L = xor - %L = xor <64 x i8> undef, undef + ; CHECK: cost of 1 {{.*}} %I64 = xor + %I64 = xor i64 undef, undef + ; SSSE3: cost of 1 {{.*}} %V2I64 = xor + ; SSE42: cost of 1 {{.*}} %V2I64 = xor + ; AVX: cost of 1 {{.*}} %V2I64 = xor + ; AVX2: cost of 1 {{.*}} %V2I64 = xor + ; AVX512: cost of 1 {{.*}} %V2I64 = xor + %V2I64 = xor <2 x i64> undef, undef + ; SSSE3: cost of 2 {{.*}} %V4I64 = xor + ; SSE42: cost of 2 {{.*}} %V4I64 = xor + ; AVX: cost of 1 {{.*}} %V4I64 = xor + ; AVX2: cost of 1 {{.*}} %V4I64 = xor + ; AVX512: cost of 1 {{.*}} %V4I64 = xor + %V4I64 = xor <4 x i64> undef, undef + ; SSSE3: cost of 4 {{.*}} %V8I64 = xor + ; SSE42: cost of 4 {{.*}} %V8I64 = xor + ; AVX: cost of 2 {{.*}} %V8I64 = xor + ; AVX2: cost of 2 {{.*}} %V8I64 = xor + ; AVX512: cost of 1 {{.*}} %V8I64 = xor + %V8I64 = xor <8 x i64> undef, undef + + ; CHECK: cost of 1 {{.*}} %I32 = xor + %I32 = xor i32 undef, undef + ; SSSE3: cost of 1 {{.*}} %V4I32 = xor + ; SSE42: cost of 1 {{.*}} %V4I32 = xor + ; AVX: cost of 1 {{.*}} %V4I32 = xor + ; AVX2: cost of 1 {{.*}} %V4I32 = xor + ; AVX512: cost of 1 {{.*}} %V4I32 = xor + %V4I32 = xor <4 x i32> undef, undef + ; SSSE3: cost of 2 {{.*}} %V8I32 = xor + ; SSE42: cost of 2 {{.*}} %V8I32 = xor + ; AVX: cost of 1 {{.*}} %V8I32 = xor + ; AVX2: cost of 1 {{.*}} %V8I32 = xor + ; AVX512: cost of 1 {{.*}} %V8I32 = xor + %V8I32 = xor <8 x i32> undef, undef + ; SSSE3: cost of 4 {{.*}} %V16I32 = xor + ; SSE42: cost of 4 {{.*}} %V16I32 = xor + ; AVX: cost of 2 {{.*}} %V16I32 = xor + ; AVX2: cost of 2 {{.*}} %V16I32 = xor + ; AVX512: cost of 1 {{.*}} %V16I32 = xor + %V16I32 = xor <16 x i32> undef, undef + + ; CHECK: cost of 1 {{.*}} %I16 = xor + %I16 = xor i16 undef, undef + ; SSSE3: cost of 1 {{.*}} %V8I16 = xor + ; SSE42: cost of 1 {{.*}} %V8I16 = xor + ; AVX: cost of 1 {{.*}} %V8I16 = xor + ; AVX2: cost of 1 {{.*}} %V8I16 = xor + ; AVX512: cost of 1 {{.*}} %V8I16 = xor + %V8I16 = xor <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = xor + ; SSE42: cost of 2 {{.*}} %V16I16 = xor + ; AVX: cost of 1 {{.*}} %V16I16 = xor + ; AVX2: cost of 1 {{.*}} %V16I16 = xor + ; AVX512: cost of 1 {{.*}} %V16I16 = xor + %V16I16 = xor <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = xor + ; SSE42: cost of 4 {{.*}} %V32I16 = xor + ; AVX: cost of 2 {{.*}} %V32I16 = xor + ; AVX2: cost of 2 {{.*}} %V32I16 = xor + ; AVX512F: cost of 2 {{.*}} %V32I16 = xor + ; AVX512BW: cost of 1 {{.*}} %V32I16 = xor + %V32I16 = xor <32 x i16> undef, undef + + ; CHECK: cost of 1 {{.*}} %I8 = xor + %I8 = xor i8 undef, undef + ; SSSE3: cost of 1 {{.*}} %V16I8 = xor + ; SSE42: cost of 1 {{.*}} %V16I8 = xor + ; AVX: cost of 1 {{.*}} %V16I8 = xor + ; AVX2: cost of 1 {{.*}} %V16I8 = xor + ; AVX512: cost of 1 {{.*}} %V16I8 = xor + %V16I8 = xor <16 x i8> undef, undef + ; SSSE3: cost of 2 {{.*}} %V32I8 = xor + ; SSE42: cost of 2 {{.*}} %V32I8 = xor + ; AVX: cost of 1 {{.*}} %V32I8 = xor + ; AVX2: cost of 1 {{.*}} %V32I8 = xor + ; AVX512: cost of 1 {{.*}} %V32I8 = xor + %V32I8 = xor <32 x i8> undef, undef + ; SSSE3: cost of 4 {{.*}} %V64I8 = xor + ; SSE42: cost of 4 {{.*}} %V64I8 = xor + ; AVX: cost of 2 {{.*}} %V64I8 = xor + ; AVX2: cost of 2 {{.*}} %V64I8 = xor + ; AVX512F: cost of 2 {{.*}} %V64I8 = xor + ; AVX512BW: cost of 1 {{.*}} %V64I8 = xor + %V64I8 = xor <64 x i8> undef, undef ret i32 undef } ; CHECK-LABEL: 'and' define i32 @and(i32 %arg) { - ; SSSE3: cost of 1 {{.*}} %A = and - ; SSE42: cost of 1 {{.*}} %A = and - ; AVX: cost of 1 {{.*}} %A = and - ; AVX2: cost of 1 {{.*}} %A = and - ; AVX512: cost of 1 {{.*}} %A = and - %A = and <2 x i64> undef, undef - ; SSSE3: cost of 2 {{.*}} %B = and - ; SSE42: cost of 2 {{.*}} %B = and - ; AVX: cost of 1 {{.*}} %B = and - ; AVX2: cost of 1 {{.*}} %B = and - ; AVX512: cost of 1 {{.*}} %B = and - %B = and <4 x i64> undef, undef - ; SSSE3: cost of 4 {{.*}} %C = and - ; SSE42: cost of 4 {{.*}} %C = and - ; AVX: cost of 2 {{.*}} %C = and - ; AVX2: cost of 2 {{.*}} %C = and - ; AVX512: cost of 1 {{.*}} %C = and - %C = and <8 x i64> undef, undef - - ; SSSE3: cost of 1 {{.*}} %D = and - ; SSE42: cost of 1 {{.*}} %D = and - ; AVX: cost of 1 {{.*}} %D = and - ; AVX2: cost of 1 {{.*}} %D = and - ; AVX512: cost of 1 {{.*}} %D = and - %D = and <4 x i32> undef, undef - ; SSSE3: cost of 2 {{.*}} %E = and - ; SSE42: cost of 2 {{.*}} %E = and - ; AVX: cost of 1 {{.*}} %E = and - ; AVX2: cost of 1 {{.*}} %E = and - ; AVX512: cost of 1 {{.*}} %E = and - %E = and <8 x i32> undef, undef - ; SSSE3: cost of 4 {{.*}} %F = and - ; SSE42: cost of 4 {{.*}} %F = and - ; AVX: cost of 2 {{.*}} %F = and - ; AVX2: cost of 2 {{.*}} %F = and - ; AVX512: cost of 1 {{.*}} %F = and - %F = and <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = and - ; SSE42: cost of 1 {{.*}} %G = and - ; AVX: cost of 1 {{.*}} %G = and - ; AVX2: cost of 1 {{.*}} %G = and - ; AVX512: cost of 1 {{.*}} %G = and - %G = and <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = and - ; SSE42: cost of 2 {{.*}} %H = and - ; AVX: cost of 1 {{.*}} %H = and - ; AVX2: cost of 1 {{.*}} %H = and - ; AVX512: cost of 1 {{.*}} %H = and - %H = and <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = and - ; SSE42: cost of 4 {{.*}} %I = and - ; AVX: cost of 2 {{.*}} %I = and - ; AVX2: cost of 2 {{.*}} %I = and - ; AVX512F: cost of 2 {{.*}} %I = and - ; AVX512BW: cost of 1 {{.*}} %I = and - %I = and <32 x i16> undef, undef - - ; SSSE3: cost of 1 {{.*}} %J = and - ; SSE42: cost of 1 {{.*}} %J = and - ; AVX: cost of 1 {{.*}} %J = and - ; AVX2: cost of 1 {{.*}} %J = and - ; AVX512: cost of 1 {{.*}} %J = and - %J = and <16 x i8> undef, undef - ; SSSE3: cost of 2 {{.*}} %K = and - ; SSE42: cost of 2 {{.*}} %K = and - ; AVX: cost of 1 {{.*}} %K = and - ; AVX2: cost of 1 {{.*}} %K = and - ; AVX512: cost of 1 {{.*}} %K = and - %K = and <32 x i8> undef, undef - ; SSSE3: cost of 4 {{.*}} %L = and - ; SSE42: cost of 4 {{.*}} %L = and - ; AVX: cost of 2 {{.*}} %L = and - ; AVX2: cost of 2 {{.*}} %L = and - ; AVX512F: cost of 2 {{.*}} %L = and - ; AVX512BW: cost of 1 {{.*}} %L = and - %L = and <64 x i8> undef, undef + ; CHECK: cost of 1 {{.*}} %I64 = and + %I64 = and i64 undef, undef + ; SSSE3: cost of 1 {{.*}} %V2I64 = and + ; SSE42: cost of 1 {{.*}} %V2I64 = and + ; AVX: cost of 1 {{.*}} %V2I64 = and + ; AVX2: cost of 1 {{.*}} %V2I64 = and + ; AVX512: cost of 1 {{.*}} %V2I64 = and + %V2I64 = and <2 x i64> undef, undef + ; SSSE3: cost of 2 {{.*}} %V4I64 = and + ; SSE42: cost of 2 {{.*}} %V4I64 = and + ; AVX: cost of 1 {{.*}} %V4I64 = and + ; AVX2: cost of 1 {{.*}} %V4I64 = and + ; AVX512: cost of 1 {{.*}} %V4I64 = and + %V4I64 = and <4 x i64> undef, undef + ; SSSE3: cost of 4 {{.*}} %V8I64 = and + ; SSE42: cost of 4 {{.*}} %V8I64 = and + ; AVX: cost of 2 {{.*}} %V8I64 = and + ; AVX2: cost of 2 {{.*}} %V8I64 = and + ; AVX512: cost of 1 {{.*}} %V8I64 = and + %V8I64 = and <8 x i64> undef, undef + + ; CHECK: cost of 1 {{.*}} %I32 = and + %I32 = and i32 undef, undef + ; SSSE3: cost of 1 {{.*}} %V4I32 = and + ; SSE42: cost of 1 {{.*}} %V4I32 = and + ; AVX: cost of 1 {{.*}} %V4I32 = and + ; AVX2: cost of 1 {{.*}} %V4I32 = and + ; AVX512: cost of 1 {{.*}} %V4I32 = and + %V4I32 = and <4 x i32> undef, undef + ; SSSE3: cost of 2 {{.*}} %V8I32 = and + ; SSE42: cost of 2 {{.*}} %V8I32 = and + ; AVX: cost of 1 {{.*}} %V8I32 = and + ; AVX2: cost of 1 {{.*}} %V8I32 = and + ; AVX512: cost of 1 {{.*}} %V8I32 = and + %V8I32 = and <8 x i32> undef, undef + ; SSSE3: cost of 4 {{.*}} %V16I32 = and + ; SSE42: cost of 4 {{.*}} %V16I32 = and + ; AVX: cost of 2 {{.*}} %V16I32 = and + ; AVX2: cost of 2 {{.*}} %V16I32 = and + ; AVX512: cost of 1 {{.*}} %V16I32 = and + %V16I32 = and <16 x i32> undef, undef + + ; CHECK: cost of 1 {{.*}} %I16 = and + %I16 = and i16 undef, undef + ; SSSE3: cost of 1 {{.*}} %V8I16 = and + ; SSE42: cost of 1 {{.*}} %V8I16 = and + ; AVX: cost of 1 {{.*}} %V8I16 = and + ; AVX2: cost of 1 {{.*}} %V8I16 = and + ; AVX512: cost of 1 {{.*}} %V8I16 = and + %V8I16 = and <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = and + ; SSE42: cost of 2 {{.*}} %V16I16 = and + ; AVX: cost of 1 {{.*}} %V16I16 = and + ; AVX2: cost of 1 {{.*}} %V16I16 = and + ; AVX512: cost of 1 {{.*}} %V16I16 = and + %V16I16 = and <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = and + ; SSE42: cost of 4 {{.*}} %V32I16 = and + ; AVX: cost of 2 {{.*}} %V32I16 = and + ; AVX2: cost of 2 {{.*}} %V32I16 = and + ; AVX512F: cost of 2 {{.*}} %V32I16 = and + ; AVX512BW: cost of 1 {{.*}} %V32I16 = and + %V32I16 = and <32 x i16> undef, undef + + ; CHECK: cost of 1 {{.*}} %I8 = and + %I8 = and i8 undef, undef + ; SSSE3: cost of 1 {{.*}} %V16I8 = and + ; SSE42: cost of 1 {{.*}} %V16I8 = and + ; AVX: cost of 1 {{.*}} %V16I8 = and + ; AVX2: cost of 1 {{.*}} %V16I8 = and + ; AVX512: cost of 1 {{.*}} %V16I8 = and + %V16I8 = and <16 x i8> undef, undef + ; SSSE3: cost of 2 {{.*}} %V32I8 = and + ; SSE42: cost of 2 {{.*}} %V32I8 = and + ; AVX: cost of 1 {{.*}} %V32I8 = and + ; AVX2: cost of 1 {{.*}} %V32I8 = and + ; AVX512: cost of 1 {{.*}} %V32I8 = and + %V32I8 = and <32 x i8> undef, undef + ; SSSE3: cost of 4 {{.*}} %V64I8 = and + ; SSE42: cost of 4 {{.*}} %V64I8 = and + ; AVX: cost of 2 {{.*}} %V64I8 = and + ; AVX2: cost of 2 {{.*}} %V64I8 = and + ; AVX512F: cost of 2 {{.*}} %V64I8 = and + ; AVX512BW: cost of 1 {{.*}} %V64I8 = and + %V64I8 = and <64 x i8> undef, undef ret i32 undef } ; CHECK-LABEL: 'mul' define i32 @mul(i32 %arg) { - ; SSSE3: cost of 8 {{.*}} %A = mul - ; SSE42: cost of 8 {{.*}} %A = mul - ; AVX: cost of 8 {{.*}} %A = mul - ; AVX2: cost of 8 {{.*}} %A = mul - ; AVX512F: cost of 8 {{.*}} %A = mul - ; AVX512BW: cost of 8 {{.*}} %A = mul - ; AVX512DQ: cost of 1 {{.*}} %A = mul - %A = mul <2 x i64> undef, undef - ; SSSE3: cost of 16 {{.*}} %B = mul - ; SSE42: cost of 16 {{.*}} %B = mul - ; AVX: cost of 18 {{.*}} %B = mul - ; AVX2: cost of 8 {{.*}} %B = mul - ; AVX512F: cost of 8 {{.*}} %B = mul - ; AVX512BW: cost of 8 {{.*}} %B = mul - ; AVX512DQ: cost of 1 {{.*}} %B = mul - %B = mul <4 x i64> undef, undef - ; SSSE3: cost of 32 {{.*}} %C = mul - ; SSE42: cost of 32 {{.*}} %C = mul - ; AVX: cost of 36 {{.*}} %C = mul - ; AVX2: cost of 16 {{.*}} %C = mul - ; AVX512F: cost of 8 {{.*}} %C = mul - ; AVX512BW: cost of 8 {{.*}} %C = mul - ; AVX512DQ: cost of 1 {{.*}} %C = mul - %C = mul <8 x i64> undef, undef - - ; SSSE3: cost of 6 {{.*}} %D = mul - ; SSE42: cost of 1 {{.*}} %D = mul - ; AVX: cost of 1 {{.*}} %D = mul - ; AVX2: cost of 1 {{.*}} %D = mul - ; AVX512: cost of 1 {{.*}} %D = mul - %D = mul <4 x i32> undef, undef - ; SSSE3: cost of 12 {{.*}} %E = mul - ; SSE42: cost of 2 {{.*}} %E = mul - ; AVX: cost of 4 {{.*}} %E = mul - ; AVX2: cost of 1 {{.*}} %E = mul - ; AVX512: cost of 1 {{.*}} %E = mul - %E = mul <8 x i32> undef, undef - ; SSSE3: cost of 24 {{.*}} %F = mul - ; SSE42: cost of 4 {{.*}} %F = mul - ; AVX: cost of 8 {{.*}} %F = mul - ; AVX2: cost of 2 {{.*}} %F = mul - ; AVX512: cost of 1 {{.*}} %F = mul - %F = mul <16 x i32> undef, undef - - ; SSSE3: cost of 1 {{.*}} %G = mul - ; SSE42: cost of 1 {{.*}} %G = mul - ; AVX: cost of 1 {{.*}} %G = mul - ; AVX2: cost of 1 {{.*}} %G = mul - ; AVX512: cost of 1 {{.*}} %G = mul - %G = mul <8 x i16> undef, undef - ; SSSE3: cost of 2 {{.*}} %H = mul - ; SSE42: cost of 2 {{.*}} %H = mul - ; AVX: cost of 4 {{.*}} %H = mul - ; AVX2: cost of 1 {{.*}} %H = mul - ; AVX512: cost of 1 {{.*}} %H = mul - %H = mul <16 x i16> undef, undef - ; SSSE3: cost of 4 {{.*}} %I = mul - ; SSE42: cost of 4 {{.*}} %I = mul - ; AVX: cost of 8 {{.*}} %I = mul - ; AVX2: cost of 2 {{.*}} %I = mul - ; AVX512F: cost of 2 {{.*}} %I = mul - ; AVX512BW: cost of 1 {{.*}} %I = mul - %I = mul <32 x i16> undef, undef - - ; SSSE3: cost of 12 {{.*}} %J = mul - ; SSE42: cost of 12 {{.*}} %J = mul - ; AVX: cost of 12 {{.*}} %J = mul - ; AVX2: cost of 7 {{.*}} %J = mul - ; AVX512F: cost of 5 {{.*}} %J = mul - ; AVX512BW: cost of 4 {{.*}} %J = mul - %J = mul <16 x i8> undef, undef - ; SSSE3: cost of 24 {{.*}} %K = mul - ; SSE42: cost of 24 {{.*}} %K = mul - ; AVX: cost of 26 {{.*}} %K = mul - ; AVX2: cost of 17 {{.*}} %K = mul - ; AVX512F: cost of 13 {{.*}} %K = mul - ; AVX512BW: cost of 4 {{.*}} %K = mul - %K = mul <32 x i8> undef, undef - ; SSSE3: cost of 48 {{.*}} %L = mul - ; SSE42: cost of 48 {{.*}} %L = mul - ; AVX: cost of 52 {{.*}} %L = mul - ; AVX2: cost of 34 {{.*}} %L = mul - ; AVX512F: cost of 26 {{.*}} %L = mul - ; AVX512BW: cost of 11 {{.*}} %L = mul - %L = mul <64 x i8> undef, undef + ; CHECK: cost of 1 {{.*}} %I64 = mul + %I64 = mul i64 undef, undef + ; SSSE3: cost of 8 {{.*}} %V2I64 = mul + ; SSE42: cost of 8 {{.*}} %V2I64 = mul + ; AVX: cost of 8 {{.*}} %V2I64 = mul + ; AVX2: cost of 8 {{.*}} %V2I64 = mul + ; AVX512F: cost of 8 {{.*}} %V2I64 = mul + ; AVX512BW: cost of 8 {{.*}} %V2I64 = mul + ; AVX512DQ: cost of 1 {{.*}} %V2I64 = mul + %V2I64 = mul <2 x i64> undef, undef + ; SSSE3: cost of 16 {{.*}} %V4I64 = mul + ; SSE42: cost of 16 {{.*}} %V4I64 = mul + ; AVX: cost of 18 {{.*}} %V4I64 = mul + ; AVX2: cost of 8 {{.*}} %V4I64 = mul + ; AVX512F: cost of 8 {{.*}} %V4I64 = mul + ; AVX512BW: cost of 8 {{.*}} %V4I64 = mul + ; AVX512DQ: cost of 1 {{.*}} %V4I64 = mul + %V4I64 = mul <4 x i64> undef, undef + ; SSSE3: cost of 32 {{.*}} %V8I64 = mul + ; SSE42: cost of 32 {{.*}} %V8I64 = mul + ; AVX: cost of 36 {{.*}} %V8I64 = mul + ; AVX2: cost of 16 {{.*}} %V8I64 = mul + ; AVX512F: cost of 8 {{.*}} %V8I64 = mul + ; AVX512BW: cost of 8 {{.*}} %V8I64 = mul + ; AVX512DQ: cost of 1 {{.*}} %V8I64 = mul + %V8I64 = mul <8 x i64> undef, undef + + ; CHECK: cost of 1 {{.*}} %I32 = mul + %I32 = mul i32 undef, undef + ; SSSE3: cost of 6 {{.*}} %V4I32 = mul + ; SSE42: cost of 1 {{.*}} %V4I32 = mul + ; AVX: cost of 1 {{.*}} %V4I32 = mul + ; AVX2: cost of 1 {{.*}} %V4I32 = mul + ; AVX512: cost of 1 {{.*}} %V4I32 = mul + %V4I32 = mul <4 x i32> undef, undef + ; SSSE3: cost of 12 {{.*}} %V8I32 = mul + ; SSE42: cost of 2 {{.*}} %V8I32 = mul + ; AVX: cost of 4 {{.*}} %V8I32 = mul + ; AVX2: cost of 1 {{.*}} %V8I32 = mul + ; AVX512: cost of 1 {{.*}} %V8I32 = mul + %V8I32 = mul <8 x i32> undef, undef + ; SSSE3: cost of 24 {{.*}} %V16I32 = mul + ; SSE42: cost of 4 {{.*}} %V16I32 = mul + ; AVX: cost of 8 {{.*}} %V16I32 = mul + ; AVX2: cost of 2 {{.*}} %V16I32 = mul + ; AVX512: cost of 1 {{.*}} %V16I32 = mul + %V16I32 = mul <16 x i32> undef, undef + + ; CHECK: cost of 1 {{.*}} %I16 = mul + %I16 = mul i16 undef, undef + ; SSSE3: cost of 1 {{.*}} %V8I16 = mul + ; SSE42: cost of 1 {{.*}} %V8I16 = mul + ; AVX: cost of 1 {{.*}} %V8I16 = mul + ; AVX2: cost of 1 {{.*}} %V8I16 = mul + ; AVX512: cost of 1 {{.*}} %V8I16 = mul + %V8I16 = mul <8 x i16> undef, undef + ; SSSE3: cost of 2 {{.*}} %V16I16 = mul + ; SSE42: cost of 2 {{.*}} %V16I16 = mul + ; AVX: cost of 4 {{.*}} %V16I16 = mul + ; AVX2: cost of 1 {{.*}} %V16I16 = mul + ; AVX512: cost of 1 {{.*}} %V16I16 = mul + %V16I16 = mul <16 x i16> undef, undef + ; SSSE3: cost of 4 {{.*}} %V32I16 = mul + ; SSE42: cost of 4 {{.*}} %V32I16 = mul + ; AVX: cost of 8 {{.*}} %V32I16 = mul + ; AVX2: cost of 2 {{.*}} %V32I16 = mul + ; AVX512F: cost of 2 {{.*}} %V32I16 = mul + ; AVX512BW: cost of 1 {{.*}} %V32I16 = mul + %V32I16 = mul <32 x i16> undef, undef + + ; CHECK: cost of 1 {{.*}} %I8 = mul + %I8 = mul i8 undef, undef + ; SSSE3: cost of 12 {{.*}} %V16I8 = mul + ; SSE42: cost of 12 {{.*}} %V16I8 = mul + ; AVX: cost of 12 {{.*}} %V16I8 = mul + ; AVX2: cost of 7 {{.*}} %V16I8 = mul + ; AVX512F: cost of 5 {{.*}} %V16I8 = mul + ; AVX512BW: cost of 4 {{.*}} %V16I8 = mul + %V16I8 = mul <16 x i8> undef, undef + ; SSSE3: cost of 24 {{.*}} %V32I8 = mul + ; SSE42: cost of 24 {{.*}} %V32I8 = mul + ; AVX: cost of 26 {{.*}} %V32I8 = mul + ; AVX2: cost of 17 {{.*}} %V32I8 = mul + ; AVX512F: cost of 13 {{.*}} %V32I8 = mul + ; AVX512BW: cost of 4 {{.*}} %V32I8 = mul + %V32I8 = mul <32 x i8> undef, undef + ; SSSE3: cost of 48 {{.*}} %V64I8 = mul + ; SSE42: cost of 48 {{.*}} %V64I8 = mul + ; AVX: cost of 52 {{.*}} %V64I8 = mul + ; AVX2: cost of 34 {{.*}} %V64I8 = mul + ; AVX512F: cost of 26 {{.*}} %V64I8 = mul + ; AVX512BW: cost of 11 {{.*}} %V64I8 = mul + %V64I8 = mul <64 x i8> undef, undef ret i32 undef } diff --git a/test/Analysis/CostModel/X86/interleaved-load-i8.ll b/test/Analysis/CostModel/X86/interleaved-load-i8.ll new file mode 100644 index 0000000000000..382e5e5301d69 --- /dev/null +++ b/test/Analysis/CostModel/X86/interleaved-load-i8.ll @@ -0,0 +1,98 @@ +; REQUIRES: asserts +; RUN: opt -loop-vectorize -S -mcpu=core-avx2 --debug-only=loop-vectorize -vectorizer-maximize-bandwidth < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind readonly uwtable +define i32 @doit_stride3(i8* nocapture readonly %Ptr, i32 %Nels) { +;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 11 for VF 2 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 10 for VF 8 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 20 for VF 16 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 45 for VF 32 For instruction: %0 = load i8 +entry: + %cmp13 = icmp sgt i32 %Nels, 0 + br i1 %cmp13, label %for.body.preheader, label %for.end + +for.body.preheader: + br label %for.body + +for.body: + %Ptr.addr.016 = phi i8* [ %incdec.ptr2, %for.body ], [ %Ptr, %for.body.preheader ] + %i.015 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %s.014 = phi i32 [ %add6, %for.body ], [ 0, %for.body.preheader ] + %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.016, i64 1 + %0 = load i8, i8* %Ptr.addr.016, align 1 + %incdec.ptr1 = getelementptr inbounds i8, i8* %Ptr.addr.016, i64 2 + %1 = load i8, i8* %incdec.ptr, align 1 + %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.016, i64 3 + %2 = load i8, i8* %incdec.ptr1, align 1 + %conv = zext i8 %0 to i32 + %conv3 = zext i8 %1 to i32 + %conv4 = zext i8 %2 to i32 + %add = add i32 %s.014, %conv + %add5 = add i32 %add, %conv3 + %add6 = add i32 %add5, %conv4 + %inc = add nuw nsw i32 %i.015, 1 + %exitcond = icmp eq i32 %inc, %Nels + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + %add6.lcssa = phi i32 [ %add6, %for.body ] + br label %for.end + +for.end: + %s.0.lcssa = phi i32 [ 0, %entry ], [ %add6.lcssa, %for.end.loopexit ] + ret i32 %s.0.lcssa +} + +; Function Attrs: norecurse nounwind readonly uwtable +define i32 @doit_stride4(i8* nocapture readonly %Ptr, i32 %Nels) local_unnamed_addr { +;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 13 for VF 2 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 21 for VF 8 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 41 for VF 16 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 84 for VF 32 For instruction: %0 = load i8 +entry: + %cmp59 = icmp sgt i32 %Nels, 0 + br i1 %cmp59, label %for.body.preheader, label %for.end + +for.body.preheader: + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %Ptr.addr.062 = phi i8* [ %incdec.ptr3, %for.body ], [ %Ptr, %for.body.preheader ] + %i.061 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %s.060 = phi i32 [ %cond39, %for.body ], [ 0, %for.body.preheader ] + %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 1 + %0 = load i8, i8* %Ptr.addr.062, align 1 + %incdec.ptr1 = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 2 + %1 = load i8, i8* %incdec.ptr, align 1 + %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 3 + %2 = load i8, i8* %incdec.ptr1, align 1 + %incdec.ptr3 = getelementptr inbounds i8, i8* %Ptr.addr.062, i64 4 + %3 = load i8, i8* %incdec.ptr2, align 1 + %cmp5 = icmp ult i8 %0, %1 + %.sink = select i1 %cmp5, i8 %0, i8 %1 + %cmp12 = icmp ult i8 %.sink, %2 + %.sink40 = select i1 %cmp12, i8 %.sink, i8 %2 + %cmp23 = icmp ult i8 %.sink40, %3 + %.sink41 = select i1 %cmp23, i8 %.sink40, i8 %3 + %conv28 = zext i8 %.sink41 to i32 + %cmp33 = icmp slt i32 %s.060, %conv28 + %cond39 = select i1 %cmp33, i32 %s.060, i32 %conv28 + %inc = add nuw nsw i32 %i.061, 1 + %exitcond = icmp eq i32 %inc, %Nels + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + %cond39.lcssa = phi i32 [ %cond39, %for.body ] + br label %for.end + +for.end: + %s.0.lcssa = phi i32 [ 0, %entry ], [ %cond39.lcssa, %for.end.loopexit ] + ret i32 %s.0.lcssa +} diff --git a/test/Analysis/CostModel/X86/interleaved-store-i8.ll b/test/Analysis/CostModel/X86/interleaved-store-i8.ll new file mode 100644 index 0000000000000..d8408c1527633 --- /dev/null +++ b/test/Analysis/CostModel/X86/interleaved-store-i8.ll @@ -0,0 +1,85 @@ +; REQUIRES: asserts +; RUN: opt -loop-vectorize -S -mcpu=core-avx2 --debug-only=loop-vectorize -vectorizer-maximize-bandwidth < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind uwtable +define void @doit_stride3(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr { +;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv4 +;CHECK: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %conv4 +;CHECK: LV: Found an estimated cost of 9 for VF 4 For instruction: store i8 %conv4 +;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction: store i8 %conv4 +;CHECK: LV: Found an estimated cost of 19 for VF 16 For instruction: store i8 %conv4 +;CHECK: LV: Found an estimated cost of 35 for VF 32 For instruction: store i8 %conv4 +entry: + %cmp14 = icmp sgt i32 %Nels, 0 + br i1 %cmp14, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %conv = trunc i32 %Nels to i8 + %conv1 = shl i8 %conv, 1 + %conv4 = shl i8 %conv, 2 + br label %for.body + +for.body: + %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %Ptr.addr.015 = phi i8* [ %Ptr, %for.body.lr.ph ], [ %incdec.ptr5, %for.body ] + %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.015, i64 1 + store i8 %conv, i8* %Ptr.addr.015, align 1 + %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.015, i64 2 + store i8 %conv1, i8* %incdec.ptr, align 1 + %incdec.ptr5 = getelementptr inbounds i8, i8* %Ptr.addr.015, i64 3 + store i8 %conv4, i8* %incdec.ptr2, align 1 + %inc = add nuw nsw i32 %i.016, 1 + %exitcond = icmp eq i32 %inc, %Nels + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; Function Attrs: norecurse nounwind uwtable +define void @doit_stride4(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr { +;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv7 +;CHECK: LV: Found an estimated cost of 13 for VF 2 For instruction: store i8 %conv7 +;CHECK: LV: Found an estimated cost of 10 for VF 4 For instruction: store i8 %conv7 +;CHECK: LV: Found an estimated cost of 17 for VF 8 For instruction: store i8 %conv7 +;CHECK: LV: Found an estimated cost of 22 for VF 16 For instruction: store i8 %conv7 +;CHECK: LV: Found an estimated cost of 44 for VF 32 For instruction: store i8 %conv7 +entry: + %cmp19 = icmp sgt i32 %Nels, 0 + br i1 %cmp19, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %conv = trunc i32 %Nels to i8 + %conv1 = shl i8 %conv, 1 + %conv4 = shl i8 %conv, 2 + %mul6 = mul nsw i32 %Nels, 5 + %conv7 = trunc i32 %mul6 to i8 + br label %for.body + +for.body: + %i.021 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %Ptr.addr.020 = phi i8* [ %Ptr, %for.body.lr.ph ], [ %incdec.ptr8, %for.body ] + %incdec.ptr = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 1 + store i8 %conv, i8* %Ptr.addr.020, align 1 + %incdec.ptr2 = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 2 + store i8 %conv1, i8* %incdec.ptr, align 1 + %incdec.ptr5 = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 3 + store i8 %conv4, i8* %incdec.ptr2, align 1 + %incdec.ptr8 = getelementptr inbounds i8, i8* %Ptr.addr.020, i64 4 + store i8 %conv7, i8* %incdec.ptr5, align 1 + %inc = add nuw nsw i32 %i.021, 1 + %exitcond = icmp eq i32 %inc, %Nels + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} diff --git a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll index 3a0ab0f03b995..e797b377556e9 100644 --- a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll +++ b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll @@ -100,3 +100,90 @@ backedge: exit: ret i8 0 } + +; Merging cont block into do block. Make sure that we do not incorrectly have the cont +; LVI info as LVI info for the beginning of do block. LVI info for %i is Range[0,1) +; at beginning of cont Block, which is incorrect at the beginning of do block. +define i32 @test3(i32 %i, i1 %f, i32 %n) { +; CHECK-LABEL: LVI for function 'test3': +; CHECK-LABEL: entry +; CHECK: ; LatticeVal for: 'i32 %i' is: overdefined +; CHECK: %c = icmp ne i32 %i, -2134 +; CHECK: br i1 %c, label %cont, label %exit +entry: + %c = icmp ne i32 %i, -2134 + br i1 %c, label %do, label %exit + +exit: + %c1 = icmp ne i32 %i, -42 + br i1 %c1, label %exit2, label %exit + +; CHECK-LABEL: cont: +; Here cont is merged to do and i is any value except -2134. +; i is not the single value: zero. +; CHECK-NOT: ; LatticeVal for: 'i32 %i' is: constantrange<0, 1> +; CHECK: ; LatticeVal for: 'i32 %i' is: constantrange<-2133, -2134> +; CHECK: ; LatticeVal for: ' %cond.0 = icmp sgt i32 %i, 0' in BB: '%cont' is: overdefined +; CHECK: %cond.0 = icmp sgt i32 %i, 0 +; CHECK: %consume = call i32 @consume +; CHECK: %cond = icmp eq i32 %i, 0 +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond) +; CHECK: %cond.3 = icmp sgt i32 %i, %n +; CHECK: br i1 %cond.3, label %exit2, label %exit +cont: + %cond.3 = icmp sgt i32 %i, %n + br i1 %cond.3, label %exit2, label %exit + +do: + %cond.0 = icmp sgt i32 %i, 0 + %consume = call i32 @consume(i1 %cond.0) + %cond = icmp eq i32 %i, 0 + call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] + %cond.2 = icmp sgt i32 %i, 0 + br i1 %cond.2, label %exit, label %cont + +exit2: +; CHECK-LABEL: exit2: +; LatticeVal for: 'i32 %i' is: constantrange<-2134, 1> + ret i32 30 +} + +; FIXME: We should be able to merge cont into do. +; When we do so, LVI for cont cannot be the one for the merged do block. +define i32 @test4(i32 %i, i1 %f, i32 %n) { +; CHECK-LABEL: LVI for function 'test4': +entry: + %c = icmp ne i32 %i, -2134 + br i1 %c, label %do, label %exit + +exit: ; preds = %do, %cont, %exit, %entry + %c1 = icmp ne i32 %i, -42 + br i1 %c1, label %exit2, label %exit + +cont: ; preds = %do +; CHECK-LABEL: cont: +; CHECK: ; LatticeVal for: 'i1 %f' is: constantrange<-1, 0> +; CHECK: call void @dummy(i1 %f) + call void @dummy(i1 %f) + br label %exit2 + +do: ; preds = %entry +; CHECK-LABEL: do: +; CHECK: ; LatticeVal for: 'i1 %f' is: overdefined +; CHECK: call void @dummy(i1 %f) +; CHECK: br i1 %cond, label %exit, label %cont + call void @dummy(i1 %f) + %consume = call i32 @exit() + call void @llvm.assume(i1 %f) + %cond = icmp eq i1 %f, false + br i1 %cond, label %exit, label %cont + +exit2: ; preds = %cont, %exit + ret i32 30 +} + +declare i32 @exit() +declare i32 @consume(i1) +declare void @llvm.assume(i1) nounwind +declare void @dummy(i1) nounwind +declare void @llvm.experimental.guard(i1, ...) diff --git a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll index 455968d7a4017..8ac6301f93184 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll @@ -26,21 +26,21 @@ for.body: ; preds = %entry, %for.body %idxprom = sext i32 %sub to i64 %half = bitcast %union.vector_t* %vb to [8 x i16]* %arrayidx = getelementptr inbounds [8 x i16], [8 x i16]* %half, i64 0, i64 %idxprom - %tmp4 = load i16, i16* %arrayidx, align 2, !tbaa !0 + %tmp4 = load i16, i16* %arrayidx, align 2, !tbaa !10 %conv = zext i16 %tmp4 to i32 %and = and i32 %conv, 15 %sub6 = sub nsw i32 7, %i.01 %idxprom7 = sext i32 %sub6 to i64 %half9 = bitcast %union.vector_t* %va to [8 x i16]* %arrayidx10 = getelementptr inbounds [8 x i16], [8 x i16]* %half9, i64 0, i64 %idxprom7 - %tmp11 = load i16, i16* %arrayidx10, align 2, !tbaa !0 + %tmp11 = load i16, i16* %arrayidx10, align 2, !tbaa !10 %conv12 = zext i16 %tmp11 to i32 %shl = shl i32 %conv12, %and %sub15 = sub nsw i32 7, %i.01 %idxprom16 = sext i32 %sub15 to i64 %half18 = bitcast %union.vector_t* %va to [8 x i16]* %arrayidx19 = getelementptr inbounds [8 x i16], [8 x i16]* %half18, i64 0, i64 %idxprom16 - %tmp20 = load i16, i16* %arrayidx19, align 2, !tbaa !0 + %tmp20 = load i16, i16* %arrayidx19, align 2, !tbaa !10 %conv21 = zext i16 %tmp20 to i32 %sub23 = sub nsw i32 16, %and %shr = lshr i32 %conv21, %sub23 @@ -50,20 +50,20 @@ for.body: ; preds = %entry, %for.body %idxprom27 = sext i32 %sub26 to i64 %half28 = bitcast %union.vector_t* %t to [8 x i16]* %arrayidx29 = getelementptr inbounds [8 x i16], [8 x i16]* %half28, i64 0, i64 %idxprom27 - store i16 %conv24, i16* %arrayidx29, align 2, !tbaa !0 + store i16 %conv24, i16* %arrayidx29, align 2, !tbaa !10 %inc = add nsw i32 %i.01, 1 %cmp = icmp slt i32 %inc, 8 br i1 %cmp, label %for.body, label %for.end for.end: ; preds = %for.body %arrayidx31 = getelementptr inbounds %union.vector_t, %union.vector_t* %t, i64 0, i32 0, i64 1 - %tmp32 = load i64, i64* %arrayidx31, align 8, !tbaa !3 + %tmp32 = load i64, i64* %arrayidx31, align 8, !tbaa !10 %arrayidx35 = getelementptr inbounds %union.vector_t, %union.vector_t* %vd, i64 0, i32 0, i64 1 - store i64 %tmp32, i64* %arrayidx35, align 8, !tbaa !3 + store i64 %tmp32, i64* %arrayidx35, align 8, !tbaa !10 %arrayidx37 = getelementptr inbounds %union.vector_t, %union.vector_t* %t, i64 0, i32 0, i64 0 - %tmp38 = load i64, i64* %arrayidx37, align 8, !tbaa !3 + %tmp38 = load i64, i64* %arrayidx37, align 8, !tbaa !10 %arrayidx41 = getelementptr inbounds %union.vector_t, %union.vector_t* %vd, i64 0, i32 0, i64 0 - store i64 %tmp38, i64* %arrayidx41, align 8, !tbaa !3 + store i64 %tmp38, i64* %arrayidx41, align 8, !tbaa !10 ret void } @@ -124,7 +124,7 @@ for.end: ; preds = %for.body } ; CHECK: [[TAG]] = !{[[TYPE_LL:!.*]], [[TYPE_LL]], i64 0} -; CHECK: [[TYPE_LL]] = !{!"long long", {{!.*}}} +; CHECK: [[TYPE_LL]] = !{!"omnipotent char", {{!.*}}} !0 = !{!6, !6, i64 0} !1 = !{!"omnipotent char", !2} !2 = !{!"Simple C/C++ TBAA"} @@ -135,3 +135,4 @@ for.end: ; preds = %for.body !7 = !{!"long long", !1} !8 = !{!"int", !1} !9 = !{!"float", !1} +!10 = !{!1, !1, i64 0} |